#!/usr/bin/env python # coding: utf-8 # # `numpy.arange()` with floats considered harmful # # A few examples used in this notebook are taken from http://quantumwise.com/forum/index.php?topic=110.0#.VIVgyIctjRZ. # # We often need series of regularly spaced numbers. [numpy.arange()](https://docs.scipy.org/doc/numpy/reference/generated/numpy.arange.html) is great for that, as long as the step size is an integer. # # If the step size is a floating-point number, however, the result might not be what you expect. # In[1]: import numpy as np # ## But when I last tried it worked as expected ... # # [numpy.arange()](https://docs.scipy.org/doc/numpy/reference/generated/numpy.arange.html) is NumPy's extension of Python's built-in [range()](https://docs.python.org/3/library/functions.html#func-range) function. # The latter only works with integers, which are also no problem for the former: # In[2]: np.arange(7) # defaults: start=0, step=1 # In[3]: np.arange(1, 7) # In[4]: np.arange(1, 7, 2) # In[5]: np.arange(1, 7, 6) # The `start` value is always part of the returned range, the `stop` value is never part of it. # # Contrary to `range()`, `numpy.arange()` also works with floating-point numbers. # In[6]: np.arange(0.4, 1.1, 0.1) # Also here, as expected, the `stop` value is not part of the range. # ## So what's the problem? # # The problem is that some numbers (infinitely many, actually) cannot be represented exactly as fixed-size binary floating-point numbers. Doing calculations with floating-point numbers will often lead to rounding errors. And exactly those rounding errors may lead to unexpected results. # # For example here: # In[7]: np.arange(0.5, 1.1, 0.1) # Wow, now `stop` is suddenly included in the range! We didn't expect that! # # That's the reason why care has to be taken when using `np.arange()` with floating-point numbers. # ## OK, I see the problem now, but what's the solution? # # Well, there isn't a single solution for all use cases, it depends on what you need. # # If you know the first and the last value that should be in your range and the total number of values, you should use [numpy.linspace()](https://docs.scipy.org/doc/numpy/reference/generated/numpy.linspace.html). # In[8]: np.linspace(0.5, 1.1, 7) # In contrast to `np.arange()`, the resulting range includes the last value. # # If you want it to exclude the last value, you can use the `endpoint` argument (and reduce the number of values by 1). # In[9]: np.linspace(0.5, 1.1, 6, endpoint=False) # If you, however, want to create a range with a given *spacing*, you'd have to manually calculate the *number of values* before being able to use `numpy.linspace()`. # # It would be great if there were a feature of either `numpy.linspace()` or `numpy.arange()` that would allow specifying a *spacing* but would also avoid the above-mentioned problem. # This was discussed [years ago on the NumPy mailing list](https://mail.python.org/pipermail/numpy-discussion/2007-September/029129.html). Sadly, nobody could come up with a good solution. # # So for now, you're stuck with adding or subtracting small amounts to/from the `stop` value to get the desired behavior. # In[10]: step = 0.1 np.arange(0.5, 1.1 - step*0.5, step) # This yields the expected result, but it only works if you know that the distance between `start` and `stop` is (approximately) an integer multiple of the spacing. # In[11]: assert np.isclose((1.1 - 0.5) % step, 0.0) # If you want to include the `stop` value, you can do something like this: # In[12]: np.arange(0.5, 1.1 + step*0.5, step) # Again, you have to make sure that the spacing fits to the distance: # In[13]: assert np.isclose((1.1 + 0.5) % step, 0.0) # ## Can't this be done automatically? # # Well, let's try ... # In[14]: def myrange(start, stop, step=1): # Don't use this function, see below! return np.arange(start, stop - step*0.5, step) # In[15]: myrange(0.5, 1.1, 0.1) # OK, the problematic case from above works. But let's change the `stop` value a little bit: # In[16]: myrange(0.5, 1.11, 0.1) # Here, we would expect `1.1` to be part of the range, because it's clearly smaller than `1.11`. So let's modify our function: # In[17]: def myrange(start, stop, step=1): if np.isclose((stop - start) % step, 0.0): stop -= step * 0.5 return np.arange(start, stop, step) # In[18]: myrange(0.5, 1.11, 0.1) # OK, now `1.1` is part of the range, as expected. # In[19]: myrange(0.5, 1.1, 0.1) # The previous example still works. Good. # # This isn't a very elegant solution, though. The behavior depends on the tolerances used in [numpy.isclose()](https://docs.scipy.org/doc/numpy/reference/generated/numpy.isclose.html), namely `rtol` and `atol`. Let's add those to our function! # In[20]: def myrange(start, stop, step=1, dtype=None, **kwargs): if np.isclose((stop - start) % step, 0.0, **kwargs): stop -= step * 0.5 return np.arange(start, stop, step, dtype) # Note that we also enabled the `dtype` argument from `numpy.arange()`. # In[21]: myrange(0.5, 1.11, 0.1) # In[22]: myrange(0.5, 1.11, 0.1, atol=0.02) # This is a quite constructed example, but it should show that setting the tolerance works # # One difference to `numpy.arange()` is that even when only passing integers, the result consists of floating-point numbers. # In[23]: myrange(1, 7) # We could add this as branch to our function implementation, but I think it isn't that important. And if we really want integers, we can still force them with `dtype`: # In[24]: myrange(1, 7, dtype=int) # OK, that doesn't look that bad, does it? # ## But what if I want to include the endpoint? # # OK, you're right. Sometimes, we want the endpoint to be included, so let's try to add this as an option. # In[25]: def myrange(start, stop, step=1, endpoint=False, dtype=None, **kwargs): # Don't use this function, see below! if np.isclose((stop - start) % step, 0.0, **kwargs): if endpoint: stop += step * 0.5 else: stop -= step * 0.5 return np.arange(start, stop, step, dtype) # In[26]: myrange(0.5, 1.1, 0.1, endpoint=True) # OK, this seems to work. Let's try another example ... # In[27]: myrange(1, 2, 0.2, endpoint=True) # What? The endpoint clearly isn't part of the range! # # The problem is that we check if the result of the modulus operator (`%`) is close to `0.0`, but we should also check the case where it is close to `step`! # # We should try to add this check: # In[28]: def myrange(start, stop, step=1, endpoint=False, dtype=None, **kwargs): # Don't use this function, see below! remainder = (stop - start) % step if np.any(np.isclose(remainder, (0.0, step), **kwargs)): if endpoint: stop += step * 0.5 else: stop -= step * 0.5 return np.arange(start, stop, step, dtype) # In[29]: myrange(1, 2, 0.2, endpoint=True) # Good, that's settled. Let's try another one: # In[30]: myrange(0.5, 1.11, 0.1, endpoint=True) # Now this is a little strange, because `1.11` is not included. But on the other hand, it isn't even part of the series! # # As far as I can tell, the argument `endpoint` only really makes sense for integer multiples. # For now, I don't know a better solution than to just disallow this case. # In[31]: def myrange(start, stop, step=1, endpoint=False, dtype=None, **kwargs): remainder = (stop - start) % step if np.any(np.isclose(remainder, (0.0, step), **kwargs)): if endpoint: stop += step * 0.5 else: stop -= step * 0.5 elif endpoint: raise ValueError("Invalid stop value for endpoint=True") return np.arange(start, stop, step, dtype) # In[32]: myrange(0.5, 1.1, 0.1, endpoint=True) # In[33]: myrange(0.5, 1.11, 0.1, endpoint=True) # OK, I guess I'll leave it at that for now. # This is also more or less what I implemented as helper function in the [Sound Field Synthesis Toolbox](http://sfs-python.readthedocs.io/#sfs.util.strict_arange). # # It's not a perfect solution, but probably it works for your use case. # # Have fun! #

# # CC0 # #
# To the extent possible under law, # the person who associated CC0 # with this work has waived all copyright and related or neighboring # rights to this work. #