#!/usr/bin/env python # coding: utf-8 # ### X LINES OF PYTHON # # # Physical units with `pint` # # This notebook goes with [a blog post on the same subject](https://agilescientific.com/blog/2019/8/19/x-lines-of-python-physical-units). # # Have you ever wished you could carry units around with your quantities — and have the computer figure out the best units and multipliers to use? # # [`pint`](https://pint.readthedocs.io/en/0.9/tutorial.html) is a nince, compact library for doing just this, handling all your [dimensional analysis](https://en.wikipedia.org/wiki/Dimensional_analysis) needs. It can also detect units from strings. We can define our own units, it knows about multipliers (kilo, mega, etc), and it even works with `numpy` and `pandas`. # # Install `pint` with `pip` or `conda`, e.g. # # pip install pint # # **NB** If you are running this on Google Colaboratory, you must uncomment these lines (delete the initial `#`) and run this first: # In[ ]: #!pip install pint #!pip install git+https://github.com/hgrecco/pint-pandas#egg=Pint-Pandas-0.1.dev0 # To use it in its typical mode, we import the library then instantiate a `UnitRegistry` object. The registry contains lots of physical units. # In[3]: import pint units = pint.UnitRegistry() # In[4]: pint.__version__ # ## Attaching and printing units # In[5]: thickness = 68 * units.m thickness # In a Jupyter Notebook you see a 'pretty' version of the quantity. In the interpreter, you'll see something slightly different (the so-called `repr` of the class): # # >>> thickness # # # We can get at the magnitude, the units, and the dimensionality of this quantity: # In[6]: thickness.magnitude, thickness.units, thickness.dimensionality # You can also use the following abbreviations for magnitude and units: # # thickness.m, thickness.u # # For printing, we can use Python's string formatting: # In[7]: f'{thickness**2}' # But `pint` extends the string formatting options to include special options for `Quantity` objects. The most useful option is `P` for 'pretty', but there's also `L` for $\LaTeX$ and `H` for HTML. Adding a `~` (tilde) before the option tells `pint` to use unit abbreviations instead of the full names: # In[8]: print(f'{thickness**2:P}') print(f'{thickness**2:~P}') print(f'{thickness**2:~L}') print(f'{thickness**2:~H}') # ## Doing maths # # If we multiply by a scalar, `pint` produces the result you'd expect: # In[9]: thickness * 2 # Note that you must use units when you need them: # In[10]: thickness + 10 # This is meant to produce an error... # Let's try defining an area of $60\ \mathrm{km}^2$, then multiplying it by our thickness. To make it more like a hydrocarbon volume, I'll also multiply by net:gross `n2g`, porosity `phi`, and saturation `sat`, all of which are dimensionless: # In[11]: area = 60 * units.km**2 n2g = 0.5 * units.dimensionless # Optional dimensionless 'units'... phi = 0.2 # ... but you can just do this. sat = 0.7 volume = area * thickness * n2g * phi * sat volume # We can convert to something more compact: # In[12]: volume.to_compact() # Or be completely explicit about the units and multipliers we want: # In[13]: volume.to('m**3') # Or use m^3 # The `to_compact()` method can also take units, if you want to be more explicit; it applies multipliers automatically: # In[14]: volume.to_compact('L') # Oil barrels are already defined (**careful**, they are abbreviated as `oil_bbl` not `bbl` — that's a 31.5 gallon barrel, about the same as a beer barrel). # In[15]: volume.to_compact('oil_barrel') # If we use string formatting (see above), we can get pretty specific: # In[16]: f"The volume is {volume.to_compact('oil_barrel'):~0.2fL}" # ## Defining new units # # `pint` defines hundreads of units ([here's the list](https://github.com/hgrecco/pint/blob/master/pint/default_en.txt)), and it knows about tonnes of oil equivalent... but it doesn't know about barrels of oil equivalent ([for more on conversion to BOE](https://en.wikipedia.org/wiki/Barrel_of_oil_equivalent)). So let's define a custom unit, using the USGS's conversion factor: # In[17]: units.define('barrel_of_oil_equivalent = 6000 ft**3 = boe') # Let's suspend reality for a moment and imagine we now want to compute our gross rock volume in BOEs... # In[18]: volume.to('boe') # In[19]: volume.to_compact('boe') # ## Getting units from strings # # `pint` can also parse strings and attempt to convert them to `Quantity` instances: # In[20]: units('2.34 km') # This looks useful! Let's try something less nicely formatted. # In[21]: units('2.34*10^3 km') # In[22]: units('-12,000.ft') # In[23]: units('3.2 m') # You can also use the `Quantity` constructor, like this: # # >>> qty = pint.Quantity # >>> qty('2.34 km') # 2.34 kilometer # # But the `UnitRegistry` seems to do the same things and might be more convenient. # ## `pint` with `uncertainties` # # Conveniently, `pint` works well with [`uncertainties`](https://pythonhosted.org/uncertainties/). Maybe I'll do an _X lines_ on that package in the future. Install it with `conda` or `pip`, e.g. # # pip install uncertainties # In[24]: from uncertainties import ufloat area = ufloat(64, 5) * units.km**2 # 64 +/- 5 km**2 (thickness * area).to('Goil_bbl') # ## `pint` with `numpy` # # `pint` works fine with NumPy arrays: # In[25]: import numpy as np vp = np.array([2300, 2400, 2550, 3200]) * units.m/units.s rho = np.array([2400, 2550, 2500, 2650]) * units.kg/units.m**3 z = vp * rho z # For some reason, this sometimes doesn't render properly. But we can always do this: # In[26]: print(z) # As expected, the magnitude of this quantity is just a NumPy array: # In[27]: z.m # ## `pint` with `pandas` # # **Note** that this functionality is fairly new and is still settling down. YMMV. # # To use `pint` (version 0.9 and later) with `pandas` (version 0.24.2 works; 0.25.0 does not work at the time of writing), we must first install `pintpandas`, which must be done from source; [get the code from GitHub](https://github.com/hgrecco/pint-pandas). Here's how I do it: # # cd pint-pandas # python setup.py sdist # pip install dist/Pint-Pandas-0.1.dev0.tar.gz # # You could also do: # # pip install git+https://github.com/hgrecco/pint-pandas#egg=Pint-Pandas-0.1.dev0 # # Once you have done that, the following should evaluate to `True`: # In[36]: pint._HAS_PINTPANDAS # To use this integration, we pass special `pint` data types to the `pd.Series()` object: # In[30]: import pandas as pd df = pd.DataFrame({ "Vp": pd.Series(vp.m, dtype="pint[m/s]"), "Vs": pd.Series([1200, 1200, 1250, 1300], dtype="pint[m/s]"), "rho": pd.Series(rho.m, dtype="pint[kg/m**3]"), }) df # In[31]: import bruges as bg df['E'] = bg.rockphysics.moduli.youngs(df.Vp, df.Vs, df.rho) df.E # We can't convert the units of a whole `Series` but we can do one: # In[32]: df.loc[0, 'E'].to('GPa') # So to convert a whole series, we can use `Series.apply()`: # In[33]: df.E.apply(lambda x: x.to('GPa')) # ## Bonus: dataframe display with units # # We *could* subclass dataframes to tweak their `_repr_html_()` method, which would allow us to make units show up in the Notebook representation of the dataframe... # In[34]: class UnitDataFrame(pd.DataFrame): def _repr_html_(self): """New repr for Jupyter Notebook.""" html = super()._repr_html_() # Get the old repr string. units = [''] + [f"{dtype.units:~H}" for dtype in self.dtypes] style = "text-align: right; color: gray;" new = f'' + "".join(units) + "" return html.replace('', new) # In[35]: df = UnitDataFrame({ "Vp": pd.Series(vp.m, dtype="pint[m/s]"), "Vs": pd.Series([1200, 1200, 1250, 1300], dtype="pint[m/s]"), "rho": pd.Series(rho.m, dtype="pint[kg/m**3]"), }) df # Cute. # ---- # # © Agile Scientific 2019, licensed CC-BY