#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') # # NPD data # # Start here >> https://portal.diskos.cgg.com/whereoil-data/ # # Bit confusing as there are a lot of places to get data. # # I'll start with FactPages because I've heard of that... # ## FactPages... Use pandas to read CSV directly # # Right-click and copy URL for CSV from this link: # # http://factpages.npd.no/factpages/Default.aspx?culture=nb-no&nav1=field&nav2=TableView|Production|Saleable|Monthly # In[2]: csv = "http://factpages.npd.no/ReportServer?/FactPages/TableView/field_production_monthly&rs:Command=Render&rc:Toolbar=false&rc:Parameters=f&rs:Format=CSV&Top100=false&IpAddress=213.236.218.114&CultureCode=nb-no" # In[3]: import pandas as pd df = pd.read_csv(csv) # In[4]: df.head() # In[5]: len(df) # We could save this if we wanted to... # In[6]: #df.to_csv('npd.csv', index=False) # ## Add a datetime # Give this a 'DS' (datestamp) column with `pandas` datetimes. To do this easily, we need columns named like `'year'`, `'month'`, `'day'`. # In[26]: #df = pd.read_csv('npd.csv') # In[20]: columns = {'prfYear': 'year', 'prfMonth': 'month', 'prfInformationCarrier': 'field', 'prfPrdOilNetMillSm3': 'oil', 'prfPrdOeNetMillSm3': 'OE', 'prfPrdProducedWaterInFieldMillSm3': 'water', } df = df.rename(columns=columns) # In[21]: df['day'] = 1 # In[22]: df['DS'] = pd.to_datetime(df[['year', 'month', 'day']]) df = df.set_index('DS') # In[23]: df.head() # In[24]: df = df[['field', 'oil', 'OE', 'water']] df.OE -= df.oil # In[25]: df.head() # ## Pandas is very useful for time series! # In[27]: import matplotlib.pyplot as plt plt.figure(figsize=(15,3)) df.water[df.field=='TROLL'].plot() df.oil[df.field=='TROLL'].plot() df.OE[df.field=='TROLL'].plot() # Once you're used to the `pandas` syntax, it's fairly easy to make the mean monthly production for a time range: # In[28]: df.loc[df.field=='TROLL'].loc['2000':'2010'].resample('Y').mean().plot() # Or to get the summed annual production: # In[29]: df.loc[df.field=='TROLL'].loc['2000':'2010'].resample('Y').sum() # Throw `.plot()` on the end: # In[30]: df.loc[df.field=='TROLL'].loc['2000':'2010'].resample('Y').sum().plot() # Or we can get totals for *ALL* fields in the database: # In[31]: df.loc['2010':].resample('M').sum().plot() # Let's look at the contribution TROLL made to NCS production since 1993: # In[32]: fig, ax = plt.subplots() df.loc['1993':'2018', 'oil'].resample('Y').sum().plot(ax=ax) df.loc[df.field!='TROLL'].loc['1993':'2018', 'oil'].resample('Y').sum().plot(ax=ax) # In[ ]: