#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')


# # NPD data
# 
# Start here >> https://portal.diskos.cgg.com/whereoil-data/
# 
# Bit confusing as there are a lot of places to get data.
# 
# I'll start with FactPages because I've heard of that...

# ## FactPages... Use pandas to read CSV directly
# 
# Right-click and copy URL for CSV from this link:
# 
# http://factpages.npd.no/factpages/Default.aspx?culture=nb-no&nav1=field&nav2=TableView|Production|Saleable|Monthly

# In[2]:


csv = "http://factpages.npd.no/ReportServer?/FactPages/TableView/field_production_monthly&rs:Command=Render&rc:Toolbar=false&rc:Parameters=f&rs:Format=CSV&Top100=false&IpAddress=213.236.218.114&CultureCode=nb-no"


# In[3]:


import pandas as pd

df = pd.read_csv(csv)


# In[4]:


df.head()


# In[5]:


len(df)


# We could save this if we wanted to...

# In[6]:


#df.to_csv('npd.csv', index=False)


# ## Add a datetime

# Give this a 'DS' (datestamp) column with `pandas` datetimes. To do this easily, we need columns named like `'year'`, `'month'`, `'day'`.

# In[26]:


#df = pd.read_csv('npd.csv')


# In[20]:


columns = {'prfYear': 'year',
           'prfMonth': 'month',
           'prfInformationCarrier': 'field',
           'prfPrdOilNetMillSm3': 'oil',
           'prfPrdOeNetMillSm3': 'OE',
           'prfPrdProducedWaterInFieldMillSm3': 'water',
          }

df = df.rename(columns=columns)


# In[21]:


df['day'] = 1


# In[22]:


df['DS'] = pd.to_datetime(df[['year', 'month', 'day']])
df = df.set_index('DS')


# In[23]:


df.head()


# In[24]:


df = df[['field', 'oil', 'OE', 'water']]
df.OE -= df.oil


# In[25]:


df.head()


# ## Pandas is very useful for time series!

# In[27]:


import matplotlib.pyplot as plt

plt.figure(figsize=(15,3))

df.water[df.field=='TROLL'].plot()
df.oil[df.field=='TROLL'].plot()
df.OE[df.field=='TROLL'].plot()


# Once you're used to the `pandas` syntax, it's fairly easy to make the mean monthly production for a time range:

# In[28]:


df.loc[df.field=='TROLL'].loc['2000':'2010'].resample('Y').mean().plot()


# Or to get the summed annual production:

# In[29]:


df.loc[df.field=='TROLL'].loc['2000':'2010'].resample('Y').sum()


# Throw `.plot()` on the end:

# In[30]:


df.loc[df.field=='TROLL'].loc['2000':'2010'].resample('Y').sum().plot()


# Or we can get totals for *ALL* fields in the database:

# In[31]:


df.loc['2010':].resample('M').sum().plot()


# Let's look at the contribution TROLL made to NCS production since 1993:

# In[32]:


fig, ax = plt.subplots()
df.loc['1993':'2018', 'oil'].resample('Y').sum().plot(ax=ax)
df.loc[df.field!='TROLL'].loc['1993':'2018', 'oil'].resample('Y').sum().plot(ax=ax)


# In[ ]: