#!/usr/bin/env python
# coding: utf-8

# In[2]:


# Identifier of the series to analyse. It is used below to locate the
# matching CSV file under data/ and is passed to series_details.display_summary.
series = 'B13'


# In[3]:


import os
import pandas as pd
import series_details
import plotly.offline as py
# Initialise plotly's offline notebook mode so that the py.iplot() call
# further down can render its figure inline in the notebook output.
py.init_notebook_mode()


# In[4]:


# Load the CSV for this series from the data/ directory. Any '/' in the
# series identifier is replaced with '-' to form the filename, and the
# start_date / end_date columns are parsed as dates.
df = pd.read_csv(
    os.path.join('data', series.replace('/', '-') + '.csv'),
    parse_dates=['start_date', 'end_date'],
)


# In[5]:


# Hand the series identifier and its loaded dataframe to the project helper.
# NOTE(review): presumably renders an overview of the series in the notebook
# output -- confirm against series_details.display_summary.
series_details.display_summary(series, df)


# ## Content preview

# In[6]:


# Number of rows shown in the preview below; increase it to see more.
number_of_rows = 5

# Preview the first rows as a styled table: the 'title' column is
# left-aligned, header cells are centred, and the row-heading / blank
# corner cells are hidden via CSS.
(
    df[:number_of_rows]
    .style
    .set_properties(subset=['title'], **{'text-align': 'left'})
    .set_table_styles([
        {'selector': "th", 'props': [("text-align", "center")]},
        {'selector': '.row_heading, .blank', 'props': [('display', 'none')]},
    ])
)


# ## Plot content dates

# In[12]:


# Build the figure from the dataframe with the project helper, then render
# it inline with plotly's offline iplot.
# NOTE(review): plot_dates presumably charts the start_date/end_date columns
# -- confirm against series_details.
fig = series_details.plot_dates(df)
py.iplot(fig, filename='series-dates-bar')


# ## View word frequencies

# In[13]:


# Combine all of the file titles into a single lower-cased string, with
# titles separated by a single space. Missing titles (NaN) are omitted by
# Series.str.cat when no na_rep is given.
title_text = df['title'].str.lower().str.cat(sep=' ')


# In[14]:


# Pass the combined, lower-cased title text to the project helper.
# NOTE(review): presumably displays the most frequent words -- confirm
# against series_details.display_word_counts.
series_details.display_word_counts(title_text)


# In[15]:


# Number of words per ngram: 2 gives bigrams; increase it for larger
# ngrams (3 for trigrams, etc.).
ngram_count = 2
# Pass the combined title text and the ngram size to the project helper.
# NOTE(review): presumably displays the most frequent ngrams of that size
# -- confirm against series_details.display_top_ngrams.
series_details.display_top_ngrams(title_text, ngram_count)


# In[ ]: