#!/usr/bin/env python
# coding: utf-8
"""Notebook export: summarise one series from its CSV file.

Loads ``data/<series>.csv``, then uses the project-local ``series_details``
helpers to display a summary, preview the first rows, plot the content dates
and show word / n-gram frequencies for the ``title`` column.

The ``# In[n]:`` markers are the original notebook cell boundaries.
"""

# In[2]:

# Identifier of the series to examine; also determines the CSV file name.
series = 'B13'


# In[3]:

import os
import pandas as pd
import series_details
import plotly.offline as py

# Enable inline plotly rendering inside the notebook.
py.init_notebook_mode()


# In[4]:

# Any '/' in the series identifier is replaced with '-' to build the file name.
csv_path = os.path.join('data', '{}.csv'.format(series.replace('/', '-')))
df = pd.read_csv(csv_path, parse_dates=['start_date', 'end_date'])


# In[5]:

series_details.display_summary(series, df)


# ## Content preview

# In[6]:

# Change the number_of_rows value to see more
number_of_rows = 5

# Display dataframe: left-align the title column, centre the header cells and
# hide the row-heading / blank corner cells via CSS.
# NOTE: this bare expression is only rendered when run as a notebook cell.
df[:number_of_rows].style.set_properties(
    subset=['title'], **{'text-align': 'left'}
).set_table_styles([
    dict(selector="th", props=[("text-align", "center")]),
    dict(selector='.row_heading, .blank', props=[('display', 'none')]),
])


# ## Plot content dates

# In[12]:

fig = series_details.plot_dates(df)
py.iplot(fig, filename='series-dates-bar')


# ## View word frequencies

# In[13]:

# Combine all of the file titles into a single lower-cased,
# space-separated string.
title_text = df['title'].str.lower().str.cat(sep=' ')


# In[14]:

series_details.display_word_counts(title_text)


# In[15]:

# Change ngram_count for larger ngrams (trigrams etc)
ngram_count = 2
series_details.display_top_ngrams(title_text, ngram_count)


# In[ ]: