# Identifier of the series examined in this notebook; it is used below to
# build the CSV file name and is passed to the summary display.
series = 'D1902'
import os
import pandas as pd
import series_details
import plotly.offline as py
# Initialise plotly's offline notebook mode before any chart is drawn.
py.init_notebook_mode()

# Slashes in the series identifier are swapped for hyphens to form the file name.
csv_name = '{}.csv'.format(series.replace('/', '-'))
csv_path = os.path.join('data', csv_name)

# Load the series data, parsing both date columns as datetimes.
df = pd.read_csv(csv_path, parse_dates=['start_date', 'end_date'])

# Show the summary for this series.
series_details.display_summary(series, df)
Total items | 3 |
---|---|
Access status | |
Open | 3 (100.00%) |
Number of items digitised | 0 (0.00%) |
Number of pages digitised | 0 |
Date of earliest content | 1920 |
Date of latest content | 1960 |
# Change the number_of_rows value to see more
number_of_rows = 5

# Table-level CSS rules: centre the header cells and hide the row-heading
# and blank cells.
table_styles = [
    {'selector': "th", 'props': [("text-align", "center")]},
    {'selector': '.row_heading, .blank', 'props': [('display', 'none')]},
]

# Display dataframe: only the leading rows, with the title column left-aligned
preview = df[:number_of_rows]
preview.style.set_properties(['title'], **{'text-align': 'left'}).set_table_styles(table_styles)
identifier | series | control_symbol | title | contents_dates | start_date | end_date | access_status | location | digitised_status | digitised_pages | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 441930 | D1902 | 5227 | Daniel PETROS | 1938 - 1938 | 1938-01-01 00:00:00 | 1938-01-01 00:00:00 | Open | Adelaide | False | 0 |
1 | 441931 | D1902 | SA28038 | LP Cutts | 1954 - 1954 | 1954-01-01 00:00:00 | 1954-01-01 00:00:00 | Open | Adelaide | False | 0 |
2 | 943006 | D1902 | WHOLE SERIES | Nominal index cards to investigation case files | 1920 - 1960 | 1920-01-01 00:00:00 | 1960-01-01 00:00:00 | Open | Adelaide | False | 0 |
# Build the dates figure for the loaded dataframe using the series_details helper.
fig = series_details.plot_dates(df)
# Render the figure through plotly's offline module (imported above as `py`).
py.iplot(fig, filename='series-dates-bar')
# Combine all of the file titles into a single lower-cased, space-separated string
title_text = df['title'].str.lower().str.cat(sep=' ')
# Display the word counts for the combined titles
series_details.display_word_counts(title_text)
/Users/tim/mycode/ozglam-workbench-naa-asio/lib/python3.6/site-packages/pandas/io/formats/style.py:939: RuntimeWarning: divide by zero encountered in long_scalars
word | count | |
---|---|---|
0 | daniel | 1 |
1 | petros | 1 |
2 | lp | 1 |
3 | cutts | 1 |
4 | nominal | 1 |
5 | index | 1 |
6 | cards | 1 |
7 | investigation | 1 |
8 | case | 1 |
9 | files | 1 |