In [1]:

# Identifier of the National Archives of Australia series examined in this notebook
series = "D1902"

In [2]:

import os
import pandas as pd
import series_details
import plotly.offline as py
# Initialise plotly's offline notebook mode so that py.iplot() figures can render inline
py.init_notebook_mode()

In [3]:

# Series identifiers may contain '/', which is replaced with '-' to form the CSV file name
csv_path = os.path.join('data', '{}.csv'.format(series.replace('/', '-')))

# Load the harvested items, parsing the start/end date columns as datetimes
df = pd.read_csv(
    csv_path,
    parse_dates=['start_date', 'end_date'],
)

In [4]:

# Render the summary for this series from its identifier and the loaded item dataframe
series_details.display_summary(series, df)

National Archives of Australia: Series D1902

Nominal Index cards to investigation case files.

Total items	3
Access status
Open	3 (100.00%)
Number of items digitised	0 (0.00%)
Number of pages digitised	0
Date of earliest content	1920
Date of latest content	1960

Content preview¶

In [5]:

# Number of rows to preview -- increase this value to see more of the series
number_of_rows = 5

# Table-level CSS rules: centre the header cells and hide the index column
# (the row headings and the blank corner cell)
table_styles = [
    {'selector': 'th', 'props': [('text-align', 'center')]},
    {'selector': '.row_heading, .blank', 'props': [('display', 'none')]},
]

# Left-align the titles, apply the table rules, and display the styled preview
(
    df[:number_of_rows]
    .style
    .set_properties(subset=['title'], **{'text-align': 'left'})
    .set_table_styles(table_styles)
)

Out[5]:

	identifier	series	control_symbol	title	contents_dates	start_date	end_date	access_status	location	digitised_status
0	441930	D1902	5227	Daniel PETROS	1938 - 1938	1938-01-01 00:00:00	1938-01-01 00:00:00	Open	Adelaide	False
1	441931	D1902	SA28038	LP Cutts	1954 - 1954	1954-01-01 00:00:00	1954-01-01 00:00:00	Open	Adelaide	False
2	943006	D1902	WHOLE SERIES	Nominal index cards to investigation case files	1920 - 1960	1920-01-01 00:00:00	1960-01-01 00:00:00	Open	Adelaide	False

Plot content dates¶

In [6]:

# Build the content-dates figure from the item dataframe
# (plot_dates comes from the local series_details module)
fig = series_details.plot_dates(df)
# Render the figure inline using plotly's offline mode
py.iplot(fig, filename='series-dates-bar')

View word frequencies¶

In [7]:

# Combine all of the file titles into a single string
# Combine all of the file titles into a single lower-cased string, separated by spaces.
# With no na_rep given, str.cat leaves missing titles out of the result.
title_text = df['title'].str.lower().str.cat(sep=' ')

In [8]:

# Display the word frequencies for the combined title text.
# NOTE(review): the recorded run emitted a "divide by zero" RuntimeWarning from pandas'
# styler here -- presumably because every count is identical in this tiny series; confirm
# in series_details.display_word_counts.
series_details.display_word_counts(title_text)

/Users/tim/mycode/ozglam-workbench-naa-asio/lib/python3.6/site-packages/pandas/io/formats/style.py:939: RuntimeWarning:

divide by zero encountered in long_scalars

Out[8]:

	word	count
0	daniel	1
1	petros	1
2	lp	1
3	cutts	1
4	nominal	1
5	index	1
6	cards	1
7	investigation	1
8	case	1
9	files	1

In [ ]: