In [1]:

# Identifier of the National Archives of Australia series examined in this notebook.
# It is also used to build the name of the CSV file loaded below ('/' becomes '-').
series = 'SP726/1'

In [2]:

import os
import pandas as pd
import series_details
import plotly.offline as py
# Switch on Plotly's offline notebook mode so that py.iplot() can draw charts inline
py.init_notebook_mode()

In [3]:

# The series identifier contains '/', so the CSV file name uses '-' in its place
csv_name = '{}.csv'.format(series.replace('/', '-'))
csv_path = os.path.join('data', csv_name)

# Load the item records for the series, parsing the two date columns as datetimes
df = pd.read_csv(csv_path, parse_dates=['start_date', 'end_date'])

In [4]:

# Show an overview of the series: item totals, access status, digitisation counts
# and the earliest/latest content dates (see the rendered output below)
series_details.display_summary(series, df)

National Archives of Australia: Series SP726/1

Register of Applications for Certificate of Exemption Dictation Tests

Total items	6
Access status
Open	6 (100.00%)
Number of items digitised	0 (0.00%)
Number of pages digitised	0
Date of earliest content	1902
Date of latest content	1959

Download the complete CSV file

Content preview

In [4]:

# How many rows of the dataframe to preview -- increase this to see more
number_of_rows = 5

# CSS rules for the rendered table: centre the header cells and hide the
# '.row_heading' / '.blank' cells (presumably the index column in Styler output)
table_styles = [
    {'selector': 'th', 'props': [('text-align', 'center')]},
    {'selector': '.row_heading, .blank', 'props': [('display', 'none')]},
]

# Display the first rows, with the 'title' column left-aligned
(
    df.iloc[:number_of_rows]
    .style
    .set_properties(subset=['title'], **{'text-align': 'left'})
    .set_table_styles(table_styles)
)

Out[4]:

	identifier	series	control_symbol	title	contents_dates	start_date	end_date	access_status	location	digitised_status
0	12145139	SP726/1	BOOK 2	Register of names relating to exemption from Dictation Tests (1911-1918)	1911 - 1918	1911-01-01 00:00:00	1918-01-01 00:00:00	Open	Sydney	False
1	12145140	SP726/1	BOOK 3	Register of names relating to exemption from Dictation Tests (1918-1925)	1918 - 1925	1918-01-01 00:00:00	1925-01-01 00:00:00	Open	Sydney	False
2	12145522	SP726/1	BOOK 1	Register of names relating to exemption from Dictation Tests (1902-1910)	1902 - 1910	1902-01-01 00:00:00	1910-01-01 00:00:00	Open	Sydney	False
3	12154684	SP726/1	BOOK 4	Register of names relating to exemption from Dictation Tests (1919-1924)	1919 - 1924	1919-01-01 00:00:00	1924-01-01 00:00:00	Open	Sydney	False
4	12154685	SP726/1	BOOK 5	Register of names relating to exemption from Dictation Tests (1925-1934)	1925 - 1934	1925-01-01 00:00:00	1934-01-01 00:00:00	Open	Sydney	False

Plot content dates

In [5]:

# Build a chart of the content dates from the dataframe using the series_details helper
fig = series_details.plot_dates(df)
# Draw the chart inline in the notebook via Plotly's offline mode
py.iplot(fig, filename='series-dates-bar')

View word frequencies

In [6]:

# Combine all of the file titles into a single lower-cased string, with the
# titles separated by spaces, ready for the word and ngram counts below
title_text = df['title'].str.lower().str.cat(sep=' ')

In [7]:

# Tabulate how often each word occurs in the combined titles
series_details.display_word_counts(title_text)

Out[7]:

	word	count
0	register	6
1	names	6
2	relating	6
3	exemption	6
4	dictation	6
5	tests	6
6	1911-1918	1
7	1918-1925	1
8	1902-1910	1
9	1919-1924	1
10	1925-1934	1
11	1934-1959	1

In [8]:

# Number of words per ngram: 2 gives bigrams; increase it for larger ngrams (3 = trigrams, etc.)
ngram_count = 2
# Tabulate the most frequent ngrams of that size in the combined titles
series_details.display_top_ngrams(title_text, ngram_count)

	ngram	count
0	register of	6
1	exemption from	6
2	dictation tests	6
3	of names	6
4	names relating	6
5	from dictation	6
6	relating to	6
7	to exemption	6
8	tests 1934-1959	1
9	tests 1918-1925	1
10	1919-1924 register	1
11	1902-1910 register	1
12	1918-1925 register	1
13	1925-1934 register	1
14	tests 1911-1918	1
15	tests 1902-1910	1
16	1911-1918 register	1
17	tests 1925-1934	1
18	tests 1919-1924	1

In [10]: