In [1]:

# Identifier of the archival series this notebook reports on; it is used
# below to locate the CSV file and to label the summary
series = "D596"

In [2]:

import os
import pandas as pd
import series_details
import plotly.offline as py
# Set up plotly's offline mode for use inside the notebook, so the figure
# produced further down can be rendered inline with py.iplot
py.init_notebook_mode()

In [3]:

# Read the item data for the series from data/<series>.csv. Any '/' in the
# series identifier is replaced with '-' to form the file name, and the
# start_date / end_date columns are parsed as dates.
df = pd.read_csv(
    os.path.join('data', '{}.csv'.format(series.replace('/', '-'))),
    parse_dates=['start_date', 'end_date'],
)

In [4]:

# Show the headline figures for this series (rendered directly below),
# using the helper from the series_details module
series_details.display_summary(series, df)

National Archives of Australia: Series D596

Correspondence files, annual single number series

Total items	11,395
Access status
Not yet examined	8,381 (73.55%)
Open	2,983 (26.18%)
Open with exception	31 (0.27%)
Number of items digitised	185 (1.62%)
Number of pages digitised	3,031
Date of earliest content	1871
Date of latest content	1971

Download the complete CSV file

Content preview

In [4]:

# How many rows of the dataframe to preview; increase to see more
number_of_rows = 5

# Show the first rows as a styled table: titles left-aligned, header cells
# centred, and the index column (row headings and blank corner cell) hidden
(
    df.head(number_of_rows)
    .style
    .set_properties(['title'], **{'text-align': 'left'})
    .set_table_styles([
        {'selector': "th", 'props': [("text-align", "center")]},
        {'selector': '.row_heading, .blank', 'props': [('display', 'none')]},
    ])
)

Out[4]:

	identifier	series	control_symbol	title	contents_dates	start_date	end_date	access_status	location	digitised_status
0	319709	D596	1902/647	Immigration Restriction Act - Domicile Certificate	1902 - 1902	1902-01-01 00:00:00	1902-01-01 00:00:00	Open	Adelaide	False
1	319888	D596	1908/5433	Chinese prohibited immigrants	1908 - 1908	1908-01-01 00:00:00	1908-01-01 00:00:00	Open	Adelaide	False
2	320267	D596	1914/5906	War between Great Britain & Turkey - Proclamation	1914 - 1914	1914-01-01 00:00:00	1914-01-01 00:00:00	Open	Adelaide	False
3	320290	D596	1914/6869	Proclamation extending the scope of certain existing proclamations and a certain order in Council connected with the war	1914 - 1914	1914-01-01 00:00:00	1914-01-01 00:00:00	Open	Adelaide	False
4	320382	D596	1916/1544	Public Trustee Herman P ZONDER - enemy shareholder	1917 - 1918	1917-01-01 00:00:00	1918-01-01 00:00:00	Open	Adelaide	False

Plot content dates

In [5]:

# Build the content-dates figure from the item data
fig = series_details.plot_dates(df)
# Render the figure inline in the notebook via plotly's offline mode
py.iplot(fig, filename='series-dates-bar')

View word frequencies

In [6]:

# Combine all of the file titles into a single lowercased string, with the
# titles separated by spaces, ready for the word and n-gram counts below.
# (The stray `a` alias from the original chained assignment was unused and
# has been dropped.)
title_text = df['title'].str.lower().str.cat(sep=' ')

In [7]:

# Tabulate how often each word occurs in the combined titles (table below)
series_details.display_word_counts(title_text)

Out[7]:

	word	count
353	ltd	946
2	act	937
3931	passport	935
376	ss	813
0	immigration	749
241	co	739
3893	classification	717
207	adelaide	601
1388	enquiry	598
112	customs	581
58	report	534
5344	wife	509
423	tariff	480
6	prohibited	450
106	mr	434
206	port	427
50	ex	426
274	duty	413
204	shipping	377
3114	permit	370
666	claim	364
161	australia	353
321	goods	350
289	office	347
298	regarding	336

In [8]:

# Number of words per n-gram: 2 counts bigrams; raise it for larger
# n-grams (3 for trigrams, and so on)
ngram_count = 2
# List the most frequent n-grams of that size found in the combined titles
series_details.display_top_ngrams(title_text, ngram_count)

	ngram	count
0	enquiry by	537
1	immigration report	497
2	classification of	406
3	by wife	366
4	claim no	325
5	merchant shipping	282
6	shipping act	280
7	co ltd	270
8	clearing office	251
9	office claim	238
10	application for	233
11	port adelaide	209
12	tariff classification	203
13	prohibited publication	199
14	immigration act	190
15	passport enquiry	181
16	landing permit	173
17	at port	169
18	official no	167
19	report mv	154
20	of the	149
21	crew ss	137
22	for duty	132
23	pty ltd	130
24	transfer of	127

In [10]: