In [1]:

# Identifier of the archival series this notebook reports on; the cells below
# use it to locate the matching CSV file and to label the summary.
series = "A6335"

In [2]:

import os
import pandas as pd
import series_details
import plotly.offline as py
# Initialise plotly's offline notebook mode so figures passed to py.iplot()
# can be rendered inline in this notebook.
py.init_notebook_mode()

In [3]:

# Any '/' in the series identifier is swapped for '-' to form the CSV file name.
csv_name = '{}.csv'.format(series.replace('/', '-'))
csv_path = os.path.join('data', csv_name)

# Load the item data for the series, parsing both date columns as datetimes.
df = pd.read_csv(csv_path, parse_dates=['start_date', 'end_date'])

In [4]:

# Render the summary for this series using the project's series_details helper.
# The output below lists item totals, access status, digitisation counts and
# the range of content dates.
series_details.display_summary(series, df)

National Archives of Australia: Series A6335

Miscellaneous security files inherited by the Australian Security Intelligence Organization

Total items	42
Access status
Open	38 (90.48%)
Open with exception	4 (9.52%)
Number of items digitised	25 (59.52%)
Number of pages digitised	2,607
Date of earliest content	1922
Date of latest content	1956

Content preview

In [5]:

# Change the number_of_rows value to see more
number_of_rows = 5

# CSS rules for the rendered table: centre the header cells and hide the
# index column (row headings plus the blank corner cell).
table_styles = [
    {'selector': 'th', 'props': [('text-align', 'center')]},
    {'selector': '.row_heading, .blank', 'props': [('display', 'none')]},
]

# Display dataframe: left-align the long title text, then apply the table rules.
# The final expression evaluates to the Styler, so the notebook renders it.
preview = df[:number_of_rows].style
preview = preview.set_properties(subset=['title'], **{'text-align': 'left'})
preview.set_table_styles(table_styles)

Out[5]:

	identifier	series	control_symbol	title	contents_dates	start_date	end_date	access_status	location	digitised_status	digitised_pages
0	241500	A6335	1	New South Wales Police Organisation - ramifications and branches of "The National Socialist German Workers Party" in Australia and Germany	1940 - 1940	1940-01-01 00:00:00	1940-01-01 00:00:00	Open	Canberra	True	125
1	241501	A6335	2	National Socialist Spheres of influence in NSW	1940 - 1940	1940-01-01 00:00:00	1940-01-01 00:00:00	Open	Canberra	True	180
2	241502	A6335	3	Australia First Movement (The Publicist)	1939 - 1942	1939-01-01 00:00:00	1942-01-01 00:00:00	Open	Canberra	True	105
3	272220	A6335	4	Australia - Soviet Friendship League.	1942 - 1942	1942-01-01 00:00:00	1942-01-01 00:00:00	Open	Canberra	False	0
4	272221	A6335	5	Christian Socialist Movement, Sydney.	1936 - 1943	1936-01-01 00:00:00	1943-01-01 00:00:00	Open	Canberra	False	0

Plot content dates

In [6]:

# Build the content-dates figure with the project's plot_dates helper
# (presumably charting the start_date/end_date columns — confirm in series_details).
fig = series_details.plot_dates(df)
# Render the figure inline via plotly's offline mode.
py.iplot(fig, filename='series-dates-bar')

View word frequencies

In [7]:

# Combine all of the file titles into a single lower-cased string, with the
# individual titles separated by a space.
title_text = df['title'].str.lower().str.cat(sep=' ')

In [8]:

# Show word frequencies for the combined titles using the project's helper;
# the output below lists each word with its count, highest counts first.
series_details.display_word_counts(title_text)

Out[8]:

	word	count
28	communist	8
11	party	5
12	australia	5
34	activities	4
1	south	4
30	communism	4
22	league	4
8	socialist	3
2	wales	3
0	new	3
54	2	2
90	council	2
21	friendship	2
52	volume	2
51	horn	2
33	organisations	2
32	services	2
31	fighting	2
50	irvan	2
49	james	2
24	sydney	2
85	intelligence	2
20	soviet	2
16	nsw	2
73	1942	2

In [9]:

# Number of words per n-gram: 2 gives the bigrams shown in the output below.
# Change ngram_count for larger ngrams (trigrams etc)
ngram_count = 2
# Show the most frequent n-grams of that size in the combined titles.
# NOTE(review): the output includes stopword pairs such as 'in the' and
# 'of the', so stopword filtering does not appear to be applied here —
# confirm in series_details.
series_details.display_top_ngrams(title_text, ngram_count)

	ngram	count
0	communist party	4
1	in the	3
2	new south	3
3	south wales	3
4	fighting services	2
5	national socialist	2
6	irvan horn	2
7	the fighting	2
8	horn volume	2
9	friendship league	2
10	communism in	2
11	james irvan	2
12	of the	2
13	roman catholic	1
14	socialist spheres	1
15	council for	1
16	organisations activities	1
17	tribune general	1
18	column roman	1
19	879/52 friends	1
20	mcleod broken	1
21	branches of	1
22	sydney accommodation	1
23	australia-soviet friendship	1
24	publications 1930-1950	1

In [ ]: