In [1]:

# Identifier of the series examined in this notebook; later cells read this value
# to locate the data file and to label the summary.
series = "A6281"

In [2]:

import os
import pandas as pd
# Project-local helper module; it supplies the display_* functions called in later cells.
import series_details
import plotly.offline as py
# Initialise plotly's offline notebook mode up front.
# NOTE(review): no plotting call is visible in this notebook — presumably series_details relies on it; confirm.
py.init_notebook_mode()

In [3]:

# Build the path to this series' CSV file inside the 'data' directory.
# Any '/' in the series identifier is replaced with '-' so it is not read as a path separator.
csv_name = '{}.csv'.format(series.replace('/', '-'))
csv_path = os.path.join('data', csv_name)

# Load the file, asking pandas to parse the two date columns as datetimes.
df = pd.read_csv(csv_path, parse_dates=['start_date', 'end_date'])

In [4]:

# Render the summary for this series from the loaded dataframe
# (the summary text that follows this cell is its output).
series_details.display_summary(series, df)

National Archives of Australia: Series A6281

Long-playing gramophone records relating to the Royal Commission on Espionage, alpha-numeric series

Total items	17
Access status
Open	11 (64.71%)
Not yet examined	5 (29.41%)
Open with exception	1 (5.88%)
Number of items digitised	0 (0.00%)
Number of pages digitised	0
Date of earliest content	None
Date of latest content	nan

Content preview

In [5]:

# Change the number_of_rows value to see more
number_of_rows = 5

# CSS rules applied to the rendered table: centre the header cells and
# hide the row-index column (row headings plus the blank corner cell).
table_styles = [
    {'selector': 'th', 'props': [('text-align', 'center')]},
    {'selector': '.row_heading, .blank', 'props': [('display', 'none')]},
]

# Display the first rows of the dataframe, with the title column left-aligned.
# The styled table is the cell's last expression, so the notebook renders it.
(
    df[:number_of_rows]
    .style
    .set_properties(subset=['title'], **{'text-align': 'left'})
    .set_table_styles(table_styles)
)

Out[5]:

	identifier	series	control_symbol	title	contents_dates	start_date	end_date	access_status	location	digitised_status
0	13187837	A6281	1042788	Conversation Between Dr Michael Bialoguski, Vladimir Petrov And Ron Richards. Conversation Between Dr Michael Bialoguski And Ron Richards. Conversation Between Dr Bialoguski And Vladimir Petrov. - Primary Version	circa1954 - circa1954	NaT	NaT	Open	Various locations	False
1	13187838	A6281	1042796	Conversation between Dr Michael Bialoguski, Vladimir Petrov and Ron Richards. Conversation between Vladimir Petrov and Ron Richards - Primary Version	circa1954 - circa1954	NaT	NaT	Open	Various locations	False
2	13187840	A6281	1042945	Parliamentary Debate On The Royal Commission On Espionage [House Of Representatives, 25 October 1955 - Rg Menzies - Part 2] - Primary Version	circa1955 - circa1955	NaT	NaT	Open	Various locations	False
3	13187841	A6281	1042955	Parliamentary Debate On The Royal Commission On Espionage [House Of Representatives, 25 October 1955 - Ej Ward - Part 2 And Sm Keon] - Primary Version	circa1955 - circa1955	NaT	NaT	Not yet examined	Various locations	False
4	13187854	A6281	1042808	Conversation between Vladimir Petrov and Ron Richards - Primary Version	circa1954 - circa1954	NaT	NaT	Open	Various locations	False

View word frequencies

In [7]:

# Combine all of the file titles into a single lower-cased string,
# with a space between consecutive titles.
# (The stray `a` alias from the original chained assignment was unused and has been dropped.)
title_text = df['title'].str.lower().str.cat(sep=' ')

In [8]:

# Show a table of word counts for the combined title text
series_details.display_word_counts(title_text)

Out[8]:

	word	count
8	primary	17
9	version	17
0	conversation	11
19	1955	11
16	representatives	11
15	house	11
5	petrov	11
11	debate	10
14	espionage	10
13	commission	10
12	royal	10
18	october	10
10	parliamentary	10
7	richards	10
22	part	9
6	ron	8
4	vladimir	8
17	25	7
1	dr	5
3	bialoguski	5
23	2	5
2	michael	4
21	menzies	4
35	19	3
39	bourke	3

In [9]:

# Change ngram_count for larger ngrams (trigrams etc)
# ngram_count is the number of words per ngram: 2 gives the two-word phrases in the table below.
ngram_count = 2
series_details.display_top_ngrams(title_text, ngram_count)

	ngram	count
0	primary version	17
1	of representatives	11
2	house of	11
3	conversation between	11
4	commission on	10
5	the royal	10
6	version parliamentary	10
7	october 1955	10
8	on espionage	10
9	debate on	10
10	on the	10
11	royal commission	10
12	parliamentary debate	10
13	espionage house	9
14	vladimir petrov	8
15	ron richards	8
16	representatives 25	7
17	and ron	6
18	25 october	6
19	part 2	5
20	petrov and	5
21	between dr	5
22	version conversation	5
23	michael bialoguski	4
24	dr michael	4

In [ ]: