In [1]:

# Identifier of the archival series this notebook examines; it is used below
# to locate the matching CSV file and is passed to the summary display.
series = 'A13828'

In [2]:

import os
import pandas as pd
import series_details
import plotly.offline as py
# Initialise plotly's offline notebook mode so figures can be rendered inline
py.init_notebook_mode()

In [3]:

# Read the CSV for this series from the data directory. Any '/' in the series
# identifier is swapped for '-' to form the file name, and the start/end date
# columns are parsed as dates.
df = pd.read_csv(
    os.path.join('data', series.replace('/', '-') + '.csv'),
    parse_dates=['start_date', 'end_date'],
)

In [4]:

# Show an overview of the series: title, item totals, access status,
# digitisation counts and the range of content dates
series_details.display_summary(series, df)

National Archives of Australia: Series A13828

Paper documents related to audiovisual items in A8703, Film and sound recordings, multiple number series.

Total items	12
Access status
Not yet examined	9 (75.00%)
Open	3 (25.00%)
Number of items digitised	0 (0.00%)
Number of pages digitised	0
Date of earliest content	1955
Date of latest content	1974

Content preview¶

In [5]:

# How many rows to preview -- increase this value to see more
number_of_rows = 5

# Render the first rows as a styled table: titles left-aligned, header cells
# centred, and the index column (row headings plus the blank corner cell) hidden
(
    df[:number_of_rows]
    .style
    .set_properties(subset=['title'], **{'text-align': 'left'})
    .set_table_styles([
        {'selector': 'th', 'props': [('text-align', 'center')]},
        {'selector': '.row_heading, .blank', 'props': [('display', 'none')]},
    ])
)

Out[5]:

	identifier	series	control_symbol	title	contents_dates	start_date	end_date	access_status	location	digitised_status
0	30342357	A13828	32/1/65.2	TITLE: Ceskoslovensky Filmovy Tydenik - Czechoslovak Film Weekly Magazine. Number 35 1968, Special Edition 1968 and Number 36 1968 [Papers containing information related to the file item - script]	1968 - 1968	1968-01-01 00:00:00	1968-01-01 00:00:00	Not yet examined	Sydney	False
1	30342399	A13828	32/1/4.3	TITLE: The Lecture [Paper items relating to the file item - Script]	circa1962 - circa1962	NaT	NaT	Not yet examined	Sydney	False
2	60089626	A13828	32/1/49 Volume 2	TITLE: Communist Party of Australia [CPA] South Australia State Conference - Adelaide 18 - 19 March 1972 [Papers containing information related to audiovisual item - Script]	1972 - 1972	1972-01-01 00:00:00	1972-01-01 00:00:00	Not yet examined	Sydney	False
3	60089627	A13828	32/1/56 Volume 2	TITLE: State Conference Communist Party of Australia [CPA] Adelaide - 1974 [Papers containing information related to audiovisual item - Script]	1974 - 1974	1974-01-01 00:00:00	1974-01-01 00:00:00	Open	Sydney	False
4	60089628	A13828	32/1/61 Volume 2	TITLE: South Australian State Conference 28 September 1968 Day 1 [Papers containing information related to audiovisual item - Shot List]	1968 - 1968	1968-01-01 00:00:00	1968-01-01 00:00:00	Open	Sydney	False

Plot content dates¶

In [6]:

# Build the content-dates figure with the series_details helper...
fig = series_details.plot_dates(df)
# ...and render it inline using plotly's offline mode
py.iplot(fig, filename='series-dates-bar')

View word frequencies¶

In [7]:

# Combine all of the file titles into a single lower-cased string, separated
# by spaces, ready for the word and ngram counts below
title_text = df['title'].str.lower().str.cat(sep=' ')

In [8]:

# Show the most frequent words in the combined titles
series_details.display_word_counts(title_text)

Out[8]:

	word	count
0	title	12
19	item	12
16	information	11
15	containing	11
14	papers	11
17	related	11
37	audiovisual	10
31	conference	9
30	state	8
29	south	7
44	shot	6
45	list	6
20	script	5
10	1968	5
27	australia	5
39	australian	5
42	day	4
28	cpa	3
35	march	3
25	communist	3
41	september	3
26	party	3
43	1	2
47	2	2
50	1970	2

In [10]:

# Number of words per ngram: 2 gives word pairs; increase it for larger
# ngrams (trigrams etc)
ngram_count = 2
# Show the most frequent ngrams of that size in the combined titles
series_details.display_top_ngrams(title_text, ngram_count)

	ngram	count
0	related to	11
1	papers containing	11
2	containing information	11
3	information related	11
4	audiovisual item	10
5	to audiovisual	10
6	state conference	8
7	list title	6
8	item shot	6
9	shot list	6
10	australian state	5
11	south australian	5
12	item script	5
13	title south	5
14	script title	4
15	communist party	3
16	australia cpa	3
17	of australia	3
18	party of	3
19	2 papers	2
20	1 papers	2
21	march 1970	2
22	september 1968	2
23	the file	2
24	1968 day	2

In [ ]: