# Identifier of the series examined in this notebook
series = 'A6285'
import os
import pandas as pd
import series_details
import plotly.offline as py

# Switch Plotly's offline mode into notebook rendering
py.init_notebook_mode()

# Any '/' in the series identifier is swapped for '-' to form the CSV file
# name, which is looked up inside the 'data' directory.
csv_name = '{}.csv'.format(series.replace('/', '-'))
csv_path = os.path.join('data', csv_name)

# The two date columns are parsed into datetimes while loading
df = pd.read_csv(csv_path, parse_dates=['start_date', 'end_date'])

# Show the summary for this series
series_details.display_summary(series, df)
Total items | 132 |
---|---|
Access status | |
Open | 83 (62.88%) |
Open with exception | 31 (23.48%) |
Not yet examined | 17 (12.88%) |
Withheld pending agency advice | 1 (0.76%) |
Number of items digitised | 110 (83.33%) |
Number of pages digitised | 186 |
Date of earliest content | 1954 |
Date of latest content | 1955 |
# Number of rows to preview -- raise this value to see more
number_of_rows = 5

# Table-level CSS: centre the header cells, and hide the row-heading and
# blank cells so the index column is not displayed.
table_styles = [
    {'selector': 'th', 'props': [('text-align', 'center')]},
    {'selector': '.row_heading, .blank', 'props': [('display', 'none')]},
]

# Display the first rows of the dataframe with left-aligned titles.
# The styled expression stays last so the notebook renders it.
(
    df.iloc[:number_of_rows]
    .style
    .set_properties(subset=['title'], **{'text-align': 'left'})
    .set_table_styles(table_styles)
)
| | identifier | series | control_symbol | title | contents_dates | start_date | end_date | access_status | location | digitised_status | digitised_pages |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 4185820 | A6285 | 3 | Photographs designation 'C' and 35mm Ilford FP3. Frames 2-38 designated 'C' | 1954 - 1954 | 1954-01-01 00:00:00 | 1954-01-01 00:00:00 | Not yet examined | Canberra | False | 0 |
1 | 4185821 | A6285 | 4 | Photographs designated 'D' and 35mm Ilford FP3. Negative 'D' | 1954 - 1954 | 1954-01-01 00:00:00 | 1954-01-01 00:00:00 | Not yet examined | Canberra | False | 0 |
2 | 4185822 | A6285 | 5 | 35mm negative of original statements made to the Royal Commission on Espionage by Vladimir Petrov after being granted political asylum in Australia [2 pages] | 1954 - 1954 | 1954-01-01 00:00:00 | 1954-01-01 00:00:00 | Open | Canberra | True | 2 |
3 | 4185824 | A6285 | 7 | 35mm negative original Petrov statements | 1954 - 1955 | 1954-01-01 00:00:00 | 1955-01-01 00:00:00 | Not yet examined | Canberra | False | 0 |
4 | 4185825 | A6285 | 8 | 35mm negative original Petrov statements | 1954 - 1955 | 1954-01-01 00:00:00 | 1955-01-01 00:00:00 | Not yet examined | Canberra | False | 0 |
# Build a figure from the dataframe with series_details.plot_dates, then
# render it in the notebook through Plotly's offline iplot.
# NOTE(review): presumably a bar chart of item dates, going by the
# 'series-dates-bar' filename -- confirm in series_details.plot_dates.
fig = series_details.plot_dates(df)
py.iplot(fig, filename='series-dates-bar')
# Lower-case every title, then join them all into one space-separated string
lowercase_titles = df['title'].str.lower()
title_text = lowercase_titles.str.cat(sep=' ')
# NOTE(review): `a` is a second name for the same string, kept from the
# original; nothing in the visible code reads it -- confirm before removing.
a = title_text

# Show word counts for the combined title text
series_details.display_word_counts(title_text)
| | word | count |
---|---|---|
17 | petrov | 165 |
14 | commission | 109 |
13 | royal | 109 |
15 | espionage | 109 |
72 | photograph | 107 |
73 | presented | 107 |
74 | evidence | 107 |
21 | australia | 99 |
63 | evdokia | 98 |
16 | vladimir | 95 |
94 | defection | 67 |
96 | safe | 65 |
97 | house | 65 |
124 | following | 55 |
123 | held | 55 |
64 | asio | 52 |
122 | wife | 49 |
137 | immediately | 35 |
18 | granted | 32 |
19 | political | 32 |
20 | asylum | 32 |
78 | richards | 28 |
77 | ron | 27 |
65 | officers | 26 |
121 | verandah | 25 |
# Number of words per n-gram: 2 gives bigrams; change ngram_count for
# larger n-grams (3 for trigrams, etc.)
ngram_count = 2
# Show the top n-grams of that size found in the combined title text
series_details.display_top_ngrams(title_text, ngram_count)
| | ngram | count |
---|---|---|
0 | royal commission | 109 |
1 | the royal | 109 |
2 | on espionage | 109 |
3 | commission on | 109 |
4 | to the | 109 |
5 | photograph presented | 107 |
6 | presented as | 107 |
7 | evidence to | 107 |
8 | as evidence | 107 |
9 | australia photograph | 97 |
10 | vladimir petrov | 95 |
11 | espionage vladimir | 70 |
12 | to australia | 67 |
13 | defection to | 67 |
14 | evdokia petrov | 62 |
15 | their defection | 59 |
16 | were held | 55 |
17 | following their | 55 |
18 | wife evdokia | 49 |
19 | and his | 49 |
20 | his wife | 49 |
21 | safe house | 48 |
22 | he and | 47 |
23 | of the | 41 |
24 | the safe | 40 |