# Identifier of the archival series this notebook analyses; it selects the CSV
# file that is loaded and is passed to the summary display.
series = "A6335"
import os
import pandas as pd
import series_details
import plotly.offline as py
# Initialise Plotly's offline notebook mode before any chart is drawn.
py.init_notebook_mode()

# Load the series' CSV from the data directory. Any '/' in the series
# identifier is replaced with '-' to form the file name, and the two date
# columns are parsed as dates.
df = pd.read_csv(
    os.path.join('data', series.replace('/', '-') + '.csv'),
    parse_dates=['start_date', 'end_date'],
)

# Hand the series identifier and its dataframe to the project's summary display.
series_details.display_summary(series, df)
Total items | 42 |
---|---|
Access status | |
Open | 38 (90.48%) |
Open with exception | 4 (9.52%) |
Number of items digitised | 25 (59.52%) |
Number of pages digitised | 2,607 |
Date of earliest content | 1922 |
Date of latest content | 1956 |
# Number of rows to preview; raise this value to see more of the series.
number_of_rows = 5

# Show the first rows as a styled table: titles left-aligned, header cells
# centred, and the index column (row headings plus the blank corner cell) hidden.
(
    df[:number_of_rows]
    .style
    .set_properties(subset=['title'], **{'text-align': 'left'})
    .set_table_styles([
        {'selector': 'th', 'props': [('text-align', 'center')]},
        {'selector': '.row_heading, .blank', 'props': [('display', 'none')]},
    ])
)
| | identifier | series | control_symbol | title | contents_dates | start_date | end_date | access_status | location | digitised_status | digitised_pages |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 241500 | A6335 | 1 | New South Wales Police Organisation - ramifications and branches of "The National Socialist German Workers Party" in Australia and Germany | 1940 - 1940 | 1940-01-01 00:00:00 | 1940-01-01 00:00:00 | Open | Canberra | True | 125 |
1 | 241501 | A6335 | 2 | National Socialist Spheres of influence in NSW | 1940 - 1940 | 1940-01-01 00:00:00 | 1940-01-01 00:00:00 | Open | Canberra | True | 180 |
2 | 241502 | A6335 | 3 | Australia First Movement (The Publicist) | 1939 - 1942 | 1939-01-01 00:00:00 | 1942-01-01 00:00:00 | Open | Canberra | True | 105 |
3 | 272220 | A6335 | 4 | Australia - Soviet Friendship League. | 1942 - 1942 | 1942-01-01 00:00:00 | 1942-01-01 00:00:00 | Open | Canberra | False | 0 |
4 | 272221 | A6335 | 5 | Christian Socialist Movement, Sydney. | 1936 - 1943 | 1936-01-01 00:00:00 | 1943-01-01 00:00:00 | Open | Canberra | False | 0 |
# Build the date chart for the series from the dataframe.
fig = series_details.plot_dates(df)

# Render the figure inline in the notebook.
py.iplot(
    figure_or_data=fig,
    filename='series-dates-bar',
)
# Lower-case every file title, then join them with single spaces into one
# string for the word-frequency analysis.
title_text = (
    df['title']
    .str.lower()
    .str.cat(sep=' ')
)
# The original cell also binds the same string to `a`; the alias is kept in
# case other cells rely on it.
a = title_text

series_details.display_word_counts(title_text)
| | word | count |
---|---|---|
28 | communist | 8 |
11 | party | 5 |
12 | australia | 5 |
34 | activities | 4 |
1 | south | 4 |
30 | communism | 4 |
22 | league | 4 |
8 | socialist | 3 |
2 | wales | 3 |
0 | new | 3 |
54 | 2 | 2 |
90 | council | 2 |
21 | friendship | 2 |
52 | volume | 2 |
51 | horn | 2 |
33 | organisations | 2 |
32 | services | 2 |
31 | fighting | 2 |
50 | irvan | 2 |
49 | james | 2 |
24 | sydney | 2 |
85 | intelligence | 2 |
20 | soviet | 2 |
16 | nsw | 2 |
73 | 1942 | 2 |
# Size of the n-grams to count in the combined titles: 2 gives bigrams.
# Change ngram_count for larger n-grams (3 for trigrams, etc.).
ngram_count = 2
# Pass the combined title string and the n-gram size to the project helper
# that displays the most frequent n-grams.
series_details.display_top_ngrams(title_text, ngram_count)
| | ngram | count |
---|---|---|
0 | communist party | 4 |
1 | in the | 3 |
2 | new south | 3 |
3 | south wales | 3 |
4 | fighting services | 2 |
5 | national socialist | 2 |
6 | irvan horn | 2 |
7 | the fighting | 2 |
8 | horn volume | 2 |
9 | friendship league | 2 |
10 | communism in | 2 |
11 | james irvan | 2 |
12 | of the | 2 |
13 | roman catholic | 1 |
14 | socialist spheres | 1 |
15 | council for | 1 |
16 | organisations activities | 1 |
17 | tribune general | 1 |
18 | column roman | 1 |
19 | 879/52 friends | 1 |
20 | mcleod broken | 1 |
21 | branches of | 1 |
22 | sydney accommodation | 1 |
23 | australia-soviet friendship | 1 |
24 | publications 1930-1950 | 1 |