from pathlib import Path
from collections import defaultdict
import pandas as pd
year_files = defaultdict(list)
dest = Path('./data/education-ni/')
for year_p in dest.iterdir():
try:
year=int(year_p.parts[-1])
for file in year_p.iterdir():
year_files[year].append(file.parts[-1])
except ValueError:
pass
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
year_files = dict(sorted(year_files.items()))
year_files
{2009: ['pre-school-data-0910-3.xlsx', 'post-primary-schools-data-0910-supp-2.xlsx', 'primary-schools-data-0910-supp-r.xlsx', 'special-school-data-0910-supp-3.xlsx', 'nursery-schools-data-0910-supp-3.xlsx'], 2010: ['special-school-data-1011-supp-2.xlsx', 'primary-schools-data-1011-supp-inc-unfilled-r.xlsx', 'post-primary-schools-data-10-11-supp-inc-unfilled.xlsx', 'pre-school-data-1011-3.xlsx', 'nursery-schools-data-2010-11-supp-inc-unfilled.xlsx'], 2011: ['post-primary-schools-data-11-12-supp-inc-unfilled-2.xlsx', 'nursery-schools-data-2011-12-supp-inc-unfilled-2.xlsx', 'primary-schools-data-11-12-supp-inc-unfilled-places-r.xlsx', 'special-school-data-11-12-supp.xlsx', 'pre-school-data-2011-12.xlsx'], 2012: ['nursery-schools-data-2012-13-supp-inc-unfilled-places.xlsx', 'post-primary-schools-data-2012-13-supp-inc-unfilled-places.xlsx', 'primary-schools-data-2012-13-supp-inc-unfilled-places-2.xlsx', 'pre-school-data-2012-13.xlsx', 'special-school-data-2012-13-supp.xlsx'], 2013: ['nursery-schools-data-13-14-supp-incl-unfilled-places.xlsx', 'post-primary-schools-data-13-14-supp-inc-unfilled-places.xlsx', 'special-school-data-2013-14-supp.xlsx', 'primary-schools-data-2013-14-supp-inc-unfilled-places.xlsx', 'pre-school-data-2013-14.xlsx'], 2014: ['School%20level%20-%20nursery%20schools%20data%201415%20supp%20-%20unfilled%20places.XLS', 'post-primary-schools-data-1415-supp-with-unfilled-places.xlsx', 'School%20level%20-%20pre%20school%20data%201415.XLS', 'special-school-data-1415-supp.xlsx', 'primary-schools-data-1415-supp-with-unfilled-places.xlsx'], 2015: ['School%20level%20-%20nursery%20schools%20data%201516%20supp.XLS', 'School%20level%20-%20pre%20school%20data%201516%20supp.XLSX', 'School%20level%20-%20primary%20schools%20data%201516%20supp_0.XLSX', 'School%20level%20-%20special_school_data%201516%20supp.XLSX', 'School-level-post-primary-schools-data-1516_1.XLS'], 2016: ['School%20level%20-%20special_school_data%201617%20supp_0.XLSX', 'School%20level%20-%20post%20primary%20schools%20data%201617%20supp_0.XLS', 'School%20level%20-%20nursery%20schools%20data%201617%20supp.XLS', 'available%20places%20-%20post-primary%201617%20supp.XLSX', 'available%20places%20-%20primary%201617%20supp.XLSX', 'available%20places%20-%20nursery%201617%20supp%20final.XLSX', 'School%20level%20-%20pre%20school%20data%201617%20supp.XLSX', 'School%20level%20-%20primary%20schools%20data%201617%20supp.XLSX'], 2017: ['School%20level%20-%20pre%20school%20data%201718%20supp.xlsx', 'available%20places%20-%20primary%201718%20supp.xlsx', 'available%20places%20-%20post-primary%201718%20supp.xlsx', 'School%20level%20-%20post%20primary%20schools%20data%20supp%201718.xls', 'School%20level%20-%20nursery%20schools%20data%201718%20supp.xlsx', 'available%20places%20-%20nursery%201718%20supp.xlsx', 'School%20level%20-%20special%20school%20data%202017-18_0.xlsx', 'School%20level%20-%20primary%20schools%20data%20supp%201718.xlsx']}
for year, files in year_files.items():
print(year)
2009 2010 2011 2012 2013 2014 2015 2016 2017
sheets = defaultdict(list)
for year, files in year_files.items():
for file in files:
if 'post' in file:
print(file)
post-primary-schools-data-13-14-supp-inc-unfilled-places.xlsx post-primary-schools-data-1415-supp-with-unfilled-places.xlsx School-level-post-primary-schools-data-1516_1.XLS post-primary-schools-data-2012-13-supp-inc-unfilled-places.xlsx post-primary-schools-data-0910-supp-2.xlsx available%20places%20-%20post-primary%201718%20supp.xlsx School%20level%20-%20post%20primary%20schools%20data%20supp%201718.xls post-primary-schools-data-10-11-supp-inc-unfilled.xlsx post-primary-schools-data-11-12-supp-inc-unfilled-2.xlsx School%20level%20-%20post%20primary%20schools%20data%201617%20supp_0.XLS available%20places%20-%20post-primary%201617%20supp.XLSX
xls = pd.ExcelFile(f'data/education-ni/2017/School%20level%20-%20post%20primary%20schools%20data%20supp%201718.xls')
df = pd.read_excel(xls, 'reference data', header=None)
df.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Data on Pupils attending post primary schools ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | NaN | NaN | NaN | NaN | NaN | NaN | school | parliamentary | district | urban/ |
3 | De ref | school name | address 1 | town | postcode | school type | management type | constituency | council | rural |
4 | 1210014 | Ashfield Girls' High School | 397 HOLYWOOD ROAD | BELFAST | BT42LY | Non-grammar | Controlled | BELFAST EAST | BELFAST | Urban |
join_n_strip = lambda l: ' '.join(l).strip()
df.ix[1:3].fillna('').apply(join_n_strip, axis=0)
0 De ref 1 school name 2 address 1 3 town 4 postcode 5 school type 6 school management type 7 parliamentary constituency 8 district council 9 urban/ rural dtype: object
df.columns=df.ix[1:3].fillna('').apply(join_n_strip, axis=0).values
df
De ref | school name | address 1 | town | postcode | school type | school management type | parliamentary constituency | district council | urban/ rural | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Data on Pupils attending post primary schools ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | NaN | NaN | NaN | NaN | NaN | NaN | school | parliamentary | district | urban/ |
3 | De ref | school name | address 1 | town | postcode | school type | management type | constituency | council | rural |
4 | 1210014 | Ashfield Girls' High School | 397 HOLYWOOD ROAD | BELFAST | BT42LY | Non-grammar | Controlled | BELFAST EAST | BELFAST | Urban |
5 | 1210015 | Ashfield Boys' High School | 395 HOLYWOOD ROAD | BELFAST | BT42LY | Non-grammar | Controlled | BELFAST EAST | BELFAST | Urban |
6 | 1210021 | Belfast Model School For Girls | 35 DUNOWEN GARDENS | BELFAST | BT146NQ | Non-grammar | Controlled | BELFAST NORTH | BELFAST | Urban |
7 | 1210022 | Belfast Boys' Model School | BALLYSILLAN ROAD | BELFAST | BT146RB | Non-grammar | Controlled | BELFAST NORTH | BELFAST | Urban |
8 | 1230053 | St Louise's Comprehensive College | 468 FALLS ROAD | BELFAST | BT126EN | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
9 | 1230104 | Mercy College Belfast | BALLYSILLAN ROAD | BELFAST | BT147QR | Non-grammar | Catholic Maintained | BELFAST NORTH | BELFAST | Urban |
10 | 1230130 | St Rose’s Dominican College | 65 BEECHMOUNT AVENUE | BELFAST | BT127NA | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
11 | 1230146 | Christian Brothers School, Belfast | GLEN ROAD | BELFAST | BT118BW | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
12 | 1230155 | St Genevieve's High School | TRENCH HOUSE | BELFAST | BT119JP | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
13 | 1230182 | De La Salle College | 36 EDENMORE DRIVE | BELFAST | BT118LT | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
14 | 1230262 | Corpus Christi College | ARD NA VA ROAD | BELFAST | BT126FF | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
15 | 1230275 | St Joseph's College, Belfast | 518-572 RAVENHILL ROAD | BELFAST | BT60BY | Non-grammar | Catholic Maintained | BELFAST SOUTH | BELFAST | Urban |
16 | 1230321 | Blessed Trinity College | 619 ANTRIM ROAD | BELFAST | BT154DZ | Non-grammar | Catholic Maintained | BELFAST NORTH | BELFAST | Urban |
17 | 1240291 | Colaiste Feirste | 7 BEECHVIEW PARK | BELFAST | BT127PY | Non-grammar | Other Maintained | BELFAST WEST | BELFAST | Urban |
18 | 1260269 | Hazelwood College | 70 WHITEWELL ROAD | NEWTOWNABBEY | BT367ES | Non-grammar | GMI | BELFAST NORTH | BELFAST | Urban |
19 | 1260294 | Malone Integrated College | 45 FINAGHY ROAD NORTH | BELFAST | BT100JB | Non-grammar | GMI | BELFAST SOUTH | BELFAST | Urban |
20 | 1410079 | Grosvenor Grammar School | 50 MARINA PARK | BELFAST | BT56BA | Grammar | Controlled | BELFAST EAST | BELFAST | Urban |
21 | 1410270 | Wellington College | 18 CAROLAN ROAD | BELFAST | BT73HE | Grammar | Controlled | BELFAST SOUTH | BELFAST | Urban |
22 | 1410315 | Bloomfield Collegiate | 8 ASTORIA GARDENS | BELFAST | BT56HW | Grammar | Controlled | BELFAST EAST | BELFAST | Urban |
23 | 1420020 | Campbell College | BELMONT ROAD | BELFAST | BT42ND | Grammar | Voluntary | BELFAST EAST | BELFAST | Urban |
24 | 1420021 | St Mary's Christian Brothers' Grammar, Belfast | 147A GLEN ROAD | BELFAST | BT118NR | Grammar | Voluntary | BELFAST WEST | BELFAST | Urban |
25 | 1420022 | Methodist College | 1 MALONE ROAD | BELFAST | BT96BY | Grammar | Voluntary | BELFAST SOUTH | BELFAST | Urban |
26 | 1420027 | The Royal Belfast Academical Institution | COLLEGE SQUARE EAST | BELFAST | BT16DL | Grammar | Voluntary | BELFAST SOUTH | BELFAST | Urban |
27 | 1420028 | Belfast Royal Academy | 5-17 CLIFTONVILLE ROAD | BELFAST | BT146JL | Grammar | Voluntary | BELFAST NORTH | BELFAST | Urban |
28 | 1420029 | St Dominic's High School, Belfast | 135-137 FALLS ROAD | BELFAST | BT126AE | Grammar | Voluntary | BELFAST WEST | BELFAST | Urban |
29 | 1420030 | St Malachy's College, Belfast | 36 ANTRIM ROAD | BELFAST | BT152AE | Grammar | Voluntary | BELFAST NORTH | BELFAST | Urban |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
173 | 5230076 | St Patrick's College, Banbridge | 38 SCARVA ROAD | BANBRIDGE | BT323AS | Non-grammar | Catholic Maintained | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
174 | 5230108 | St Mary's High School, Newry | UPPER CHAPEL STREET | NEWRY | BT342DT | Non-grammar | Catholic Maintained | NEWRY AND ARMAGH | NEWRY MOURNE AND DOWN | Urban |
175 | 5230135 | St Mark's High School | UPPER DROMORE ROAD | NEWRY | BT343PN | Non-grammar | Catholic Maintained | SOUTH DOWN | NEWRY MOURNE AND DOWN | Urban |
176 | 5230152 | St Ciaran's High School | 15 TULLYBRYAN ROAD | DUNGANNON | BT702LY | Non-grammar | Catholic Maintained | FERMANAGH AND SOUTH TYRONE | MID ULSTER | Rural |
177 | 5230157 | St Paul's High School, Bessbrook | 108 CAMLOUGH ROAD | NEWRY | BT357EE | Non-grammar | Catholic Maintained | NEWRY AND ARMAGH | NEWRY MOURNE AND DOWN | Rural |
178 | 5230167 | St Joseph's High School, Crossmaglen | 77 DUNDALK ROAD | NEWRY | BT359HL | Non-grammar | Catholic Maintained | NEWRY AND ARMAGH | NEWRY MOURNE AND DOWN | Rural |
179 | 5230187 | St Patrick's High School, Keady | MIDDLETOWN ROAD | KEADY | BT603TH | Non-grammar | Catholic Maintained | NEWRY AND ARMAGH | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Rural |
180 | 5230192 | St Joseph's College, Coalisland | 29 SCHOOL LANE | DUNGANNON | BT714NW | Non-grammar | Catholic Maintained | MID ULSTER | MID ULSTER | Urban |
181 | 5230213 | Lismore Comprehensive School | DRUMGASK | CRAIGAVON | BT655DU | Non-grammar | Catholic Maintained | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
182 | 5230218 | St Catherine's College | 2A CONVENT ROAD | ARMAGH | BT604BG | Non-grammar | Catholic Maintained | NEWRY AND ARMAGH | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
183 | 5230278 | Holy Trinity College | CHAPEL STREET | COOKSTOWN | BT808QB | Non-grammar | Catholic Maintained | MID ULSTER | MID ULSTER | Urban |
184 | 5230293 | St Patrick's College, Dungannon | 41 KILLYMEAL ROAD | DUNGANNON | BT716LJ | Non-grammar | Catholic Maintained | FERMANAGH AND SOUTH TYRONE | MID ULSTER | Urban |
185 | 5230321 | St John The Baptist College | 4 MOY ROAD | PORTADOWN | BT621QQ | Secondary | Catholic Maintained | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
186 | 5250216 | Brownlow Int College | TULLYGALLY ROAD | CRAIGAVON | BT655BS | Non-grammar | Controlled Integrated | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
187 | 5260285 | New-Bridge Integrated College | 25 DONARD VIEW ROAD | BANBRIDGE | BT323LN | Non-grammar | GMI | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Rural |
188 | 5260286 | Integrated College Dungannon | 21 GORTMERRON LINK ROAD | DUNGANNON | BT716LS | Non-grammar | GMI | FERMANAGH AND SOUTH TYRONE | MID ULSTER | Urban |
189 | 5410013 | Banbridge Academy | LURGAN ROAD | BANBRIDGE | BT324AQ | Grammar | Controlled | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
190 | 5410057 | Lurgan College | 9 COLLEGE WALK | CRAIGAVON | BT666JW | Grammar | Controlled | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
191 | 5410067 | Portadown College | 4 KILLYCOMAINE ROAD | CRAIGAVON | BT635BU | Grammar | Controlled | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
192 | 5420045 | St Louis Grammar School, Kilkeel | 151 NEWRY ROAD | KILKEEL | BT344EU | Grammar | Voluntary | SOUTH DOWN | NEWRY MOURNE AND DOWN | Urban |
193 | 5420059 | Abbey Christian Brothers Grammar School | 77A ASHGROVE ROAD | NEWRY | BT342QN | Grammar | Voluntary | SOUTH DOWN | NEWRY MOURNE AND DOWN | Urban |
194 | 5420060 | Our Lady's Grammar School | CHEQUER HILL | NEWRY | BT356DY | Grammar | Voluntary | NEWRY AND ARMAGH | NEWRY MOURNE AND DOWN | Urban |
195 | 5420062 | St Colman's College, Newry | 46 ARMAGH ROAD | NEWRY | BT356PP | Grammar | Voluntary | NEWRY AND ARMAGH | NEWRY MOURNE AND DOWN | Urban |
196 | 5420073 | St Joseph's Convent Grammar School, Donaghmore | 58 CASTLECAULFIELD ROAD | DUNGANNON | BT703HE | Grammar | Voluntary | MID ULSTER | MID ULSTER | Rural |
197 | 5420076 | Sacred Heart Grammar School, Newry | 10 ASHGROVE AVENUE | NEWRY | BT341PR | Grammar | Voluntary | SOUTH DOWN | NEWRY MOURNE AND DOWN | Urban |
198 | 5420260 | The Royal School, Dungannon | 2 RANFURLEY ROAD | DUNGANNON | BT716AP | Grammar | Voluntary | FERMANAGH AND SOUTH TYRONE | MID ULSTER | Urban |
199 | 5420263 | The Royal School, Armagh | COLLEGE HILL | ARMAGH | BT619DH | Grammar | Voluntary | NEWRY AND ARMAGH | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
200 | 5420268 | St Patrick's Grammar School, Armagh | CATHEDRAL ROAD | ARMAGH | BT617QZ | Grammar | Voluntary | NEWRY AND ARMAGH | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
201 | 5420304 | St Patrick's Academy, Dungannon | 37 KILLYMEAL ROAD | DUNGANNON | BT716DS | Grammar | Voluntary | FERMANAGH AND SOUTH TYRONE | MID ULSTER | Urban |
202 | 5420314 | St Ronan’s College | 12 CORNAKINEGAR ROAD | LURGAN | BT679JW | Grammar | Voluntary | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
203 rows × 10 columns
df = df.drop(df.index[0:4]).reset_index(drop=True)
df
De ref | school name | address 1 | town | postcode | school type | school management type | parliamentary constituency | district council | urban/ rural | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1210014 | Ashfield Girls' High School | 397 HOLYWOOD ROAD | BELFAST | BT42LY | Non-grammar | Controlled | BELFAST EAST | BELFAST | Urban |
1 | 1210015 | Ashfield Boys' High School | 395 HOLYWOOD ROAD | BELFAST | BT42LY | Non-grammar | Controlled | BELFAST EAST | BELFAST | Urban |
2 | 1210021 | Belfast Model School For Girls | 35 DUNOWEN GARDENS | BELFAST | BT146NQ | Non-grammar | Controlled | BELFAST NORTH | BELFAST | Urban |
3 | 1210022 | Belfast Boys' Model School | BALLYSILLAN ROAD | BELFAST | BT146RB | Non-grammar | Controlled | BELFAST NORTH | BELFAST | Urban |
4 | 1230053 | St Louise's Comprehensive College | 468 FALLS ROAD | BELFAST | BT126EN | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
5 | 1230104 | Mercy College Belfast | BALLYSILLAN ROAD | BELFAST | BT147QR | Non-grammar | Catholic Maintained | BELFAST NORTH | BELFAST | Urban |
6 | 1230130 | St Rose’s Dominican College | 65 BEECHMOUNT AVENUE | BELFAST | BT127NA | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
7 | 1230146 | Christian Brothers School, Belfast | GLEN ROAD | BELFAST | BT118BW | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
8 | 1230155 | St Genevieve's High School | TRENCH HOUSE | BELFAST | BT119JP | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
9 | 1230182 | De La Salle College | 36 EDENMORE DRIVE | BELFAST | BT118LT | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
10 | 1230262 | Corpus Christi College | ARD NA VA ROAD | BELFAST | BT126FF | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
11 | 1230275 | St Joseph's College, Belfast | 518-572 RAVENHILL ROAD | BELFAST | BT60BY | Non-grammar | Catholic Maintained | BELFAST SOUTH | BELFAST | Urban |
12 | 1230321 | Blessed Trinity College | 619 ANTRIM ROAD | BELFAST | BT154DZ | Non-grammar | Catholic Maintained | BELFAST NORTH | BELFAST | Urban |
13 | 1240291 | Colaiste Feirste | 7 BEECHVIEW PARK | BELFAST | BT127PY | Non-grammar | Other Maintained | BELFAST WEST | BELFAST | Urban |
14 | 1260269 | Hazelwood College | 70 WHITEWELL ROAD | NEWTOWNABBEY | BT367ES | Non-grammar | GMI | BELFAST NORTH | BELFAST | Urban |
15 | 1260294 | Malone Integrated College | 45 FINAGHY ROAD NORTH | BELFAST | BT100JB | Non-grammar | GMI | BELFAST SOUTH | BELFAST | Urban |
16 | 1410079 | Grosvenor Grammar School | 50 MARINA PARK | BELFAST | BT56BA | Grammar | Controlled | BELFAST EAST | BELFAST | Urban |
17 | 1410270 | Wellington College | 18 CAROLAN ROAD | BELFAST | BT73HE | Grammar | Controlled | BELFAST SOUTH | BELFAST | Urban |
18 | 1410315 | Bloomfield Collegiate | 8 ASTORIA GARDENS | BELFAST | BT56HW | Grammar | Controlled | BELFAST EAST | BELFAST | Urban |
19 | 1420020 | Campbell College | BELMONT ROAD | BELFAST | BT42ND | Grammar | Voluntary | BELFAST EAST | BELFAST | Urban |
20 | 1420021 | St Mary's Christian Brothers' Grammar, Belfast | 147A GLEN ROAD | BELFAST | BT118NR | Grammar | Voluntary | BELFAST WEST | BELFAST | Urban |
21 | 1420022 | Methodist College | 1 MALONE ROAD | BELFAST | BT96BY | Grammar | Voluntary | BELFAST SOUTH | BELFAST | Urban |
22 | 1420027 | The Royal Belfast Academical Institution | COLLEGE SQUARE EAST | BELFAST | BT16DL | Grammar | Voluntary | BELFAST SOUTH | BELFAST | Urban |
23 | 1420028 | Belfast Royal Academy | 5-17 CLIFTONVILLE ROAD | BELFAST | BT146JL | Grammar | Voluntary | BELFAST NORTH | BELFAST | Urban |
24 | 1420029 | St Dominic's High School, Belfast | 135-137 FALLS ROAD | BELFAST | BT126AE | Grammar | Voluntary | BELFAST WEST | BELFAST | Urban |
25 | 1420030 | St Malachy's College, Belfast | 36 ANTRIM ROAD | BELFAST | BT152AE | Grammar | Voluntary | BELFAST NORTH | BELFAST | Urban |
26 | 1420082 | Dominican College, Belfast | 38 FORTWILLIAM PARK | BELFAST | BT154AQ | Grammar | Voluntary | BELFAST NORTH | BELFAST | Urban |
27 | 1420089 | Strathearn School, Belfast | 188 BELMONT ROAD | BELFAST | BT42AU | Grammar | Voluntary | BELFAST EAST | BELFAST | Urban |
28 | 1420095 | Rathmore Grammar School | KINGSWAY | BELFAST | BT100LF | Grammar | Voluntary | BELFAST SOUTH | BELFAST | Urban |
29 | 1420264 | Victoria College | CRANMORE PARK | BELFAST | BT96JA | Grammar | Voluntary | BELFAST SOUTH | BELFAST | Urban |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
169 | 5230076 | St Patrick's College, Banbridge | 38 SCARVA ROAD | BANBRIDGE | BT323AS | Non-grammar | Catholic Maintained | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
170 | 5230108 | St Mary's High School, Newry | UPPER CHAPEL STREET | NEWRY | BT342DT | Non-grammar | Catholic Maintained | NEWRY AND ARMAGH | NEWRY MOURNE AND DOWN | Urban |
171 | 5230135 | St Mark's High School | UPPER DROMORE ROAD | NEWRY | BT343PN | Non-grammar | Catholic Maintained | SOUTH DOWN | NEWRY MOURNE AND DOWN | Urban |
172 | 5230152 | St Ciaran's High School | 15 TULLYBRYAN ROAD | DUNGANNON | BT702LY | Non-grammar | Catholic Maintained | FERMANAGH AND SOUTH TYRONE | MID ULSTER | Rural |
173 | 5230157 | St Paul's High School, Bessbrook | 108 CAMLOUGH ROAD | NEWRY | BT357EE | Non-grammar | Catholic Maintained | NEWRY AND ARMAGH | NEWRY MOURNE AND DOWN | Rural |
174 | 5230167 | St Joseph's High School, Crossmaglen | 77 DUNDALK ROAD | NEWRY | BT359HL | Non-grammar | Catholic Maintained | NEWRY AND ARMAGH | NEWRY MOURNE AND DOWN | Rural |
175 | 5230187 | St Patrick's High School, Keady | MIDDLETOWN ROAD | KEADY | BT603TH | Non-grammar | Catholic Maintained | NEWRY AND ARMAGH | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Rural |
176 | 5230192 | St Joseph's College, Coalisland | 29 SCHOOL LANE | DUNGANNON | BT714NW | Non-grammar | Catholic Maintained | MID ULSTER | MID ULSTER | Urban |
177 | 5230213 | Lismore Comprehensive School | DRUMGASK | CRAIGAVON | BT655DU | Non-grammar | Catholic Maintained | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
178 | 5230218 | St Catherine's College | 2A CONVENT ROAD | ARMAGH | BT604BG | Non-grammar | Catholic Maintained | NEWRY AND ARMAGH | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
179 | 5230278 | Holy Trinity College | CHAPEL STREET | COOKSTOWN | BT808QB | Non-grammar | Catholic Maintained | MID ULSTER | MID ULSTER | Urban |
180 | 5230293 | St Patrick's College, Dungannon | 41 KILLYMEAL ROAD | DUNGANNON | BT716LJ | Non-grammar | Catholic Maintained | FERMANAGH AND SOUTH TYRONE | MID ULSTER | Urban |
181 | 5230321 | St John The Baptist College | 4 MOY ROAD | PORTADOWN | BT621QQ | Secondary | Catholic Maintained | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
182 | 5250216 | Brownlow Int College | TULLYGALLY ROAD | CRAIGAVON | BT655BS | Non-grammar | Controlled Integrated | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
183 | 5260285 | New-Bridge Integrated College | 25 DONARD VIEW ROAD | BANBRIDGE | BT323LN | Non-grammar | GMI | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Rural |
184 | 5260286 | Integrated College Dungannon | 21 GORTMERRON LINK ROAD | DUNGANNON | BT716LS | Non-grammar | GMI | FERMANAGH AND SOUTH TYRONE | MID ULSTER | Urban |
185 | 5410013 | Banbridge Academy | LURGAN ROAD | BANBRIDGE | BT324AQ | Grammar | Controlled | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
186 | 5410057 | Lurgan College | 9 COLLEGE WALK | CRAIGAVON | BT666JW | Grammar | Controlled | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
187 | 5410067 | Portadown College | 4 KILLYCOMAINE ROAD | CRAIGAVON | BT635BU | Grammar | Controlled | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
188 | 5420045 | St Louis Grammar School, Kilkeel | 151 NEWRY ROAD | KILKEEL | BT344EU | Grammar | Voluntary | SOUTH DOWN | NEWRY MOURNE AND DOWN | Urban |
189 | 5420059 | Abbey Christian Brothers Grammar School | 77A ASHGROVE ROAD | NEWRY | BT342QN | Grammar | Voluntary | SOUTH DOWN | NEWRY MOURNE AND DOWN | Urban |
190 | 5420060 | Our Lady's Grammar School | CHEQUER HILL | NEWRY | BT356DY | Grammar | Voluntary | NEWRY AND ARMAGH | NEWRY MOURNE AND DOWN | Urban |
191 | 5420062 | St Colman's College, Newry | 46 ARMAGH ROAD | NEWRY | BT356PP | Grammar | Voluntary | NEWRY AND ARMAGH | NEWRY MOURNE AND DOWN | Urban |
192 | 5420073 | St Joseph's Convent Grammar School, Donaghmore | 58 CASTLECAULFIELD ROAD | DUNGANNON | BT703HE | Grammar | Voluntary | MID ULSTER | MID ULSTER | Rural |
193 | 5420076 | Sacred Heart Grammar School, Newry | 10 ASHGROVE AVENUE | NEWRY | BT341PR | Grammar | Voluntary | SOUTH DOWN | NEWRY MOURNE AND DOWN | Urban |
194 | 5420260 | The Royal School, Dungannon | 2 RANFURLEY ROAD | DUNGANNON | BT716AP | Grammar | Voluntary | FERMANAGH AND SOUTH TYRONE | MID ULSTER | Urban |
195 | 5420263 | The Royal School, Armagh | COLLEGE HILL | ARMAGH | BT619DH | Grammar | Voluntary | NEWRY AND ARMAGH | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
196 | 5420268 | St Patrick's Grammar School, Armagh | CATHEDRAL ROAD | ARMAGH | BT617QZ | Grammar | Voluntary | NEWRY AND ARMAGH | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
197 | 5420304 | St Patrick's Academy, Dungannon | 37 KILLYMEAL ROAD | DUNGANNON | BT716DS | Grammar | Voluntary | FERMANAGH AND SOUTH TYRONE | MID ULSTER | Urban |
198 | 5420314 | St Ronan’s College | 12 CORNAKINEGAR ROAD | LURGAN | BT679JW | Grammar | Voluntary | UPPER BANN | ARMAGH CITY, BANBRIDGE AND CRAIGAVON | Urban |
199 rows × 10 columns
df['district council'].value_counts().plot.pie(autopct='%1.0f%%')
<matplotlib.axes._subplots.AxesSubplot at 0x11348f518>
def parse_reference_table(xls):
if 'reference data' in xls.sheet_names:
df = pd.read_excel(xls, 'reference data', header=None)
join_n_strip = lambda l: ' '.join(l).strip()
df.columns=df.ix[1:3].fillna('').apply(join_n_strip, axis=0).values
df = df.drop(df.index[0:4]).reset_index(drop=True)
else:
df=None
return df
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
df = parse_reference_table(xls)
if df is not None:
print(f'Got reference data for {year}')
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-61-2c4ac519adc4> in <module>() 13 if 'post' in file: 14 xls = pd.ExcelFile(f'data/education-ni/{year}/{file}') ---> 15 df = parse_reference_table(xls) 16 if df is not None: 17 print(f'Got reference data for {year}') <ipython-input-61-2c4ac519adc4> in parse_reference_table(xls) 3 df = pd.read_excel(xls, 'reference data', header=None) 4 join_n_strip = lambda l: ' '.join(l).strip() ----> 5 df.columns=df.ix[1:3].fillna('').apply(join_n_strip, axis=0).values 6 df = df.drop(df.index[0:4]).reset_index(drop=True) 7 else: ~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, args, **kwds) 4875 f, axis, 4876 reduce=reduce, -> 4877 ignore_failures=ignore_failures) 4878 else: 4879 return self._apply_broadcast(f, axis) ~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _apply_standard(self, func, axis, ignore_failures, reduce) 4971 try: 4972 for i, v in enumerate(series_gen): -> 4973 results[i] = func(v) 4974 keys.append(v.name) 4975 except Exception as e: <ipython-input-61-2c4ac519adc4> in <lambda>(l) 2 if 'reference data' in xls.sheet_names: 3 df = pd.read_excel(xls, 'reference data', header=None) ----> 4 join_n_strip = lambda l: ' '.join(l).strip() 5 df.columns=df.ix[1:3].fillna('').apply(join_n_strip, axis=0).values 6 df = df.drop(df.index[0:4]).reset_index(drop=True) TypeError: ('sequence item 2: expected str instance, int found', 'occurred at index 0')
def parse_reference_table(xls):
if 'reference data' in xls.sheet_names:
df = pd.read_excel(xls, 'reference data', header=None)
join_n_strip = lambda l: ' '.join(l).strip()
try:
df.columns=df.ix[1:3].fillna('').apply(join_n_strip, axis=0).values
df = df.drop(df.index[0:4]).reset_index(drop=True)
except TypeError as e:
print(e)
else:
df=None
return df
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
df = parse_reference_table(xls)
if df is not None:
print(f'Got reference data for {year}')
('sequence item 2: expected str instance, int found', 'occurred at index 0') Got reference data for 2009 ('sequence item 2: expected str instance, int found', 'occurred at index 0') Got reference data for 2010 ('sequence item 2: expected str instance, int found', 'occurred at index 0') Got reference data for 2011 ('sequence item 2: expected str instance, int found', 'occurred at index 0') Got reference data for 2012 ('sequence item 2: expected str instance, int found', 'occurred at index 0') Got reference data for 2013 Got reference data for 2014 Got reference data for 2015 Got reference data for 2016 Got reference data for 2017
df.head()
De ref | school name | address 1 | town | postcode | school type | school management type | parliamentary constituency | district council | urban/ rural | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1210014 | Ashfield Girls' High School | 397 HOLYWOOD ROAD | BELFAST | BT42LY | Non-grammar | Controlled | BELFAST EAST | BELFAST | Urban |
1 | 1210015 | Ashfield Boys' High School | 395 HOLYWOOD ROAD | BELFAST | BT42LY | Non-grammar | Controlled | BELFAST EAST | BELFAST | Urban |
2 | 1210021 | Belfast Model School For Girls | 35 DUNOWEN GARDENS | BELFAST | BT146NQ | Non-grammar | Controlled | BELFAST NORTH | BELFAST | Urban |
3 | 1210022 | Belfast Boys' Model School | BALLYSILLAN ROAD | BELFAST | BT146RB | Non-grammar | Controlled | BELFAST NORTH | BELFAST | Urban |
4 | 1230053 | St Louise's Comprehensive College | 468 FALLS ROAD | BELFAST | BT126EN | Non-grammar | Catholic Maintained | BELFAST WEST | BELFAST | Urban |
join_n_strip_n_lower = lambda l: ' '.join(l).strip().lower()
def parse_reference_table(xls):
if 'reference data' in xls.sheet_names:
df = pd.read_excel(xls, 'reference data', header=None)
h_range = 2 if isinstance(df.ix[3,0], int) else 3
try:
df.columns=df.ix[1:h_range].fillna('').apply(join_n_strip_n_lower, axis=0).values
df = df.drop(df.index[0:h_range+1]).reset_index(drop=True)
except TypeError as e:
print(e)
else:
df=None
return df
dfs = {}
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
df = parse_reference_table(xls)
if df is not None:
print(f'Got reference data for {year}')
dfs[year]=df
Got reference data for 2009 Got reference data for 2010 Got reference data for 2011 Got reference data for 2012 Got reference data for 2013 Got reference data for 2014 Got reference data for 2015 Got reference data for 2016 Got reference data for 2017
import seaborn as sns
all_keys = set([k for df in dfs.values() for k in df.keys() ])
df = pd.DataFrame.from_dict({
year: [k in df.keys() for k in all_keys]
for year, df in dfs.items()
}, orient='index')
df.columns=all_keys
sns.heatmap(df.T)
<matplotlib.axes._subplots.AxesSubplot at 0x112c6ef28>
ref_key_map={
'denino':'de ref',
'urban/ rural': 'urban_rural',
'schoolname': 'school name'
}
join_n_strip_n_lower = lambda l: ' '.join(l).strip().lower()
def parse_reference_table(xls):
if 'reference data' in xls.sheet_names:
df = pd.read_excel(xls, 'reference data', header=None)
h_range = 2 if isinstance(df.ix[3,0], int) else 3
try:
df.columns=df.ix[1:h_range].fillna('').apply(join_n_strip_n_lower, axis=0).values
df.rename(columns=ref_key_map, inplace=True)
df = df.drop(df.index[0:h_range+1]).reset_index(drop=True)
except TypeError as e:
print(e)
else:
df=None
return df
dfs = {}
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
df = parse_reference_table(xls)
if df is not None:
print(f'Got reference data for {year}')
dfs[year]=df
all_keys = set([k for df in dfs.values() for k in df.keys() ])
df = pd.DataFrame.from_dict({
year: [k in df.keys() for k in all_keys]
for year, df in dfs.items()
}, orient='index')
df.columns=all_keys
sns.heatmap(df.T)
Got reference data for 2009 Got reference data for 2010 Got reference data for 2011 Got reference data for 2012 Got reference data for 2013 Got reference data for 2014 Got reference data for 2015 Got reference data for 2016 Got reference data for 2017
<matplotlib.axes._subplots.AxesSubplot at 0x1a1e3292e8>
cols = [k for k,v in zip(df.columns, df.all()) if v]
cols
['parliamentary constituency', 'urban_rural', 'school name', 'school type', 'address 1', 'town', 'postcode', 'school management type', 'de ref', 'district council']
def parse_reference_table(xls):
cols = [
'de ref',
'school name',
'school type',
'address 1',
'postcode',
'urban_rural',
'school management type',
'district council',
'parliamentary constituency',
'town'
]
ref_key_map={
'denino':'de ref',
'urban/ rural': 'urban_rural',
'schoolname': 'school name'
}
join_n_strip_n_lower = lambda l: ' '.join(l).strip().lower()
if 'reference data' in xls.sheet_names:
df = pd.read_excel(xls, 'reference data', header=None)
h_range = 2 if isinstance(df.ix[3,0], int) else 3
try:
df.columns=df.ix[1:h_range].fillna('').apply(join_n_strip_n_lower, axis=0).values
df.rename(columns=ref_key_map, inplace=True)
df = df.drop(df.index[0:h_range+1]).reset_index(drop=True)
df = df[cols]
except TypeError as e:
print(e)
else:
df=None
return df
dfs = {}
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
df = parse_reference_table(xls)
if df is not None:
print(f'Got reference data for {year}')
dfs[year]=df
all_keys = set([k for df in dfs.values() for k in df.keys() ])
df = pd.DataFrame.from_dict({
year: [k in df.keys() for k in all_keys]
for year, df in dfs.items()
}, orient='index')
df.columns=all_keys
sns.heatmap(df.T)
Got reference data for 2009 Got reference data for 2010 Got reference data for 2011 Got reference data for 2012 Got reference data for 2013 Got reference data for 2014 Got reference data for 2015 Got reference data for 2016 Got reference data for 2017
<matplotlib.axes._subplots.AxesSubplot at 0x1a1e0a9588>
sheets = defaultdict(list)
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
sheets[year].extend(xls.sheet_names)
all_sheet_names = set([_ for d in sheets.values() for _ in d])
df = pd.DataFrame.from_dict({
year: [sn in sheet_names for sn in all_sheet_names]
for year, sheet_names in sheets.items()
}, orient='index')
df.columns=all_sheet_names
sns.heatmap(df.T)
<matplotlib.axes._subplots.AxesSubplot at 0x1a1e0f39e8>
df.keys()
Index(['meta data', 'gender', 'unfilled places', 'SEN', 'enrolments', 'School level data', 'Overall', 'newcomers', 'religion', 'free school meals', 'reference data'], dtype='object')
xls = pd.ExcelFile(f'data/education-ni/2017/School%20level%20-%20post%20primary%20schools%20data%20supp%201718.xls')
df = pd.read_excel(xls, 'enrolments', header=None)
df.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Data on Pupils attending post primary schools ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | NaN | NaN | year of enrolment | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | DE ref | Schoolname | year 8 | year 9 | year 10 | year 11 | year 12 | year 13 | year 14 | total pupils |
4 | 1210014 | Ashfield Girls' High School | 124 | 114 | 119 | 111 | 113 | 60 | 50 | 691 |
def parse_enrolments_table(xls):
join_n_strip_n_lower = lambda l: ' '.join(l).strip().lower()
if 'enrolments' in xls.sheet_names:
df = pd.read_excel(xls, 'enrolments', header=None)
h_range = 2 if isinstance(df.ix[3,0], int) else 3
try:
df.columns=df.ix[1:h_range].fillna('').apply(join_n_strip_n_lower, axis=0).values
df = df.drop(df.index[0:h_range+1]).reset_index(drop=True)
except TypeError as e:
print(e)
else:
df=None
return df
df= parse_enrolments_table(xls)
df
de ref | schoolname | year of enrolment year 8 | year 9 | year 10 | year 11 | year 12 | year 13 | year 14 | total pupils | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1210014 | Ashfield Girls' High School | 124 | 114 | 119 | 111 | 113 | 60 | 50 | 691 |
1 | 1210015 | Ashfield Boys' High School | 126 | 121 | 117 | 121 | 111 | 84 | 60 | 740 |
2 | 1210021 | Belfast Model School For Girls | 167 | 160 | 162 | 139 | 121 | 127 | 70 | 946 |
3 | 1210022 | Belfast Boys' Model School | 174 | 179 | 165 | 154 | 160 | 89 | 76 | 997 |
4 | 1230053 | St Louise's Comprehensive College | 236 | 194 | 189 | 206 | 264 | 160 | 186 | 1435 |
5 | 1230104 | Mercy College Belfast | 76 | 66 | 71 | 76 | 73 | 71 | 17 | 450 |
6 | 1230130 | St Rose’s Dominican College | 16 | 25 | 32 | 37 | 37 | 72 | 24 | 243 |
7 | 1230146 | Christian Brothers School, Belfast | 107 | 70 | 95 | 47 | 40 | 87 | 79 | 525 |
8 | 1230155 | St Genevieve's High School | 164 | 161 | 152 | 152 | 165 | 138 | 90 | 1022 |
9 | 1230182 | De La Salle College | 109 | 165 | 164 | 161 | 173 | 119 | 67 | 958 |
10 | 1230262 | Corpus Christi College | 24 | 23 | 45 | 40 | 38 | 37 | 26 | 233 |
11 | 1230275 | St Joseph's College, Belfast | 123 | 112 | 125 | 111 | 104 | 47 | 36 | 658 |
12 | 1230321 | Blessed Trinity College | 204 | 204 | 216 | 178 | 191 | 189 | 89 | 1271 |
13 | 1240291 | Colaiste Feirste | 102 | 119 | 102 | 112 | 97 | 71 | 48 | 651 |
14 | 1260269 | Hazelwood College | 170 | 171 | 153 | 147 | 144 | 94 | 75 | 954 |
15 | 1260294 | Malone Integrated College | 69 | 69 | 88 | 63 | 85 | 74 | 37 | 485 |
16 | 1410079 | Grosvenor Grammar School | 160 | 158 | 162 | 163 | 154 | 150 | 133 | 1080 |
17 | 1410270 | Wellington College | 112 | 119 | 114 | 117 | 122 | 85 | 73 | 742 |
18 | 1410315 | Bloomfield Collegiate | 101 | 110 | 101 | 110 | 107 | 93 | 78 | 700 |
19 | 1420020 | Campbell College | 134 | 143 | 147 | 148 | 147 | 96 | 107 | 922 |
20 | 1420021 | St Mary's Christian Brothers' Grammar, Belfast | 178 | 185 | 182 | 174 | 165 | 157 | 124 | 1165 |
21 | 1420022 | Methodist College | 241 | 247 | 256 | 255 | 253 | 266 | 258 | 1776 |
22 | 1420027 | The Royal Belfast Academical Institution | 152 | 160 | 160 | 164 | 146 | 114 | 116 | 1012 |
23 | 1420028 | Belfast Royal Academy | 207 | 211 | 209 | 216 | 209 | 221 | 188 | 1461 |
24 | 1420029 | St Dominic's High School, Belfast | 141 | 147 | 145 | 144 | 156 | 149 | 130 | 1012 |
25 | 1420030 | St Malachy's College, Belfast | 159 | 161 | 165 | 171 | 166 | 151 | 121 | 1094 |
26 | 1420082 | Dominican College, Belfast | 152 | 148 | 151 | 153 | 147 | 152 | 98 | 1001 |
27 | 1420089 | Strathearn School, Belfast | 111 | 116 | 121 | 110 | 111 | 103 | 106 | 778 |
28 | 1420095 | Rathmore Grammar School | 184 | 186 | 186 | 186 | 184 | 183 | 161 | 1270 |
29 | 1420264 | Victoria College | 132 | 138 | 141 | 136 | 127 | 120 | 103 | 897 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
178 | 5230218 | St Catherine's College | 173 | 173 | 159 | 157 | 189 | 125 | 128 | 1104 |
179 | 5230278 | Holy Trinity College | 159 | 163 | 171 | 169 | 160 | 112 | 87 | 1021 |
180 | 5230293 | St Patrick's College, Dungannon | 106 | 109 | 98 | 92 | 101 | 49 | 35 | 590 |
181 | 5230321 | St John The Baptist College | 77 | 50 | 37 | 5 | 1 | 0 | 0 | 170 |
182 | 5250216 | Brownlow Int College | 62 | 62 | 43 | 52 | 66 | 0 | 0 | 285 |
183 | 5260285 | New-Bridge Integrated College | 109 | 107 | 101 | 106 | 104 | 52 | 36 | 615 |
184 | 5260286 | Integrated College Dungannon | 119 | 99 | 97 | 94 | 93 | 41 | 49 | 592 |
185 | 5410013 | Banbridge Academy | 185 | 187 | 193 | 196 | 196 | 184 | 160 | 1301 |
186 | 5410057 | Lurgan College | 0 | 0 | 0 | 122 | 116 | 115 | 102 | 455 |
187 | 5410067 | Portadown College | 0 | 0 | 0 | 205 | 207 | 171 | 161 | 744 |
188 | 5420045 | St Louis Grammar School, Kilkeel | 82 | 83 | 84 | 81 | 84 | 81 | 69 | 564 |
189 | 5420059 | Abbey Christian Brothers Grammar School | 128 | 130 | 129 | 132 | 136 | 115 | 115 | 885 |
190 | 5420060 | Our Lady's Grammar School | 126 | 124 | 124 | 126 | 125 | 117 | 111 | 853 |
191 | 5420062 | St Colman's College, Newry | 127 | 126 | 131 | 130 | 125 | 118 | 128 | 885 |
192 | 5420073 | St Joseph's Convent Grammar School, Donaghmore | 99 | 102 | 105 | 92 | 102 | 106 | 80 | 686 |
193 | 5420076 | Sacred Heart Grammar School, Newry | 123 | 123 | 120 | 120 | 119 | 122 | 116 | 843 |
194 | 5420260 | The Royal School, Dungannon | 93 | 96 | 107 | 93 | 105 | 74 | 70 | 638 |
195 | 5420263 | The Royal School, Armagh | 103 | 103 | 99 | 108 | 106 | 103 | 113 | 735 |
196 | 5420268 | St Patrick's Grammar School, Armagh | 126 | 164 | 132 | 155 | 156 | 126 | 108 | 967 |
197 | 5420304 | St Patrick's Academy, Dungannon | 197 | 198 | 198 | 189 | 200 | 176 | 182 | 1340 |
198 | 5420314 | St Ronan’s College | 285 | 243 | 207 | 182 | 249 | 157 | 146 | 1469 |
199 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
200 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
201 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
202 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
203 | Source: NI school census | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
204 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
205 | Notes: | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
206 | 1. Year groups refer to the level of study of ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
207 | 2. Pupils aged 16+ that are not studying level... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
208 rows × 10 columns
df.set_index('de ref')
schoolname | year of enrolment year 8 | year 9 | year 10 | year 11 | year 12 | year 13 | year 14 | total pupils | |
---|---|---|---|---|---|---|---|---|---|
de ref | |||||||||
1210014 | Ashfield Girls' High School | 124 | 114 | 119 | 111 | 113 | 60 | 50 | 691 |
1210015 | Ashfield Boys' High School | 126 | 121 | 117 | 121 | 111 | 84 | 60 | 740 |
1210021 | Belfast Model School For Girls | 167 | 160 | 162 | 139 | 121 | 127 | 70 | 946 |
1210022 | Belfast Boys' Model School | 174 | 179 | 165 | 154 | 160 | 89 | 76 | 997 |
1230053 | St Louise's Comprehensive College | 236 | 194 | 189 | 206 | 264 | 160 | 186 | 1435 |
1230104 | Mercy College Belfast | 76 | 66 | 71 | 76 | 73 | 71 | 17 | 450 |
1230130 | St Rose’s Dominican College | 16 | 25 | 32 | 37 | 37 | 72 | 24 | 243 |
1230146 | Christian Brothers School, Belfast | 107 | 70 | 95 | 47 | 40 | 87 | 79 | 525 |
1230155 | St Genevieve's High School | 164 | 161 | 152 | 152 | 165 | 138 | 90 | 1022 |
1230182 | De La Salle College | 109 | 165 | 164 | 161 | 173 | 119 | 67 | 958 |
1230262 | Corpus Christi College | 24 | 23 | 45 | 40 | 38 | 37 | 26 | 233 |
1230275 | St Joseph's College, Belfast | 123 | 112 | 125 | 111 | 104 | 47 | 36 | 658 |
1230321 | Blessed Trinity College | 204 | 204 | 216 | 178 | 191 | 189 | 89 | 1271 |
1240291 | Colaiste Feirste | 102 | 119 | 102 | 112 | 97 | 71 | 48 | 651 |
1260269 | Hazelwood College | 170 | 171 | 153 | 147 | 144 | 94 | 75 | 954 |
1260294 | Malone Integrated College | 69 | 69 | 88 | 63 | 85 | 74 | 37 | 485 |
1410079 | Grosvenor Grammar School | 160 | 158 | 162 | 163 | 154 | 150 | 133 | 1080 |
1410270 | Wellington College | 112 | 119 | 114 | 117 | 122 | 85 | 73 | 742 |
1410315 | Bloomfield Collegiate | 101 | 110 | 101 | 110 | 107 | 93 | 78 | 700 |
1420020 | Campbell College | 134 | 143 | 147 | 148 | 147 | 96 | 107 | 922 |
1420021 | St Mary's Christian Brothers' Grammar, Belfast | 178 | 185 | 182 | 174 | 165 | 157 | 124 | 1165 |
1420022 | Methodist College | 241 | 247 | 256 | 255 | 253 | 266 | 258 | 1776 |
1420027 | The Royal Belfast Academical Institution | 152 | 160 | 160 | 164 | 146 | 114 | 116 | 1012 |
1420028 | Belfast Royal Academy | 207 | 211 | 209 | 216 | 209 | 221 | 188 | 1461 |
1420029 | St Dominic's High School, Belfast | 141 | 147 | 145 | 144 | 156 | 149 | 130 | 1012 |
1420030 | St Malachy's College, Belfast | 159 | 161 | 165 | 171 | 166 | 151 | 121 | 1094 |
1420082 | Dominican College, Belfast | 152 | 148 | 151 | 153 | 147 | 152 | 98 | 1001 |
1420089 | Strathearn School, Belfast | 111 | 116 | 121 | 110 | 111 | 103 | 106 | 778 |
1420095 | Rathmore Grammar School | 184 | 186 | 186 | 186 | 184 | 183 | 161 | 1270 |
1420264 | Victoria College | 132 | 138 | 141 | 136 | 127 | 120 | 103 | 897 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5230218 | St Catherine's College | 173 | 173 | 159 | 157 | 189 | 125 | 128 | 1104 |
5230278 | Holy Trinity College | 159 | 163 | 171 | 169 | 160 | 112 | 87 | 1021 |
5230293 | St Patrick's College, Dungannon | 106 | 109 | 98 | 92 | 101 | 49 | 35 | 590 |
5230321 | St John The Baptist College | 77 | 50 | 37 | 5 | 1 | 0 | 0 | 170 |
5250216 | Brownlow Int College | 62 | 62 | 43 | 52 | 66 | 0 | 0 | 285 |
5260285 | New-Bridge Integrated College | 109 | 107 | 101 | 106 | 104 | 52 | 36 | 615 |
5260286 | Integrated College Dungannon | 119 | 99 | 97 | 94 | 93 | 41 | 49 | 592 |
5410013 | Banbridge Academy | 185 | 187 | 193 | 196 | 196 | 184 | 160 | 1301 |
5410057 | Lurgan College | 0 | 0 | 0 | 122 | 116 | 115 | 102 | 455 |
5410067 | Portadown College | 0 | 0 | 0 | 205 | 207 | 171 | 161 | 744 |
5420045 | St Louis Grammar School, Kilkeel | 82 | 83 | 84 | 81 | 84 | 81 | 69 | 564 |
5420059 | Abbey Christian Brothers Grammar School | 128 | 130 | 129 | 132 | 136 | 115 | 115 | 885 |
5420060 | Our Lady's Grammar School | 126 | 124 | 124 | 126 | 125 | 117 | 111 | 853 |
5420062 | St Colman's College, Newry | 127 | 126 | 131 | 130 | 125 | 118 | 128 | 885 |
5420073 | St Joseph's Convent Grammar School, Donaghmore | 99 | 102 | 105 | 92 | 102 | 106 | 80 | 686 |
5420076 | Sacred Heart Grammar School, Newry | 123 | 123 | 120 | 120 | 119 | 122 | 116 | 843 |
5420260 | The Royal School, Dungannon | 93 | 96 | 107 | 93 | 105 | 74 | 70 | 638 |
5420263 | The Royal School, Armagh | 103 | 103 | 99 | 108 | 106 | 103 | 113 | 735 |
5420268 | St Patrick's Grammar School, Armagh | 126 | 164 | 132 | 155 | 156 | 126 | 108 | 967 |
5420304 | St Patrick's Academy, Dungannon | 197 | 198 | 198 | 189 | 200 | 176 | 182 | 1340 |
5420314 | St Ronan’s College | 285 | 243 | 207 | 182 | 249 | 157 | 146 | 1469 |
NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
Source: NI school census | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
Notes: | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1. Year groups refer to the level of study of pupils i.e. Pupils in their final year of GCSEs (including those repeating are in year 12) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2. Pupils aged 16+ that are not studying level 2 or 3 qualifications have been recorded as year 12 pupils | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
208 rows × 9 columns
def parse_enrolments_table(xls):
join_n_strip_n_lower = lambda l: ' '.join(l).strip().lower()
strip_n_lower = lambda s: s.strip().lower()
if 'enrolments' in xls.sheet_names:
df = pd.read_excel(xls, 'enrolments', header=None, skip_footer=5)
h_range = 2 if isinstance(df.ix[3,0], int) else 3
try:
df.columns=df.ix[3].fillna('').apply(strip_n_lower).values
df = df.drop(df.index[0:h_range+1]).reset_index(drop=True)
df.dropna(how='all', inplace=True, axis=0)
except TypeError as e:
print(e)
else:
df=None
return df
parse_enrolments_table(xls).tail()
de ref | schoolname | year 8 | year 9 | year 10 | year 11 | year 12 | year 13 | year 14 | total pupils | |
---|---|---|---|---|---|---|---|---|---|---|
194 | 5420260 | The Royal School, Dungannon | 93 | 96 | 107 | 93 | 105 | 74 | 70 | 638 |
195 | 5420263 | The Royal School, Armagh | 103 | 103 | 99 | 108 | 106 | 103 | 113 | 735 |
196 | 5420268 | St Patrick's Grammar School, Armagh | 126 | 164 | 132 | 155 | 156 | 126 | 108 | 967 |
197 | 5420304 | St Patrick's Academy, Dungannon | 197 | 198 | 198 | 189 | 200 | 176 | 182 | 1340 |
198 | 5420314 | St Ronan’s College | 285 | 243 | 207 | 182 | 249 | 157 | 146 | 1469 |
df = parse_enrolments_table(xls).set_index('de ref').drop('schoolname', axis=1)
df
year 8 | year 9 | year 10 | year 11 | year 12 | year 13 | year 14 | total pupils | |
---|---|---|---|---|---|---|---|---|
de ref | ||||||||
1210014 | 124 | 114 | 119 | 111 | 113 | 60 | 50 | 691 |
1210015 | 126 | 121 | 117 | 121 | 111 | 84 | 60 | 740 |
1210021 | 167 | 160 | 162 | 139 | 121 | 127 | 70 | 946 |
1210022 | 174 | 179 | 165 | 154 | 160 | 89 | 76 | 997 |
1230053 | 236 | 194 | 189 | 206 | 264 | 160 | 186 | 1435 |
1230104 | 76 | 66 | 71 | 76 | 73 | 71 | 17 | 450 |
1230130 | 16 | 25 | 32 | 37 | 37 | 72 | 24 | 243 |
1230146 | 107 | 70 | 95 | 47 | 40 | 87 | 79 | 525 |
1230155 | 164 | 161 | 152 | 152 | 165 | 138 | 90 | 1022 |
1230182 | 109 | 165 | 164 | 161 | 173 | 119 | 67 | 958 |
1230262 | 24 | 23 | 45 | 40 | 38 | 37 | 26 | 233 |
1230275 | 123 | 112 | 125 | 111 | 104 | 47 | 36 | 658 |
1230321 | 204 | 204 | 216 | 178 | 191 | 189 | 89 | 1271 |
1240291 | 102 | 119 | 102 | 112 | 97 | 71 | 48 | 651 |
1260269 | 170 | 171 | 153 | 147 | 144 | 94 | 75 | 954 |
1260294 | 69 | 69 | 88 | 63 | 85 | 74 | 37 | 485 |
1410079 | 160 | 158 | 162 | 163 | 154 | 150 | 133 | 1080 |
1410270 | 112 | 119 | 114 | 117 | 122 | 85 | 73 | 742 |
1410315 | 101 | 110 | 101 | 110 | 107 | 93 | 78 | 700 |
1420020 | 134 | 143 | 147 | 148 | 147 | 96 | 107 | 922 |
1420021 | 178 | 185 | 182 | 174 | 165 | 157 | 124 | 1165 |
1420022 | 241 | 247 | 256 | 255 | 253 | 266 | 258 | 1776 |
1420027 | 152 | 160 | 160 | 164 | 146 | 114 | 116 | 1012 |
1420028 | 207 | 211 | 209 | 216 | 209 | 221 | 188 | 1461 |
1420029 | 141 | 147 | 145 | 144 | 156 | 149 | 130 | 1012 |
1420030 | 159 | 161 | 165 | 171 | 166 | 151 | 121 | 1094 |
1420082 | 152 | 148 | 151 | 153 | 147 | 152 | 98 | 1001 |
1420089 | 111 | 116 | 121 | 110 | 111 | 103 | 106 | 778 |
1420095 | 184 | 186 | 186 | 186 | 184 | 183 | 161 | 1270 |
1420264 | 132 | 138 | 141 | 136 | 127 | 120 | 103 | 897 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
5230076 | 105 | 74 | 77 | 56 | 48 | 9 | 18 | 387 |
5230108 | 98 | 102 | 99 | 93 | 87 | 45 | 52 | 576 |
5230135 | 143 | 123 | 139 | 138 | 142 | 61 | 69 | 815 |
5230152 | 128 | 131 | 119 | 114 | 134 | 73 | 96 | 795 |
5230157 | 244 | 238 | 286 | 254 | 248 | 153 | 137 | 1560 |
5230167 | 114 | 117 | 105 | 93 | 108 | 40 | 38 | 615 |
5230187 | 161 | 161 | 150 | 170 | 172 | 118 | 100 | 1032 |
5230192 | 82 | 90 | 79 | 59 | 52 | 0 | 0 | 362 |
5230213 | 182 | 193 | 187 | 221 | 196 | 96 | 67 | 1142 |
5230218 | 173 | 173 | 159 | 157 | 189 | 125 | 128 | 1104 |
5230278 | 159 | 163 | 171 | 169 | 160 | 112 | 87 | 1021 |
5230293 | 106 | 109 | 98 | 92 | 101 | 49 | 35 | 590 |
5230321 | 77 | 50 | 37 | 5 | 1 | 0 | 0 | 170 |
5250216 | 62 | 62 | 43 | 52 | 66 | 0 | 0 | 285 |
5260285 | 109 | 107 | 101 | 106 | 104 | 52 | 36 | 615 |
5260286 | 119 | 99 | 97 | 94 | 93 | 41 | 49 | 592 |
5410013 | 185 | 187 | 193 | 196 | 196 | 184 | 160 | 1301 |
5410057 | 0 | 0 | 0 | 122 | 116 | 115 | 102 | 455 |
5410067 | 0 | 0 | 0 | 205 | 207 | 171 | 161 | 744 |
5420045 | 82 | 83 | 84 | 81 | 84 | 81 | 69 | 564 |
5420059 | 128 | 130 | 129 | 132 | 136 | 115 | 115 | 885 |
5420060 | 126 | 124 | 124 | 126 | 125 | 117 | 111 | 853 |
5420062 | 127 | 126 | 131 | 130 | 125 | 118 | 128 | 885 |
5420073 | 99 | 102 | 105 | 92 | 102 | 106 | 80 | 686 |
5420076 | 123 | 123 | 120 | 120 | 119 | 122 | 116 | 843 |
5420260 | 93 | 96 | 107 | 93 | 105 | 74 | 70 | 638 |
5420263 | 103 | 103 | 99 | 108 | 106 | 103 | 113 | 735 |
5420268 | 126 | 164 | 132 | 155 | 156 | 126 | 108 | 967 |
5420304 | 197 | 198 | 198 | 189 | 200 | 176 | 182 | 1340 |
5420314 | 285 | 243 | 207 | 182 | 249 | 157 | 146 | 1469 |
199 rows × 8 columns
df[[c for c in df if c.startswith('year')]]
year 8 | year 9 | year 10 | year 11 | year 12 | year 13 | year 14 | |
---|---|---|---|---|---|---|---|
de ref | |||||||
1210014 | 124 | 114 | 119 | 111 | 113 | 60 | 50 |
1210015 | 126 | 121 | 117 | 121 | 111 | 84 | 60 |
1210021 | 167 | 160 | 162 | 139 | 121 | 127 | 70 |
1210022 | 174 | 179 | 165 | 154 | 160 | 89 | 76 |
1230053 | 236 | 194 | 189 | 206 | 264 | 160 | 186 |
1230104 | 76 | 66 | 71 | 76 | 73 | 71 | 17 |
1230130 | 16 | 25 | 32 | 37 | 37 | 72 | 24 |
1230146 | 107 | 70 | 95 | 47 | 40 | 87 | 79 |
1230155 | 164 | 161 | 152 | 152 | 165 | 138 | 90 |
1230182 | 109 | 165 | 164 | 161 | 173 | 119 | 67 |
1230262 | 24 | 23 | 45 | 40 | 38 | 37 | 26 |
1230275 | 123 | 112 | 125 | 111 | 104 | 47 | 36 |
1230321 | 204 | 204 | 216 | 178 | 191 | 189 | 89 |
1240291 | 102 | 119 | 102 | 112 | 97 | 71 | 48 |
1260269 | 170 | 171 | 153 | 147 | 144 | 94 | 75 |
1260294 | 69 | 69 | 88 | 63 | 85 | 74 | 37 |
1410079 | 160 | 158 | 162 | 163 | 154 | 150 | 133 |
1410270 | 112 | 119 | 114 | 117 | 122 | 85 | 73 |
1410315 | 101 | 110 | 101 | 110 | 107 | 93 | 78 |
1420020 | 134 | 143 | 147 | 148 | 147 | 96 | 107 |
1420021 | 178 | 185 | 182 | 174 | 165 | 157 | 124 |
1420022 | 241 | 247 | 256 | 255 | 253 | 266 | 258 |
1420027 | 152 | 160 | 160 | 164 | 146 | 114 | 116 |
1420028 | 207 | 211 | 209 | 216 | 209 | 221 | 188 |
1420029 | 141 | 147 | 145 | 144 | 156 | 149 | 130 |
1420030 | 159 | 161 | 165 | 171 | 166 | 151 | 121 |
1420082 | 152 | 148 | 151 | 153 | 147 | 152 | 98 |
1420089 | 111 | 116 | 121 | 110 | 111 | 103 | 106 |
1420095 | 184 | 186 | 186 | 186 | 184 | 183 | 161 |
1420264 | 132 | 138 | 141 | 136 | 127 | 120 | 103 |
... | ... | ... | ... | ... | ... | ... | ... |
5230076 | 105 | 74 | 77 | 56 | 48 | 9 | 18 |
5230108 | 98 | 102 | 99 | 93 | 87 | 45 | 52 |
5230135 | 143 | 123 | 139 | 138 | 142 | 61 | 69 |
5230152 | 128 | 131 | 119 | 114 | 134 | 73 | 96 |
5230157 | 244 | 238 | 286 | 254 | 248 | 153 | 137 |
5230167 | 114 | 117 | 105 | 93 | 108 | 40 | 38 |
5230187 | 161 | 161 | 150 | 170 | 172 | 118 | 100 |
5230192 | 82 | 90 | 79 | 59 | 52 | 0 | 0 |
5230213 | 182 | 193 | 187 | 221 | 196 | 96 | 67 |
5230218 | 173 | 173 | 159 | 157 | 189 | 125 | 128 |
5230278 | 159 | 163 | 171 | 169 | 160 | 112 | 87 |
5230293 | 106 | 109 | 98 | 92 | 101 | 49 | 35 |
5230321 | 77 | 50 | 37 | 5 | 1 | 0 | 0 |
5250216 | 62 | 62 | 43 | 52 | 66 | 0 | 0 |
5260285 | 109 | 107 | 101 | 106 | 104 | 52 | 36 |
5260286 | 119 | 99 | 97 | 94 | 93 | 41 | 49 |
5410013 | 185 | 187 | 193 | 196 | 196 | 184 | 160 |
5410057 | 0 | 0 | 0 | 122 | 116 | 115 | 102 |
5410067 | 0 | 0 | 0 | 205 | 207 | 171 | 161 |
5420045 | 82 | 83 | 84 | 81 | 84 | 81 | 69 |
5420059 | 128 | 130 | 129 | 132 | 136 | 115 | 115 |
5420060 | 126 | 124 | 124 | 126 | 125 | 117 | 111 |
5420062 | 127 | 126 | 131 | 130 | 125 | 118 | 128 |
5420073 | 99 | 102 | 105 | 92 | 102 | 106 | 80 |
5420076 | 123 | 123 | 120 | 120 | 119 | 122 | 116 |
5420260 | 93 | 96 | 107 | 93 | 105 | 74 | 70 |
5420263 | 103 | 103 | 99 | 108 | 106 | 103 | 113 |
5420268 | 126 | 164 | 132 | 155 | 156 | 126 | 108 |
5420304 | 197 | 198 | 198 | 189 | 200 | 176 | 182 |
5420314 | 285 | 243 | 207 | 182 | 249 | 157 | 146 |
199 rows × 7 columns
df[[c for c in df if c.startswith('year')]].sum().plot.bar()
<matplotlib.axes._subplots.AxesSubplot at 0x1a1e3b6208>
my_totals = df[[c for c in df if c.startswith('year')]].sum(axis=1)
their_totals = df['total pupils']
any(my_totals!=their_totals)
False
dfs = {}
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
df = parse_enrolments_table(xls)
if df is not None:
print(f'Got enrolement data for {year}')
dfs[year]=df
Got enrolement data for 2009 Got enrolement data for 2010 Got enrolement data for 2011 Got enrolement data for 2012 Got enrolement data for 2013 Got enrolement data for 2014 Got enrolement data for 2015 Got enrolement data for 2016 Got enrolement data for 2017
all_de_refs = set([_ for d in dfs.values() for _ in d['de ref'].values])
all_de_refs
{1210014, 1210015, 1210021, 1210022, 1210266, 1230026, 1230053, 1230089, 1230104, 1230130, 1230146, '1230155', 1230155, '1230155 ', 1230173, '1230173', '1230173 ', 1230182, '1230182', '1230182 ', 1230262, '1230275', 1230275, '1230275 ', 1230321, 1240291, 1260269, 1260294, 1410079, 1410270, 1410315, 1420018, 1420020, 1420021, 1420022, 1420027, 1420028, 1420029, 1420030, 1420082, 1420089, 1420095, 1420264, 1420265, 1420277, 2210065, 2210080, 2210125, 2210164, 2210302, 2210305, 2210306, 2210312, 2230077, 2230081, 2230085, 2230099, 2230100, 2230109, 2230111, 2230122, 2230131, 2230138, 2230144, 2230148, 2230166, 2230180, 2230181, 2230188, 2230190, 2230225, 2230254, 2230298, 2230301, 2230322, 2240319, 2260276, 2260280, 2260283, 2410040, 2410048, 2410066, 2410096, 2410311, 2420041, 2420042, 2420043, 2420052, 2420054, 2420064, 2420065, 2420229, 2420287, 2420320, 3210013, 3210035, 3210038, 3210091, 3210124, 3210133, 3210134, 3210172, 3210200, 3210202, 3210208, 3210222, 3210232, 3210233, 3210279, 3210300, 3210313, 3230019, 3230066, 3230075, 3230084, 3230110, 3230132, 3230142, 3230151, 3230168, 3230203, 3230227, 3230234, 3230308, 3230310, 3250149, 3250207, 3260289, 3260290, 3260299, 3260303, 3410008, 3410033, 3410098, 3410209, 3410297, 3420010, 3420011, 3420012, 3420032, 3420034, 3420046, 3420058, 3420068, 3420077, 3420080, 3420094, 3420317, 4210012, 4210029, 4210045, 4210046, 4210051, 4210063, 4210086, 4210183, 4210194, 4210201, 4210262, 4210296, 4210316, 4230023, 4230067, 4230102, 4230107, 4230161, 4230165, 4230211, 4230223, 4230224, 4250024, 4250072, 4260255, 4260281, 4260295, 4260309, 4410063, 4410085, 4410097, 4420015, 4420044, 4420050, 4420051, 4420086, 4420088, 4420259, 5210016, 5210025, 5210043, 5210047, 5210054, 5210064, 5210083, 5210097, 5210121, 5210127, 5210143, 5210153, 5210186, 5210230, 5210231, 5210271, 5210282, 5230056, 5230059, 5230070, 5230076, 5230088, 5230108, 5230135, 5230152, 5230157, 5230160, 5230167, 5230187, 5230192, 5230213, 5230218, 5230256, 5230278, 5230293, 5230321, 5250216, 5260285, 5260286, 5410013, 5410057, 5410067, 5420045, 5420056, 5420059, 5420060, 5420062, 5420073, 5420076, 5420260, 5420263, 5420268, 5420304, 5420314}
all_de_refs = set([int(_) for d in dfs.values() for _ in d['de ref'].values])
all_de_refs
{1210014, 1210015, 1210021, 1210022, 1210266, 1230026, 1230053, 1230089, 1230104, 1230130, 1230146, 1230155, 1230173, 1230182, 1230262, 1230275, 1230321, 1240291, 1260269, 1260294, 1410079, 1410270, 1410315, 1420018, 1420020, 1420021, 1420022, 1420027, 1420028, 1420029, 1420030, 1420082, 1420089, 1420095, 1420264, 1420265, 1420277, 2210065, 2210080, 2210125, 2210164, 2210302, 2210305, 2210306, 2210312, 2230077, 2230081, 2230085, 2230099, 2230100, 2230109, 2230111, 2230122, 2230131, 2230138, 2230144, 2230148, 2230166, 2230180, 2230181, 2230188, 2230190, 2230225, 2230254, 2230298, 2230301, 2230322, 2240319, 2260276, 2260280, 2260283, 2410040, 2410048, 2410066, 2410096, 2410311, 2420041, 2420042, 2420043, 2420052, 2420054, 2420064, 2420065, 2420229, 2420287, 2420320, 3210013, 3210035, 3210038, 3210091, 3210124, 3210133, 3210134, 3210172, 3210200, 3210202, 3210208, 3210222, 3210232, 3210233, 3210279, 3210300, 3210313, 3230019, 3230066, 3230075, 3230084, 3230110, 3230132, 3230142, 3230151, 3230168, 3230203, 3230227, 3230234, 3230308, 3230310, 3250149, 3250207, 3260289, 3260290, 3260299, 3260303, 3410008, 3410033, 3410098, 3410209, 3410297, 3420010, 3420011, 3420012, 3420032, 3420034, 3420046, 3420058, 3420068, 3420077, 3420080, 3420094, 3420317, 4210012, 4210029, 4210045, 4210046, 4210051, 4210063, 4210086, 4210183, 4210194, 4210201, 4210262, 4210296, 4210316, 4230023, 4230067, 4230102, 4230107, 4230161, 4230165, 4230211, 4230223, 4230224, 4250024, 4250072, 4260255, 4260281, 4260295, 4260309, 4410063, 4410085, 4410097, 4420015, 4420044, 4420050, 4420051, 4420086, 4420088, 4420259, 5210016, 5210025, 5210043, 5210047, 5210054, 5210064, 5210083, 5210097, 5210121, 5210127, 5210143, 5210153, 5210186, 5210230, 5210231, 5210271, 5210282, 5230056, 5230059, 5230070, 5230076, 5230088, 5230108, 5230135, 5230152, 5230157, 5230160, 5230167, 5230187, 5230192, 5230213, 5230218, 5230256, 5230278, 5230293, 5230321, 5250216, 5260285, 5260286, 5410013, 5410057, 5410067, 5420045, 5420056, 5420059, 5420060, 5420062, 5420073, 5420076, 5420260, 5420263, 5420268, 5420304, 5420314}
df = pd.DataFrame.from_dict({
year: [dr in df['de ref'].values for dr in all_de_refs]
for year, df in dfs.items()
}, orient='index')
df.columns=all_de_refs
sns.heatmap(df.T)
<matplotlib.axes._subplots.AxesSubplot at 0x1131fb278>
for year, df in dfs.items():
print(f"{year}:{df.keys().values}")
2009:['de ref' 'schoolname' 'year 8' 'year 9' 'year 10' 'year 11' 'year 12' 'year 13' 'year 14' 'year 15' 'total pupils'] 2010:['de ref' 'schoolname' 'year 8' 'year 9' 'year 10' 'year 11' 'year 12' 'year 13' 'year 14' 'year 15' 'total pupils'] 2011:['de ref' 'schoolname' 'year 8' 'year 9' 'year 10' 'year 11' 'year 12' 'year 13' 'year 14' 'year 15' 'total pupils'] 2012:['de ref' 'schoolname' 'year 8' 'year 9' 'year 10' 'year 11' 'year 12' 'year 13' 'year 14' 'year 15' 'total pupils'] 2013:['de ref' 'schoolname' 'year 8' 'year 9' 'year 10' 'year 11' 'year 12' 'year 13' 'year 14' 'year 15' 'total pupils'] 2014:['de ref' 'schoolname' 'year 8' 'year 9' 'year 10' 'year 11' 'year 12' 'year 13' 'year 14' 'year 15' 'total pupils'] 2015:['de ref' 'schoolname' 'year 8' 'year 9' 'year 10' 'year 11' 'year 12' 'year 13' 'year 14' 'year 15' 'total pupils'] 2016:['de ref' 'schoolname' 'year 8' 'year 9' 'year 10' 'year 11' 'year 12' 'year 13' 'year 14' 'total pupils'] 2017:['de ref' 'schoolname' 'year 8' 'year 9' 'year 10' 'year 11' 'year 12' 'year 13' 'year 14' 'total pupils']
def parse_enrolments_table(xls):
join_n_strip_n_lower = lambda l: ' '.join(l).strip().lower()
strip_n_lower = lambda s: s.strip().lower()
unyearify = lambda s: int(s.replace('year ',''))
if 'enrolments' in xls.sheet_names:
df = pd.read_excel(xls, 'enrolments', header=None, skip_footer=5)
h_range = 2 if isinstance(df.ix[3,0], int) else 3
try:
df.columns=df.ix[3].fillna('').apply(strip_n_lower).values
df = df.drop(df.index[0:h_range+1]).reset_index(drop=True)
df.dropna(how='all', inplace=True, axis=0)
df['de ref'] = df['de ref'].astype(int)
df.drop('schoolname',axis=1, inplace=True)
df.drop('total pupils',axis=1, inplace=True)
df.set_index('de ref', inplace=True)
df.rename(columns=unyearify, inplace=True)
df=df.astype(float)
except TypeError as e:
print(e)
else:
df=None
return df
dfs = {}
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
df = parse_enrolments_table(xls)
if df is not None:
print(f'Got enrolement data for {year}')
dfs[year]=df
Got enrolement data for 2009 Got enrolement data for 2010 Got enrolement data for 2011 Got enrolement data for 2012 Got enrolement data for 2013 Got enrolement data for 2014 Got enrolement data for 2015 Got enrolement data for 2016 Got enrolement data for 2017
panel = pd.Panel(dfs)
panel
<class 'pandas.core.panel.Panel'> Dimensions: 9 (items) x 232 (major_axis) x 8 (minor_axis) Items axis: 2009 to 2017 Major_axis axis: 1210014 to 5420314 Minor_axis axis: 8 to 15
panel.sum(axis=1)
2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | |
---|---|---|---|---|---|---|---|---|---|
8 | 23116.0 | 23113.0 | 21770.0 | 21322.0 | 21418.0 | 21367.0 | 21792.0 | 22571.0 | 22817.0 |
9 | 23859.0 | 23403.0 | 22925.0 | 21928.0 | 21425.0 | 21481.0 | 21422.0 | 22284.0 | 22650.0 |
10 | 22954.0 | 24155.0 | 23245.0 | 23101.0 | 21971.0 | 21527.0 | 21443.0 | 21864.0 | 22381.0 |
11 | 23521.0 | 23277.0 | 24094.0 | 23524.0 | 23299.0 | 22092.0 | 21601.0 | 22118.0 | 22102.0 |
12 | 23808.0 | 23656.0 | 22967.0 | 24027.0 | 23353.0 | 23166.0 | 21864.0 | 22091.0 | 22099.0 |
13 | 14912.0 | 15652.0 | 15421.0 | 15892.0 | 16567.0 | 16208.0 | 15912.0 | 15667.0 | 15347.0 |
14 | 12607.0 | 13097.0 | 13274.0 | 13482.0 | 13554.0 | 14174.0 | 13922.0 | 13818.0 | 13149.0 |
15 | 100.0 | 154.0 | 154.0 | 196.0 | 201.0 | 275.0 | 365.0 | 0.0 | 0.0 |
sns.heatmap(panel.sum(axis=2))
<matplotlib.axes._subplots.AxesSubplot at 0x1a1e3eb2b0>
panel.sum().sum().plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1a1e970cf8>
def parse_enrolments_table(xls):
join_n_strip_n_lower = lambda l: ' '.join(l).strip().lower()
strip_n_lower = lambda s: s.strip().lower()
unyearify = lambda s: int(s.replace('year ',''))
if 'enrolments' in xls.sheet_names:
df = pd.read_excel(xls, 'enrolments', header=None, skip_footer=5)
h_range = 2 if isinstance(df.ix[3,0], int) else 3
try:
df.columns=df.ix[3].fillna('').apply(strip_n_lower).values
df = df.drop(df.index[0:h_range+1]).reset_index(drop=True)
df.dropna(how='all', inplace=True, axis=0)
df['de ref'] = df['de ref'].astype(int)
df.drop('schoolname',axis=1, inplace=True)
df.drop('total pupils',axis=1, inplace=True)
df.set_index('de ref', inplace=True)
df.rename(columns=unyearify, inplace=True)
df=df.astype(float)
except TypeError as e:
print(e)
else:
df=None
return df
dfs = {}
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
df = parse_enrolments_table(xls)
if df is not None:
print(f'Got enrolement data for {year}')
dfs[year]=df
enrolements = pd.Panel(dfs)
Got enrolement data for 2009 Got enrolement data for 2010 Got enrolement data for 2011 Got enrolement data for 2012 Got enrolement data for 2013 Got enrolement data for 2014 Got enrolement data for 2015 Got enrolement data for 2016 Got enrolement data for 2017
def parse_reference_table(xls):
cols = [
'de ref',
'school name',
'school type',
'address 1',
'postcode',
'urban_rural',
'school management type',
'district council',
'parliamentary constituency',
'town'
]
categories = [
'school type',
'urban_rural',
'school management type',
'district council',
'parliamentary constituency',
'town'
]
ref_key_map={
'denino':'de ref',
'urban/ rural': 'urban_rural',
'schoolname': 'school name'
}
join_n_strip_n_lower = lambda l: ' '.join(l).strip().lower()
if 'reference data' in xls.sheet_names:
df = pd.read_excel(xls, 'reference data', header=None)
h_range = 2 if isinstance(df.ix[3,0], int) else 3
try:
df.columns=df.ix[1:h_range].fillna('').apply(join_n_strip_n_lower, axis=0).values
df.rename(columns=ref_key_map, inplace=True)
df = df.drop(df.index[0:h_range+1]).reset_index(drop=True)
df = df[cols]
df['de ref'] = df['de ref'].astype(int)
df.set_index('de ref', inplace=True)
for c in df:
df[c]=df[c].str.lower().str.strip()
for c in categories:
df[c] = df[c].astype('category')
except TypeError as e:
print(e)
else:
df=None
return df
dfs = {}
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
df = parse_reference_table(xls)
if df is not None:
print(f'Got reference data for {year}')
dfs[year]=df
reference = pd.Panel(dfs)
Got reference data for 2009 Got reference data for 2010 Got reference data for 2011 Got reference data for 2012 Got reference data for 2013 Got reference data for 2014 Got reference data for 2015 Got reference data for 2016 Got reference data for 2017
reference.to_frame()
2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | ||
---|---|---|---|---|---|---|---|---|---|---|
de ref | minor | |||||||||
1210014 | school name | ashfield girls' high school | ashfield girls' high school | ashfield girls' high school | ashfield girls' high school | ashfield girls' high school | ashfield girls' high school | ashfield girls' high school | ashfield girls' high school | ashfield girls' high school |
school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar | |
address 1 | holywood road | holywood road | holywood road | holywood road | holywood road | holywood road | 397 holywood road | 397 holywood road | 397 holywood road | |
postcode | bt4 2ly | bt42ly | bt42ly | bt42ly | bt42ly | bt42ly | bt4 2ly | bt42ly | bt42ly | |
urban_rural | urban | urban | urban | urban | urban | urban | urban | urban | urban | |
school management type | controlled | controlled | controlled | controlled | controlled | controlled | controlled | controlled | controlled | |
district council | belfast | belfast | belfast | belfast | belfast | belfast | belfast | belfast | belfast | |
parliamentary constituency | belfast east | belfast east | belfast east | belfast east | belfast east | belfast east | belfast east | belfast east | belfast east | |
town | belfast | belfast | belfast | belfast | belfast | belfast | belfast | belfast | belfast | |
1210015 | school name | ashfield boys' high school | ashfield boys' high school | ashfield boys' high school | ashfield boys' high school | ashfield boys' high school | ashfield boys' high school | ashfield boys' high school | ashfield boys' high school | ashfield boys' high school |
school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar | |
address 1 | holywood road | holywood road | holywood road | holywood road | holywood road | holywood road | 395 holywood road | 395 holywood road | 395 holywood road | |
postcode | bt4 2ly | bt42ly | bt42ly | bt42ly | bt42ly | bt42ly | bt4 2ly | bt42ly | bt42ly | |
urban_rural | urban | urban | urban | urban | urban | urban | urban | urban | urban | |
school management type | controlled | controlled | controlled | controlled | controlled | controlled | controlled | controlled | controlled | |
district council | belfast | belfast | belfast | belfast | belfast | belfast | belfast | belfast | belfast | |
parliamentary constituency | belfast east | belfast east | belfast east | belfast east | belfast east | belfast east | belfast east | belfast east | belfast east | |
town | belfast | belfast | belfast | belfast | belfast | belfast | belfast | belfast | belfast | |
1210021 | school name | belfast model school for girls | belfast model school for girls | belfast model school for girls | belfast model school for girls | belfast model school for girls | belfast model school for girls | belfast model school for girls | belfast model school for girls | belfast model school for girls |
school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar | |
address 1 | dunkeld gardens | dunkeld gardens | dunkeld gardens | dunkeld gardens | dunkeld gardens | dunkeld gardens | 35 dunowen gardens | 35 dunowen gardens | 35 dunowen gardens | |
postcode | bt14 6nt | bt146nt | bt146nt | bt146nt | bt146nt | bt146nt | bt14 6nq | bt146nq | bt146nq | |
urban_rural | urban | urban | urban | urban | urban | urban | urban | urban | urban | |
school management type | controlled | controlled | controlled | controlled | controlled | controlled | controlled | controlled | controlled | |
district council | belfast | belfast | belfast | belfast | belfast | belfast | belfast | belfast | belfast | |
parliamentary constituency | belfast north | belfast north | belfast north | belfast north | belfast north | belfast north | belfast north | belfast north | belfast north | |
town | belfast | belfast | belfast | belfast | belfast | belfast | belfast | belfast | belfast | |
1210022 | school name | belfast boys' model school | belfast boys' model school | belfast boys' model school | belfast boys' model school | belfast boys' model school | belfast boys' model school | belfast boys' model school | belfast boys' model school | belfast boys' model school |
school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar | |
address 1 | ballysillan road | ballysillan road | ballysillan road | ballysillan road | ballysillan road | ballysillan road | ballysillan road | ballysillan road | ballysillan road | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5420260 | district council | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon | mid ulster | mid ulster | mid ulster |
parliamentary constituency | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | |
town | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon | |
5420263 | school name | the royal school armagh | the royal school armagh | the royal school armagh | the royal school armagh | the royal school armagh | the royal school, armagh | the royal school, armagh | the royal school, armagh | the royal school, armagh |
school type | grammar | grammar | grammar | grammar | grammar | grammar | grammar | grammar | grammar | |
address 1 | college hill | college hill | college hill | college hill | college hill | college hill | college hill | college hill | college hill | |
postcode | bt61 9dh | bt619dh | bt619dh | bt619dh | bt619dh | bt619dh | bt61 9dh | bt619dh | bt619dh | |
urban_rural | urban | urban | urban | urban | urban | urban | urban | urban | urban | |
school management type | voluntary | voluntary | voluntary - other managed | voluntary - other managed | voluntary - other managed | voluntary | voluntary | voluntary | voluntary | |
district council | armagh | armagh | armagh | armagh | armagh | armagh | armagh city, banbridge and craigavon | armagh city, banbridge and craigavon | armagh city, banbridge and craigavon | |
parliamentary constituency | newry and armagh | newry and armagh | newry and armagh | newry and armagh | newry and armagh | newry and armagh | newry and armagh | newry and armagh | newry and armagh | |
town | armagh | armagh | armagh | armagh | armagh | armagh | armagh | armagh | armagh | |
5420268 | school name | st patrick's grammar school | st patrick's grammar school | st patrick's grammar school | st patrick's grammar school | st patrick's grammar school | st patrick's grammar school, armagh | st patrick's grammar school, armagh | st patrick's grammar school, armagh | st patrick's grammar school, armagh |
school type | grammar | grammar | grammar | grammar | grammar | grammar | grammar | grammar | grammar | |
address 1 | cathedral road | cathedral road | cathedral road | cathedral road | cathedral road | cathedral road | cathedral road | cathedral road | cathedral road | |
postcode | bt61 7qz | bt617qz | bt617qz | bt617qz | bt617qz | bt617qz | bt61 7qz | bt617qz | bt617qz | |
urban_rural | urban | urban | urban | urban | urban | urban | urban | urban | urban | |
school management type | voluntary | voluntary | voluntary - rc managed | voluntary - rc managed | voluntary - rc managed | voluntary | voluntary | voluntary | voluntary | |
district council | armagh | armagh | armagh | armagh | armagh | armagh | armagh city, banbridge and craigavon | armagh city, banbridge and craigavon | armagh city, banbridge and craigavon | |
parliamentary constituency | newry and armagh | newry and armagh | newry and armagh | newry and armagh | newry and armagh | newry and armagh | newry and armagh | newry and armagh | newry and armagh | |
town | armagh | armagh | armagh | armagh | armagh | armagh | armagh | armagh | armagh | |
5420304 | school name | st patrick's academy | st patrick's academy | st patrick's academy | st patrick's academy | st patrick's academy | st patrick's academy, dungannon | st patrick's academy, dungannon | st patrick's academy, dungannon | st patrick's academy, dungannon |
school type | grammar | grammar | grammar | grammar | grammar | grammar | grammar | grammar | grammar | |
address 1 | 37 killymeal road | 37 killymeal road | 37 killymeal road | 37 killymeal road | 37 killymeal road | 37 killymeal road | 37 killymeal road | 37 killymeal road | 37 killymeal road | |
postcode | bt71 6ds | bt716ds | bt716ds | bt716ds | bt716ds | bt716ds | bt71 6ds | bt716ds | bt716ds | |
urban_rural | urban | urban | urban | urban | urban | urban | urban | urban | urban | |
school management type | voluntary | voluntary | voluntary - rc managed | voluntary - rc managed | voluntary - rc managed | voluntary | voluntary | voluntary | voluntary | |
district council | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon | mid ulster | mid ulster | mid ulster | |
parliamentary constituency | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | fermanagh and south tyrone | |
town | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon |
1673 rows × 9 columns
reference.to_frame().apply(pd.Series.nunique, axis=1)!=1
de ref minor 1210014 school name False school type True address 1 True postcode True urban_rural False school management type False district council False parliamentary constituency False town False 1210015 school name False school type True address 1 True postcode True urban_rural False school management type False district council False parliamentary constituency False town False 1210021 school name False school type True address 1 True postcode True urban_rural False school management type False district council False parliamentary constituency False town False 1210022 school name False school type True address 1 False ... 5420260 district council True parliamentary constituency False town False 5420263 school name True school type False address 1 False postcode True urban_rural False school management type True district council True parliamentary constituency False town False 5420268 school name True school type False address 1 False postcode True urban_rural False school management type True district council True parliamentary constituency False town False 5420304 school name True school type False address 1 False postcode True urban_rural False school management type True district council True parliamentary constituency False town False Length: 1673, dtype: bool
reference.to_frame().apply(pd.Series.nunique, axis=1).unstack('minor').sum()
minor school name 252.0 school type 311.0 address 1 241.0 postcode 408.0 urban_rural 195.0 school management type 364.0 district council 365.0 parliamentary constituency 198.0 town 187.0 dtype: float64
reference.to_frame()[reference.to_frame().apply(pd.Series.nunique, axis=1)!=1]
2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | ||
---|---|---|---|---|---|---|---|---|---|---|
de ref | minor | |||||||||
1210014 | school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar |
address 1 | holywood road | holywood road | holywood road | holywood road | holywood road | holywood road | 397 holywood road | 397 holywood road | 397 holywood road | |
postcode | bt4 2ly | bt42ly | bt42ly | bt42ly | bt42ly | bt42ly | bt4 2ly | bt42ly | bt42ly | |
1210015 | school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar |
address 1 | holywood road | holywood road | holywood road | holywood road | holywood road | holywood road | 395 holywood road | 395 holywood road | 395 holywood road | |
postcode | bt4 2ly | bt42ly | bt42ly | bt42ly | bt42ly | bt42ly | bt4 2ly | bt42ly | bt42ly | |
1210021 | school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar |
address 1 | dunkeld gardens | dunkeld gardens | dunkeld gardens | dunkeld gardens | dunkeld gardens | dunkeld gardens | 35 dunowen gardens | 35 dunowen gardens | 35 dunowen gardens | |
postcode | bt14 6nt | bt146nt | bt146nt | bt146nt | bt146nt | bt146nt | bt14 6nq | bt146nq | bt146nq | |
1210022 | school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar |
postcode | bt14 6rb | bt146rb | bt146rb | bt146rb | bt146rb | bt146rb | bt14 6rb | bt146rb | bt146rb | |
1230053 | school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar |
postcode | bt12 6en | bt126en | bt126en | bt126en | bt126en | bt126en | bt12 6en | bt126en | bt126en | |
school management type | rc maintained | roman catholic maintained | rc maintained | rc maintained | rc maintained | catholic maintained | catholic maintained | catholic maintained | catholic maintained | |
1230104 | school name | our lady of mercy girls' school | our lady of mercy girls' school | our lady of mercy girls' school | our lady of mercy girls' school | our lady of mercy girls' school | mercy college | mercy college | mercy college | mercy college belfast |
school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar | |
postcode | bt14 7qr | bt147qr | bt147qr | bt147qr | bt147qr | bt147qr | bt14 7qr | bt147qr | bt147qr | |
school management type | rc maintained | roman catholic maintained | rc maintained | rc maintained | rc maintained | catholic maintained | catholic maintained | catholic maintained | catholic maintained | |
1230130 | school name | st rose's high school | st rose's high school | st rose's high school | st rose's high school | st rose's high school | st rose’s dominican college | st rose’s dominican college | st rose’s dominican college | st rose’s dominican college |
school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar | |
address 1 | beechmount avenue | beechmount avenue | beechmount avenue | beechmount avenue | beechmount avenue | beechmount avenue | 65 beechmount avenue | 65 beechmount avenue | 65 beechmount avenue | |
postcode | bt12 7na | bt127na | bt127na | bt127na | bt127na | bt127na | bt12 7na | bt127na | bt127na | |
school management type | rc maintained | roman catholic maintained | rc maintained | rc maintained | rc maintained | catholic maintained | catholic maintained | catholic maintained | catholic maintained | |
1230146 | school name | christian brothers school | christian brothers school | christian brothers school | christian brothers school | christian brothers school | christian brothers school, belfast | christian brothers school, belfast | christian brothers school, belfast | christian brothers school, belfast |
school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar | |
postcode | bt11 8bw | bt118bw | bt118bw | bt118bw | bt118bw | bt118bw | bt11 8bw | bt118bw | bt118bw | |
school management type | rc maintained | roman catholic maintained | rc maintained | rc maintained | rc maintained | catholic maintained | catholic maintained | catholic maintained | catholic maintained | |
1230155 | school type | secondary | secondary | secondary | secondary | secondary | secondary | secondary | secondary | non-grammar |
postcode | bt11 9jp | bt119jp | bt119jp | bt119jp | bt119jp | bt119jp | bt11 9jp | bt119jp | bt119jp | |
school management type | rc maintained | roman catholic maintained | rc maintained | rc maintained | rc maintained | catholic maintained | catholic maintained | catholic maintained | catholic maintained | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5420062 | school name | st colman's college | st colman's college | st colman's college | st colman's college | st colman's college | st colman's college, newry | st colman's college, newry | st colman's college, newry | st colman's college, newry |
postcode | bt35 6pp | bt356pp | bt356pp | bt356pp | bt356pp | bt356pp | bt35 6pp | bt356pp | bt356pp | |
school management type | voluntary | voluntary | voluntary - rc managed | voluntary - rc managed | voluntary - rc managed | voluntary | voluntary | voluntary | voluntary | |
district council | newry & mourne | newry & mourne | newry and mourne | newry and mourne | newry and mourne | newry and mourne | newry mourne and down | newry mourne and down | newry mourne and down | |
5420073 | school name | st joseph's grammar school | st joseph's grammar school | st joseph's grammar school | st joseph's grammar school | st joseph's grammar school | st joseph's convent grammar school, donaghmore | st joseph's convent grammar school, donaghmore | st joseph's convent grammar school, donaghmore | st joseph's convent grammar school, donaghmore |
postcode | bt70 3he | bt703he | bt703he | bt703he | bt703he | bt703he | bt70 3he | bt703he | bt703he | |
school management type | voluntary | voluntary | voluntary - rc managed | voluntary - rc managed | voluntary - rc managed | voluntary | voluntary | voluntary | voluntary | |
district council | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon | mid ulster | mid ulster | mid ulster | |
5420076 | school name | sacred heart grammar school | sacred heart grammar school | sacred heart grammar school | sacred heart grammar school | sacred heart grammar school | sacred heart grammar school, newry | sacred heart grammar school, newry | sacred heart grammar school, newry | sacred heart grammar school, newry |
postcode | bt34 1pr | bt341pr | bt341pr | bt341pr | bt341pr | bt341pr | bt34 1pr | bt341pr | bt341pr | |
school management type | voluntary | voluntary | voluntary - rc managed | voluntary - rc managed | voluntary - rc managed | voluntary | voluntary | voluntary | voluntary | |
district council | newry & mourne | newry & mourne | newry and mourne | newry and mourne | newry and mourne | newry and mourne | newry mourne and down | newry mourne and down | newry mourne and down | |
parliamentary constituency | newry and armagh | newry and armagh | south down | south down | south down | south down | south down | south down | south down | |
5420260 | school name | the royal school dungannon | the royal school dungannon | the royal school dungannon | the royal school dungannon | the royal school dungannon | the royal school, dungannon | the royal school, dungannon | the royal school, dungannon | the royal school, dungannon |
address 1 | 1 ranfurley road | 1 ranfurley road | 1 ranfurley road | 1 ranfurley road | 1 ranfurley road | 1 ranfurley road | 2 ranfurley road | 2 ranfurley road | 2 ranfurley road | |
postcode | bt71 6ap | bt716ap | bt716ap | bt716ap | bt716ap | bt716ap | bt71 6ap | bt716ap | bt716ap | |
school management type | voluntary | voluntary | voluntary - other managed | voluntary - other managed | voluntary - other managed | voluntary | voluntary | voluntary | voluntary | |
district council | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon | mid ulster | mid ulster | mid ulster | |
5420263 | school name | the royal school armagh | the royal school armagh | the royal school armagh | the royal school armagh | the royal school armagh | the royal school, armagh | the royal school, armagh | the royal school, armagh | the royal school, armagh |
postcode | bt61 9dh | bt619dh | bt619dh | bt619dh | bt619dh | bt619dh | bt61 9dh | bt619dh | bt619dh | |
school management type | voluntary | voluntary | voluntary - other managed | voluntary - other managed | voluntary - other managed | voluntary | voluntary | voluntary | voluntary | |
district council | armagh | armagh | armagh | armagh | armagh | armagh | armagh city, banbridge and craigavon | armagh city, banbridge and craigavon | armagh city, banbridge and craigavon | |
5420268 | school name | st patrick's grammar school | st patrick's grammar school | st patrick's grammar school | st patrick's grammar school | st patrick's grammar school | st patrick's grammar school, armagh | st patrick's grammar school, armagh | st patrick's grammar school, armagh | st patrick's grammar school, armagh |
postcode | bt61 7qz | bt617qz | bt617qz | bt617qz | bt617qz | bt617qz | bt61 7qz | bt617qz | bt617qz | |
school management type | voluntary | voluntary | voluntary - rc managed | voluntary - rc managed | voluntary - rc managed | voluntary | voluntary | voluntary | voluntary | |
district council | armagh | armagh | armagh | armagh | armagh | armagh | armagh city, banbridge and craigavon | armagh city, banbridge and craigavon | armagh city, banbridge and craigavon | |
5420304 | school name | st patrick's academy | st patrick's academy | st patrick's academy | st patrick's academy | st patrick's academy | st patrick's academy, dungannon | st patrick's academy, dungannon | st patrick's academy, dungannon | st patrick's academy, dungannon |
postcode | bt71 6ds | bt716ds | bt716ds | bt716ds | bt716ds | bt716ds | bt71 6ds | bt716ds | bt716ds | |
school management type | voluntary | voluntary | voluntary - rc managed | voluntary - rc managed | voluntary - rc managed | voluntary | voluntary | voluntary | voluntary | |
district council | dungannon | dungannon | dungannon | dungannon | dungannon | dungannon | mid ulster | mid ulster | mid ulster |
723 rows × 9 columns
from collections import Counter
Counter(reference[:,:,'school management type'].values.ravel('k'))
Counter({'catholic maintained': 258, 'controlled': 628, 'controlled integrated': 45, 'gmi': 120, 'grant maintained integrated': 15, nan: 201, 'other maintained': 12, 'rc maintained': 282, 'roman catholic maintained': 71, 'voluntary': 303, 'voluntary - other managed': 66, 'voluntary - rc managed': 87})
reference_value_rename = {
'school management type':{
'controlled integrated':'integrated',
'roman catholic maintained':'rc maintained',
'grant maintained integrated':'gmi',
'voluntary - other managed':'other maintained',
'voluntary - rc managed':'rc maintained',
'catholic maintained':'rc maintained'
}
}
import numpy as np
def parse_reference_table(xls):
cols = [
'de ref',
'school name',
'school type',
'address 1',
'postcode',
'urban_rural',
'school management type',
'district council',
'parliamentary constituency',
'town'
]
categories = [
'school type',
'urban_rural',
'school management type',
'district council',
'parliamentary constituency',
'town'
]
ref_key_map={
'denino':'de ref',
'urban/ rural': 'urban_rural',
'schoolname': 'school name'
}
reference_value_rename = {
'school management type':{
'gmi':'integrated',
'controlled integrated':'integrated',
'roman catholic maintained':'rc maintained',
'grant maintained integrated':'integrated',
'voluntary - other managed':'voluntary',
'voluntary - rc managed':'voluntary',
'catholic maintained':'rc maintained'
}
}
join_n_strip_n_lower = lambda l: ' '.join(l).strip().lower()
if 'reference data' in xls.sheet_names:
df = pd.read_excel(xls, 'reference data', header=None)
h_range = 2 if isinstance(df.ix[3,0], int) else 3
try:
df.columns=df.ix[1:h_range].fillna('').apply(join_n_strip_n_lower, axis=0).values
df.rename(columns=ref_key_map, inplace=True)
df = df.drop(df.index[0:h_range+1]).reset_index(drop=True)
df = df[cols]
df['de ref'] = df['de ref'].astype(int)
df.set_index('de ref', inplace=True)
for c in df:
df[c]=df[c].str.lower().str.strip()
df.replace(reference_value_rename, inplace=True)
for c in categories:
df[c].fillna('NA', inplace=True)
df[c] = df[c].astype('category')
except TypeError as e:
print(e)
else:
df=None
return df
dfs = {}
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
df = parse_reference_table(xls)
if df is not None:
print(f'Got reference data for {year}')
dfs[year]=df
reference = pd.Panel(dfs)
Got reference data for 2009 Got reference data for 2010 Got reference data for 2011 Got reference data for 2012 Got reference data for 2013 Got reference data for 2014 Got reference data for 2015 Got reference data for 2016 Got reference data for 2017
"""School level data"""
def parse_available_table(xls):
ref_key_map={
'schoolname': 'school name',
'total unfilled places': 'available places',
'unfilled places': 'available places',
'total approved enrolment number': 'approved enrolments'
}
join_n_strip_n_lower = lambda l: ' '.join(l).strip().lower()
if 'School level data' in xls.sheet_names:
df = pd.read_excel(xls, 'School level data', header=None)
h_range = 2 if isinstance(df.ix[3,0], int) else 3
elif 'unfilled places' in xls.sheet_names:
df = pd.read_excel(xls, 'unfilled places', header=None)
h_range = 2 if isinstance(df.ix[3,0], int) else 3
else:
df=None
if df is not None:
try:
df.columns=df.ix[1:h_range].fillna('').apply(join_n_strip_n_lower, axis=0).values
df.rename(columns=ref_key_map, inplace=True)
df = df.drop(df.index[0:h_range+1]).reset_index(drop=True)
df=df.applymap(lambda x: np.nan if isinstance(x, str) and x.isspace() else x)
df.dropna(how='all', axis=1, inplace=True)
df.dropna(how='any', axis=0, inplace=True)
if df.shape[1] == 6: # recent doesn't have fecking headers
cols = list(df.columns)
cols[0] = 'de ref'
cols[1] = 'school name'
df.columns=cols
df.drop('school name', axis=1, inplace=True)
df['de ref'] = df['de ref'].astype(int)
df.set_index('de ref', inplace=True)
df.replace('*',2.0, inplace=True) # * == < 5
df.replace('!',1, inplace=True) # ! avoid identification, so it's prob one or two
df.dropna(how='all', inplace=True, axis=1)
df.astype(int, inplace=True)
except TypeError as e:
print(e)
return df
dfs={}
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
df = parse_available_table(xls)
if df is not None:
print(f'Got available data for {year}')
dfs[year]=df
available = pd.Panel(dfs)
available
Got available data for 2010 Got available data for 2011 Got available data for 2012 Got available data for 2013 Got available data for 2014 Got available data for 2015 Got available data for 2016 Got available data for 2017
<class 'pandas.core.panel.Panel'> Dimensions: 8 (items) x 229 (major_axis) x 5 (minor_axis) Items axis: 2010 to 2017 Major_axis axis: 1210014 to 5420314 Minor_axis axis: actual enrolments to supernumerary pupils
available[2013]
actual enrolments | approved admissions | approved enrolments | available places | supernumerary pupils | |
---|---|---|---|---|---|
de ref | |||||
1210014 | NaN | NaN | 660.0 | 0.0 | NaN |
1210015 | NaN | NaN | 600.0 | 0.0 | NaN |
1210021 | NaN | NaN | 950.0 | 56.0 | NaN |
1210022 | NaN | NaN | 905.0 | 12.0 | NaN |
1210266 | NaN | NaN | 548.0 | 2.0 | NaN |
1230026 | NaN | NaN | 780.0 | 111.0 | NaN |
1230053 | NaN | NaN | 2280.0 | 787.0 | NaN |
1230089 | NaN | NaN | 650.0 | 0.0 | NaN |
1230104 | NaN | NaN | 650.0 | 238.0 | NaN |
1230130 | NaN | NaN | 575.0 | 237.0 | NaN |
1230146 | NaN | NaN | 750.0 | 305.0 | NaN |
1230155 | NaN | NaN | 1000.0 | 0.0 | NaN |
1230173 | NaN | NaN | NaN | NaN | NaN |
1230182 | NaN | NaN | 1025.0 | 9.0 | NaN |
1230262 | NaN | NaN | 600.0 | 232.0 | NaN |
1230275 | NaN | NaN | 664.0 | 143.0 | NaN |
1230321 | NaN | NaN | NaN | NaN | NaN |
1240291 | NaN | NaN | 600.0 | 60.0 | NaN |
1260269 | NaN | NaN | 850.0 | 14.0 | NaN |
1260294 | NaN | NaN | 800.0 | 189.0 | NaN |
1410079 | NaN | NaN | 1075.0 | 0.0 | NaN |
1410270 | NaN | NaN | 770.0 | 0.0 | NaN |
1410315 | NaN | NaN | NaN | NaN | NaN |
1420018 | NaN | NaN | 710.0 | 2.0 | NaN |
1420020 | NaN | NaN | 950.0 | 73.0 | NaN |
1420021 | NaN | NaN | 1180.0 | 0.0 | NaN |
1420022 | NaN | NaN | 1810.0 | 74.0 | NaN |
1420027 | NaN | NaN | 1050.0 | 32.0 | NaN |
1420028 | NaN | NaN | 1410.0 | 19.0 | NaN |
1420029 | NaN | NaN | 966.0 | 0.0 | NaN |
... | ... | ... | ... | ... | ... |
5230152 | NaN | NaN | 760.0 | 0.0 | NaN |
5230157 | NaN | NaN | 1355.0 | 0.0 | NaN |
5230160 | NaN | NaN | 450.0 | 291.0 | NaN |
5230167 | NaN | NaN | 600.0 | 17.0 | NaN |
5230187 | NaN | NaN | 1000.0 | 37.0 | NaN |
5230192 | NaN | NaN | 680.0 | 403.0 | NaN |
5230213 | NaN | NaN | 1200.0 | 88.0 | NaN |
5230218 | NaN | NaN | 1029.0 | 0.0 | NaN |
5230256 | NaN | NaN | 800.0 | 646.0 | NaN |
5230278 | NaN | NaN | 960.0 | 43.0 | NaN |
5230293 | NaN | NaN | 575.0 | 0.0 | NaN |
5230321 | NaN | NaN | NaN | NaN | NaN |
5250216 | NaN | NaN | 450.0 | 149.0 | NaN |
5260285 | NaN | NaN | 500.0 | 0.0 | NaN |
5260286 | NaN | NaN | 500.0 | 0.0 | NaN |
5410013 | NaN | NaN | 1320.0 | 5.0 | NaN |
5410057 | NaN | NaN | 440.0 | 2.0 | NaN |
5410067 | NaN | NaN | 780.0 | 0.0 | NaN |
5420045 | NaN | NaN | 570.0 | 0.0 | NaN |
5420056 | NaN | NaN | 570.0 | 0.0 | NaN |
5420059 | NaN | NaN | 850.0 | 0.0 | NaN |
5420060 | NaN | NaN | 860.0 | 8.0 | NaN |
5420062 | NaN | NaN | 860.0 | 0.0 | NaN |
5420073 | NaN | NaN | 575.0 | 0.0 | NaN |
5420076 | NaN | NaN | 875.0 | 25.0 | NaN |
5420260 | NaN | NaN | 650.0 | 5.0 | NaN |
5420263 | NaN | NaN | 650.0 | 0.0 | NaN |
5420268 | NaN | NaN | 750.0 | 0.0 | NaN |
5420304 | NaN | NaN | 1400.0 | 31.0 | NaN |
5420314 | NaN | NaN | NaN | NaN | NaN |
229 rows × 5 columns
all_ = set([_ for df in dfs.values() for _ in df.keys()])
df = pd.DataFrame.from_dict({
year: [sn in these_ for sn in all_]
for year, these_ in dfs.items()
}, orient='index')
df.columns=all_
sns.heatmap(df.T)
<matplotlib.axes._subplots.AxesSubplot at 0x1a262c3ba8>
for year,df in dfs.items():
print(f"{year}:{df.shape}:{','.join(df.keys())}")
2010:(217, 2):approved enrolments,available places 2011:(216, 2):approved enrolments,available places 2012:(215, 2):available places,approved enrolments 2013:(210, 2):available places,approved enrolments 2014:(208, 1):available places 2015:(202, 3):approved admissions,approved enrolments,available places 2016:(201, 4):approved enrolments,actual enrolments,supernumerary pupils,available places 2017:(199, 4):approved enrolments,actual enrolments,supernumerary pupils,available places
Counter([_ for df in dfs.values() for _ in df.keys()])
Counter({'actual enrolments': 2, 'approved admissions': 1, 'approved enrolments': 7, 'available places': 8, 'supernumerary pupils': 2})
dfs[2017].dtypes
approved enrolments float64 actual enrolments float64 supernumerary pupils float64 available places float64 dtype: object
available.sum().ix[['approved enrolments','available places']].T
approved enrolments | available places | |
---|---|---|
2010 | 159503.0 | 17780.0 |
2011 | 159793.0 | 19473.0 |
2012 | 158906.0 | 20256.0 |
2013 | 156996.0 | 19599.0 |
2014 | 0.0 | 21151.0 |
2015 | 153916.0 | 20186.0 |
2016 | 154131.0 | 20946.0 |
2017 | 154098.0 | 21022.0 |
from collections import defaultdict
import numpy as np
from pathlib import Path
import pandas as pd
## Initialisation and re-walking already extracted
## Datasets.
year_files = defaultdict(list)
dest = Path('./data/education-ni/')
for year_p in dest.iterdir():
try:
year=int(year_p.parts[-1])
for file in year_p.iterdir():
year_files[year].append(file.parts[-1])
except ValueError:
pass
year_files = dict(sorted(year_files.items()))
year_files
def parse_fsm_table(xls):
join_n_strip_n_lower = lambda l: ' '.join(l).strip().lower()
strip_n_lower = lambda s: s.strip().lower()
unyearify = lambda s: int(s.replace('year ',''))
if 'free school meals' in xls.sheet_names:
df = pd.read_excel(xls, 'free school meals', header=None, skip_footer=5)
h_range = 2 if isinstance(df.ix[3,0], int) else 3
try:
df.columns=df.ix[3].fillna('').apply(strip_n_lower).values
df = df.drop(df.index[0:h_range+1]).reset_index(drop=True)
df.dropna(how='all', inplace=True, axis=0)
df['de ref'] = df['de ref'].astype(int)
df.drop('schoolname',axis=1, inplace=True)
df.drop('free school meals', axis=1, inplace=True)
df.set_index('de ref', inplace=True)
df.replace('#',pd.np.nan, inplace=True) # # = Undisclosed
df.replace('*',2.0, inplace=True) # * == < 5
df.replace('!',1, inplace=True) # ! avoid identification, so it's prob one or two
df=df.astype(float)
except TypeError as e:
print(e)
else:
df=None
return df
dfs = {}
for year, files in year_files.items():
for file in files:
if 'post' in file:
xls = pd.ExcelFile(f'data/education-ni/{year}/{file}')
df = parse_fsm_table(xls)
if df is not None:
print(f'Got fsm data for {year}')
dfs[year]=df
fsm = pd.Panel(dfs)
/Users/andrew.bolster/anaconda3/lib/python3.6/site-packages/ipykernel/__main__.py:29: DeprecationWarning: .ix is deprecated. Please use .loc for label based indexing or .iloc for positional indexing See the documentation here: http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
Got fsm data for 2009 Got fsm data for 2010 Got fsm data for 2011 Got fsm data for 2012 Got fsm data for 2013 Got fsm data for 2014 Got fsm data for 2015 Got fsm data for 2016 Got fsm data for 2017
fsm.to_frame()
2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | ||
---|---|---|---|---|---|---|---|---|---|---|
de ref | minor | |||||||||
1210014 | pupils | 672.000000 | 688.000000 | 703.000000 | 708.000000 | 704.000000 | 697.000000 | 704.000000 | 696.000000 | 691.000000 |
fsme | 132.000000 | 153.000000 | 170.000000 | 183.000000 | 171.000000 | 241.000000 | 277.000000 | 301.000000 | 310.000000 | |
free school meals | 0.196429 | 0.222384 | 0.241821 | 0.258475 | 0.242898 | 0.345768 | 0.393466 | 0.432471 | 0.448625 | |
1210015 | pupils | 647.000000 | 650.000000 | 649.000000 | 666.000000 | 709.000000 | 737.000000 | 744.000000 | 728.000000 | 740.000000 |
fsme | 127.000000 | 141.000000 | 157.000000 | 177.000000 | 173.000000 | 270.000000 | 312.000000 | 332.000000 | 326.000000 | |
free school meals | 0.196291 | 0.216923 | 0.241911 | 0.265766 | 0.244006 | 0.366350 | 0.419355 | 0.456044 | 0.440541 | |
1210021 | pupils | 963.000000 | 929.000000 | 899.000000 | 930.000000 | 916.000000 | 884.000000 | 910.000000 | 954.000000 | 946.000000 |
fsme | 376.000000 | 359.000000 | 367.000000 | 379.000000 | 368.000000 | 452.000000 | 522.000000 | 572.000000 | 580.000000 | |
free school meals | 0.390447 | 0.386437 | 0.408231 | 0.407527 | 0.401747 | 0.511312 | 0.573626 | 0.599581 | 0.613108 | |
1210022 | pupils | 907.000000 | 932.000000 | 913.000000 | 930.000000 | 943.000000 | 969.000000 | 968.000000 | 991.000000 | 997.000000 |
fsme | 363.000000 | 392.000000 | 384.000000 | 390.000000 | 380.000000 | 522.000000 | 548.000000 | 575.000000 | 588.000000 | |
free school meals | 0.400221 | 0.420601 | 0.420591 | 0.419355 | 0.402969 | 0.538700 | 0.566116 | 0.580222 | 0.589769 | |
1230053 | pupils | 1547.000000 | 1530.000000 | 1530.000000 | 1521.000000 | 1516.000000 | 1518.000000 | 1474.000000 | 1420.000000 | 1435.000000 |
fsme | 546.000000 | 551.000000 | 578.000000 | 618.000000 | 563.000000 | 821.000000 | 834.000000 | 847.000000 | 860.000000 | |
free school meals | 0.352941 | 0.360131 | 0.377778 | 0.406312 | 0.371372 | 0.540843 | 0.565807 | 0.596479 | 0.599303 | |
1230104 | pupils | 438.000000 | 429.000000 | 384.000000 | 367.000000 | 423.000000 | 433.000000 | 446.000000 | 452.000000 | 450.000000 |
fsme | 184.000000 | 188.000000 | 184.000000 | 192.000000 | 217.000000 | 337.000000 | 333.000000 | 323.000000 | 315.000000 | |
free school meals | 0.420091 | 0.438228 | 0.479167 | 0.523161 | 0.513002 | 0.778291 | 0.746637 | 0.714602 | 0.700000 | |
1230130 | pupils | 468.000000 | 440.000000 | 406.000000 | 388.000000 | 352.000000 | 316.000000 | 273.000000 | 240.000000 | 243.000000 |
fsme | 272.000000 | 244.000000 | 239.000000 | 214.000000 | 183.000000 | 217.000000 | 187.000000 | 171.000000 | 166.000000 | |
free school meals | 0.581197 | 0.554545 | 0.588670 | 0.551546 | 0.519886 | 0.686709 | 0.684982 | 0.712500 | 0.683128 | |
1230146 | pupils | 508.000000 | 513.000000 | 528.000000 | 529.000000 | 490.000000 | 490.000000 | 498.000000 | 500.000000 | 525.000000 |
fsme | 242.000000 | 237.000000 | 245.000000 | 286.000000 | 236.000000 | 270.000000 | 317.000000 | 326.000000 | 329.000000 | |
free school meals | 0.476378 | 0.461988 | 0.464015 | 0.540643 | 0.481633 | 0.551020 | 0.636546 | 0.652000 | 0.626667 | |
1230155 | pupils | 1011.000000 | 1027.000000 | 1043.000000 | 1069.000000 | 1078.000000 | 1061.000000 | 1070.000000 | 1059.000000 | 1022.000000 |
fsme | 327.000000 | 349.000000 | 401.000000 | 417.000000 | 409.000000 | 539.000000 | 584.000000 | 591.000000 | 559.000000 | |
free school meals | 0.323442 | 0.339825 | 0.384468 | 0.390084 | 0.379406 | 0.508011 | 0.545794 | 0.558074 | 0.546967 | |
1230182 | pupils | 1116.000000 | 1088.000000 | 1088.000000 | 1098.000000 | 1093.000000 | 1116.000000 | 1117.000000 | 1104.000000 | 958.000000 |
fsme | 348.000000 | 348.000000 | 359.000000 | 386.000000 | 345.000000 | 535.000000 | 562.000000 | 588.000000 | 508.000000 | |
free school meals | 0.311828 | 0.319853 | 0.329963 | 0.351548 | 0.315645 | 0.479391 | 0.503133 | 0.532609 | 0.530271 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5410013 | pupils | 1311.000000 | 1325.000000 | 1324.000000 | 1331.000000 | 1327.000000 | 1324.000000 | 1320.000000 | 1312.000000 | 1301.000000 |
fsme | 38.000000 | 46.000000 | 48.000000 | 56.000000 | 48.000000 | 67.000000 | 76.000000 | 83.000000 | 86.000000 | |
free school meals | 0.028986 | 0.034717 | 0.036254 | 0.042074 | 0.036172 | 0.050604 | 0.057576 | 0.063262 | 0.066103 | |
5410057 | pupils | 432.000000 | 438.000000 | 455.000000 | 457.000000 | 451.000000 | 453.000000 | 455.000000 | 451.000000 | 455.000000 |
fsme | 20.000000 | 27.000000 | 24.000000 | 21.000000 | 25.000000 | 37.000000 | 38.000000 | 41.000000 | 50.000000 | |
free school meals | 0.046296 | 0.061644 | 0.052747 | 0.045952 | 0.055432 | 0.081678 | 0.083516 | 0.090909 | 0.109890 | |
5410067 | pupils | 789.000000 | 788.000000 | 780.000000 | 778.000000 | 798.000000 | 791.000000 | 788.000000 | 778.000000 | 744.000000 |
fsme | 27.000000 | 35.000000 | 31.000000 | 34.000000 | 37.000000 | 46.000000 | 58.000000 | 73.000000 | 64.000000 | |
free school meals | 0.034221 | 0.044416 | 0.039744 | 0.043702 | 0.046366 | 0.058154 | 0.073604 | 0.093830 | 0.086022 | |
5420045 | pupils | 586.000000 | 587.000000 | 585.000000 | 587.000000 | 586.000000 | 591.000000 | 588.000000 | 564.000000 | 564.000000 |
fsme | 76.000000 | 79.000000 | 71.000000 | 85.000000 | 73.000000 | 135.000000 | 131.000000 | 131.000000 | 126.000000 | |
free school meals | 0.129693 | 0.134583 | 0.121368 | 0.144804 | 0.124573 | 0.228426 | 0.222789 | 0.232270 | 0.223404 | |
5420059 | pupils | 880.000000 | 887.000000 | 894.000000 | 907.000000 | 900.000000 | 911.000000 | 902.000000 | 882.000000 | 885.000000 |
fsme | 77.000000 | 88.000000 | 85.000000 | 87.000000 | 84.000000 | 141.000000 | 155.000000 | 149.000000 | 125.000000 | |
free school meals | 0.087500 | 0.099211 | 0.095078 | 0.095921 | 0.093333 | 0.154775 | 0.171840 | 0.168934 | 0.141243 | |
5420060 | pupils | 861.000000 | 861.000000 | 864.000000 | 861.000000 | 858.000000 | 845.000000 | 853.000000 | 854.000000 | 853.000000 |
fsme | 71.000000 | 77.000000 | 80.000000 | 84.000000 | 81.000000 | 129.000000 | 123.000000 | 124.000000 | 116.000000 | |
free school meals | 0.082462 | 0.089431 | 0.092593 | 0.097561 | 0.094406 | 0.152663 | 0.144197 | 0.145199 | 0.135991 | |
5420062 | pupils | 919.000000 | 940.000000 | 942.000000 | 920.000000 | 900.000000 | 904.000000 | 897.000000 | 903.000000 | 885.000000 |
fsme | 57.000000 | 70.000000 | 72.000000 | 82.000000 | 83.000000 | 135.000000 | 132.000000 | 140.000000 | 134.000000 | |
free school meals | 0.062024 | 0.074468 | 0.076433 | 0.089130 | 0.092222 | 0.149336 | 0.147157 | 0.155039 | 0.151412 | |
5420073 | pupils | 527.000000 | 555.000000 | 579.000000 | 604.000000 | 635.000000 | 678.000000 | 707.000000 | 709.000000 | 686.000000 |
fsme | 82.000000 | 81.000000 | 89.000000 | 95.000000 | 88.000000 | 155.000000 | 170.000000 | 195.000000 | 168.000000 | |
free school meals | 0.155598 | 0.145946 | 0.153713 | 0.157285 | 0.138583 | 0.228614 | 0.240453 | 0.275035 | 0.244898 | |
5420076 | pupils | 836.000000 | 846.000000 | 845.000000 | 849.000000 | 855.000000 | 853.000000 | 854.000000 | 852.000000 | 843.000000 |
fsme | 56.000000 | 68.000000 | 79.000000 | 78.000000 | 74.000000 | 122.000000 | 131.000000 | 130.000000 | 131.000000 | |
free school meals | 0.066986 | 0.080378 | 0.093491 | 0.091873 | 0.086550 | 0.143025 | 0.153396 | 0.152582 | 0.155397 | |
5420260 | pupils | 646.000000 | 658.000000 | 665.000000 | 652.000000 | 653.000000 | 660.000000 | 650.000000 | 652.000000 | 638.000000 |
fsme | 29.000000 | 31.000000 | 31.000000 | 31.000000 | 28.000000 | 58.000000 | 64.000000 | 62.000000 | 61.000000 | |
free school meals | 0.044892 | 0.047112 | 0.046617 | 0.047546 | 0.042879 | 0.087879 | 0.098462 | 0.095092 | 0.095611 |
547 rows × 9 columns
dfs[2016]
Counter(reference[:,:,'school management type'].values.ravel('k'))
reference[:,:,'school management type'].apply(Counter, axis=0)
categories=pd.DataFrame.from_dict({
year: dict(c)
for year,c in reference[:,:,'school management type'].apply(Counter, axis=0).iteritems()
})
categories.plot.bar()
pd.DataFrame.from_dict({
category:
enrolements[:,(reference.minor_xs('school management type')==category).any(axis=1),:].sum().sum()
for category in categories.drop(np.nan).index
}).plot()
enrolements.sum().sum().plot()
Counter(reference.minor_xs('town').values.ravel('k'))
gazetteer = "45341066-1998-4116-b234-6223e7e19397"
gztr = pd.DataFrame.from_records(dataset_generator(gazetteer))
gztr
gztr['town'] = gztr['PLACENAME'].str.lower()
town_lookup = gztr.set_index('town')[['X','Y']]
town_lookup
for town, n in Counter(reference.minor_xs('town').values.ravel('k')).items():
if town not in town_lookup.index:
print(f"{town}:{n}")
from shapely.geometry import Point
from pyproj import Proj, transform
prj_wgs = Proj(proj='latlong',datum='WGS84')
prj_itm = Proj(init='EPSG:29903')
def apply_itm_to_wgs(row, e='eastings', n='northings'):
eastings, northings = row[e], row[n]
if eastings is not None and northings is not None:
long,lat = transform(prj_itm, prj_wgs, eastings, northings)
else:
long,lat = None,None
return pd.Series({'latitude':lat,'longitude':long})
town_lookup = gpd.GeoDataFrame(town_lookup, crs={'init': 'epsg:4326'},
geometry=town_lookup.apply(lambda r: Point(apply_itm_to_wgs(r, e='Y',n='X')), axis=1))
town_lookup.plot()
categories = set(reference.minor_xs('town').values.ravel('k'))
df= pd.DataFrame.from_dict({
category:
enrolements[:,(reference.minor_xs('town')==category).any(axis=1),:].sum().sum()
for category in categories
}).T.drop(np.nan)
town_stats = gpd.GeoDataFrame(geometry=town_lookup.apply(lambda r: Point(apply_itm_to_wgs(r, e='Y',n='X')), axis=1))
town_stats['mean_enrolment'] = df.mean(axis=1)
town_stats['2017_change'] = (-1+df[2017]/df.mean(axis=1))
town_stats.plot(column='2017_change')
constituency_shp = 'http://osni-spatial-ni.opendata.arcgis.com/datasets/563dc2ec3d9943428e3fe68966d40deb_3.zip'
urllib.request.urlretrieve(constituency_shp, 'data/constituency.zip')
import zipfile
with zipfile.ZipFile('data/constituency.zip','r') as z:
z.extractall('data/')
import geopandas as gpd
df = gpd.GeoDataFrame.from_file('data/OSNI_Open_Data_Largescale_Boundaries__Parliamentary_Constituencies_2008.shp')
df