import pandas as pd
import numpy as np
import re
%matplotlib inline
data_files = [
"C:\\Users\\ngass\\Downloads\\ap_2010.csv",
"C:\\Users\\ngass\\Downloads\\class_size.csv",
"C:\\Users\\ngass\\Downloads\\demographics.csv",
"C:\\Users\\ngass\\Downloads\\graduation.csv",
"C:\\Users\\ngass\\Downloads\\hs_directory.csv",
"C:\\Users\\ngass\\Downloads\\sat_results.csv"
]
data = {}
for f in data_files:
d = pd.read_csv("{0}".format(f))
data[f.replace(".csv", "").replace("C:\\Users\\ngass\\Downloads\\", "")] = d
print(data)
print(type(data.values()))
{'ap_2010': DBN SchoolName \ 0 01M448 UNIVERSITY NEIGHBORHOOD H.S. 1 01M450 EAST SIDE COMMUNITY HS 2 01M515 LOWER EASTSIDE PREP 3 01M539 NEW EXPLORATIONS SCI,TECH,MATH 4 02M296 High School of Hospitality Management .. ... ... 253 31R605 STATEN ISLAND TECHNICAL HS 254 32K545 EBC-HS FOR PUB SERVICE (BUSH) 255 32K552 Academy of Urban Planning 256 32K554 All City Leadership Secondary School 257 32K556 Bushwick Leaders High School for Academic Exce... AP Test Takers Total Exams Taken Number of Exams with scores 3 4 or 5 0 39 49 10 1 19 21 NaN 2 24 26 24 3 255 377 191 4 NaN NaN NaN .. ... ... ... 253 528 905 809 254 47 64 13 255 76 100 10 256 7 8 NaN 257 34 35 18 [258 rows x 5 columns], 'class_size': CSD BOROUGH SCHOOL CODE SCHOOL NAME GRADE \ 0 1 M M015 P.S. 015 Roberto Clemente 0K 1 1 M M015 P.S. 015 Roberto Clemente 0K 2 1 M M015 P.S. 015 Roberto Clemente 01 3 1 M M015 P.S. 015 Roberto Clemente 01 4 1 M M015 P.S. 015 Roberto Clemente 02 ... ... ... ... ... ... 27606 32 K K564 Bushwick Community High School 09-12 27607 32 K K564 Bushwick Community High School 09-12 27608 32 K K564 Bushwick Community High School 09-12 27609 32 K K564 Bushwick Community High School 09-12 27610 32 K K564 Bushwick Community High School NaN PROGRAM TYPE CORE SUBJECT (MS CORE and 9-12 ONLY) \ 0 GEN ED - 1 CTT - 2 GEN ED - 3 CTT - 4 GEN ED - ... ... ... 27606 GEN ED SOCIAL STUDIES 27607 GEN ED SOCIAL STUDIES 27608 GEN ED SOCIAL STUDIES 27609 CTT MATH 27610 NaN NaN CORE COURSE (MS CORE and 9-12 ONLY) SERVICE CATEGORY(K-9* ONLY) \ 0 - - 1 - - 2 - - 3 - - 4 - - ... ... ... 27606 US History & Government - 27607 Economics - 27608 Participation in Government - 27609 Integrated Algebra - 27610 NaN NaN NUMBER OF STUDENTS / SEATS FILLED NUMBER OF SECTIONS \ 0 19 1.0 1 21 1.0 2 17 1.0 3 17 1.0 4 15 1.0 ... ... ... 27606 256 10.0 27607 65 2.0 27608 53 2.0 27609 50 2.0 27610 NaN NaN AVERAGE CLASS SIZE SIZE OF SMALLEST CLASS SIZE OF LARGEST CLASS \ 0 19.0 19.0 19.0 1 21.0 21.0 21.0 2 17.0 17.0 17.0 3 17.0 17.0 17.0 4 15.0 15.0 15.0 ... ... ... ... 27606 25.6 15.0 35.0 27607 32.5 32.0 33.0 27608 26.5 25.0 28.0 27609 25.0 25.0 25.0 27610 NaN NaN NaN DATA SOURCE SCHOOLWIDE PUPIL-TEACHER RATIO 0 ATS NaN 1 ATS NaN 2 ATS NaN 3 ATS NaN 4 ATS NaN ... ... ... 27606 STARS NaN 27607 STARS NaN 27608 STARS NaN 27609 STARS NaN 27610 NaN 17.1 [27611 rows x 16 columns], 'demographics': DBN Name schoolyear \ 0 01M015 P.S. 015 ROBERTO CLEMENTE 20052006 1 01M015 P.S. 015 ROBERTO CLEMENTE 20062007 2 01M015 P.S. 015 ROBERTO CLEMENTE 20072008 3 01M015 P.S. 015 ROBERTO CLEMENTE 20082009 4 01M015 P.S. 015 ROBERTO CLEMENTE 20092010 ... ... ... ... 10070 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 20072008 10071 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 20082009 10072 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 20092010 10073 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 20102011 10074 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 20112012 fl_percent frl_percent total_enrollment prek k grade1 grade2 \ 0 89.4 NaN 281 15 36 40 33 1 89.4 NaN 243 15 29 39 38 2 89.4 NaN 261 18 43 39 36 3 89.4 NaN 252 17 37 44 32 4 96.5 208 16 40 28 32 ... ... ... ... ... ... ... ... 10070 85.6 NaN 355 NaN NaN NaN NaN 10071 65.2 NaN 383 NaN NaN NaN NaN 10072 89.2 395 NaN NaN NaN NaN 10073 88.2 420 NaN NaN NaN NaN 10074 NaN 81.8 382 ... black_num black_per hispanic_num hispanic_per white_num white_per \ 0 ... 74 26.3 189 67.3 5 1.8 1 ... 68 28.0 153 63.0 4 1.6 2 ... 77 29.5 157 60.2 7 2.7 3 ... 75 29.8 149 59.1 7 2.8 4 ... 67 32.2 118 56.7 6 2.9 ... ... ... ... ... ... ... ... 10070 ... 151 42.5 198 55.8 5 1.4 10071 ... 132 34.5 245 64.0 2 0.5 10072 ... 146 37.0 242 61.3 3 0.8 10073 ... 166 39.5 242 57.6 8 1.9 10074 ... 137 35.9 235 61.5 7 1.8 male_num male_per female_num female_per 0 158.0 56.2 123.0 43.8 1 140.0 57.6 103.0 42.4 2 143.0 54.8 118.0 45.2 3 149.0 59.1 103.0 40.9 4 124.0 59.6 84.0 40.4 ... ... ... ... ... 10070 181.0 51.0 174.0 49.0 10071 206.0 53.8 177.0 46.2 10072 199.0 50.4 196.0 49.6 10073 209.0 49.8 211.0 50.2 10074 169.0 44.2 213.0 55.8 [10075 rows x 38 columns], 'graduation': Demographic DBN School Name Cohort \ 0 Total Cohort 01M292 HENRY STREET SCHOOL FOR INTERNATIONAL 2003 1 Total Cohort 01M292 HENRY STREET SCHOOL FOR INTERNATIONAL 2004 2 Total Cohort 01M292 HENRY STREET SCHOOL FOR INTERNATIONAL 2005 3 Total Cohort 01M292 HENRY STREET SCHOOL FOR INTERNATIONAL 2006 4 Total Cohort 01M292 HENRY STREET SCHOOL FOR INTERNATIONAL 2006 Aug ... ... ... ... ... 25091 Male 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 2003 25092 Male 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 2004 25093 Male 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 2005 25094 Male 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 2006 25095 Male 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 2006 Aug Total Cohort Total Grads - n Total Grads - % of cohort \ 0 5 s NaN 1 55 37 67.3 2 64 43 67.2 3 78 43 55.1 4 78 44 56.4 ... ... ... ... 25091 65 1 1.5 25092 64 2 3.1 25093 79 3 3.8 25094 57 4 7.0 25095 57 4 7.0 Total Regents - n Total Regents - % of cohort \ 0 s NaN 1 17 30.9 2 27 42.2 3 36 46.2 4 37 47.4 ... ... ... 25091 0 0.0 25092 0 0.0 25093 1 1.3 25094 2 3.5 25095 2 3.5 Total Regents - % of grads ... Regents w/o Advanced - n \ 0 NaN ... s 1 45.9 ... 17 2 62.8 ... 27 3 83.7 ... 36 4 84.1 ... 37 ... ... ... ... 25091 0.0 ... 0 25092 0.0 ... 0 25093 33.3 ... 1 25094 50.0 ... 2 25095 50.0 ... 2 Regents w/o Advanced - % of cohort Regents w/o Advanced - % of grads \ 0 NaN NaN 1 30.9 45.9 2 42.2 62.8 3 46.2 83.7 4 47.4 84.1 ... ... ... 25091 0.0 0.0 25092 0.0 0.0 25093 1.3 33.3 25094 3.5 50.0 25095 3.5 50.0 Local - n Local - % of cohort Local - % of grads Still Enrolled - n \ 0 s NaN NaN s 1 20 36.4 54.1 15 2 16 25.0 37.2 9 3 7 9.0 16.3 16 4 7 9.0 15.9 15 ... ... ... ... ... 25091 1 1.5 100.0 44.0 25092 2 3.1 100.0 38.0 25093 2 2.5 66.7 46.0 25094 2 3.5 50.0 34.0 25095 2 3.5 50.0 34.0 Still Enrolled - % of cohort Dropped Out - n Dropped Out - % of cohort 0 NaN s NaN 1 27.3 3 5.5 2 14.1 9 14.1 3 20.5 11 14.1 4 19.2 11 14.1 ... ... ... ... 25091 67.7 20.0 30.8 25092 59.4 22.0 34.4 25093 58.2 28.0 35.4 25094 59.6 17.0 29.8 25095 59.6 17.0 29.8 [25096 rows x 23 columns], 'hs_directory': dbn school_name borough \ 0 27Q260 Frederick Douglass Academy VI High School Queens 1 21K559 Life Academy High School for Film and Music Brooklyn 2 16K393 Frederick Douglass Academy IV Secondary School Brooklyn 3 08X305 Pablo Neruda Academy Bronx 4 03M485 Fiorello H. LaGuardia High School of Music & A... Manhattan .. ... ... ... 430 02M300 Urban Assembly School of Design and Constructi... Manhattan 431 09X412 Bronx High School of Business Bronx 432 32K549 Bushwick School for Social Justice Brooklyn 433 02M407 Institute for Collaborative Education Manhattan 434 21K344 Rachel Carson High School for Coastal Studies Brooklyn building_code phone_number fax_number grade_span_min grade_span_max \ 0 Q465 718-471-2154 718-471-2890 9.0 12 1 K400 718-333-7750 718-333-7775 9.0 12 2 K026 718-574-2820 718-574-2821 9.0 12 3 X450 718-824-1682 718-824-1663 9.0 12 4 M485 212-496-0700 212-724-5748 9.0 12 .. ... ... ... ... ... 430 M535 212-586-0981 212-586-1731 9.0 12 431 X410 718-410-4060 718-992-5760 9.0 12 432 K480 718-381-7100 718-418-0192 9.0 12 433 M475 212-475-7972 212-475-0459 6.0 12 434 K303 718-265-0329 718-372-2514 9.0 12 expgrade_span_min expgrade_span_max ... priority08 priority09 \ 0 NaN NaN ... NaN NaN 1 NaN NaN ... NaN NaN 2 NaN NaN ... NaN NaN 3 NaN NaN ... NaN NaN 4 NaN NaN ... NaN NaN .. ... ... ... ... ... 430 NaN NaN ... NaN NaN 431 NaN NaN ... NaN NaN 432 NaN NaN ... NaN NaN 433 NaN NaN ... NaN NaN 434 NaN NaN ... NaN NaN priority10 Location 1 \ 0 NaN 8 21 Bay 25 Street\nFar Rockaway, NY 11691\n(4... 1 NaN 2630 Benson Avenue\nBrooklyn, NY 11214\n(40.59... 2 NaN 1014 Lafayette Avenue\nBrooklyn, NY 11221\n(40... 3 NaN 1980 Lafayette Avenue\nBronx, NY 10473\n(40.82... 4 NaN 100 Amsterdam Avenue\nNew York, NY 10023\n(40.... .. ... ... 430 NaN 525 West 50Th Street\nNew York, NY 10019\n(40.... 431 NaN 240 East 172 Street\nBronx, NY 10457\n(40.8403... 432 NaN 400 Irving Avenue\nBrooklyn, NY 11237\n(40.696... 433 NaN 345 East 15Th Street\nNew York, NY 10003\n(40.... 434 NaN 521 West Avenue\nBrooklyn, NY 11224\n(40.58211... Community Board Council District Census Tract BIN BBL \ 0 14.0 31.0 100,802 4,300,730 4,157,360,001 1 13.0 47.0 306 3,186,454 3,068,830,001 2 3.0 36.0 291 3,393,805 3,016,160,001 3 9.0 18.0 16 2,022,205 2,036,040,039 4 7.0 6.0 151 1,030,341 1,011,560,030 .. ... ... ... ... ... 430 4.0 3.0 135 1,083,802 1,010,790,029 431 4.0 16.0 225 2,007,806 2,028,170,002 432 4.0 37.0 435 3,076,908 3,033,650,098 433 6.0 2.0 48 1,020,419 1,009,220,008 434 13.0 47.0 354 3,196,292 3,072,500,025 NTA 0 Far Rockaway-Bayswater ... 1 Gravesend ... 2 Stuyvesant Heights ... 3 Soundview-Castle Hill-Clason Point-Harding Par... 4 Lincoln Square ... .. ... 430 Clinton ... 431 East Concourse-Concourse Village ... 432 Bushwick North ... 433 Gramercy ... 434 West Brighton ... [435 rows x 64 columns], 'sat_results': DBN SCHOOL NAME \ 0 01M292 HENRY STREET SCHOOL FOR INTERNATIONAL STUDIES 1 01M448 UNIVERSITY NEIGHBORHOOD HIGH SCHOOL 2 01M450 EAST SIDE COMMUNITY SCHOOL 3 01M458 FORSYTH SATELLITE ACADEMY 4 01M509 MARTA VALLE HIGH SCHOOL .. ... ... 473 75X012 P.S. X012 LEWIS AND CLARK SCHOOL 474 75X754 J. M. RAPPORT SCHOOL CAREER DEVELOPMENT 475 79M645 SCHOOL FOR COOPERATIVE TECHNICAL EDUCATION 476 79Q950 GED PLUS s CITYWIDE 477 79X490 PHOENIX ACADEMY Num of SAT Test Takers SAT Critical Reading Avg. Score \ 0 29 355 1 91 383 2 70 377 3 7 414 4 44 390 .. ... ... 473 s s 474 s s 475 s s 476 8 496 477 9 367 SAT Math Avg. Score SAT Writing Avg. Score 0 404 363 1 423 366 2 402 370 3 401 359 4 433 384 .. ... ... 473 s s 474 s s 475 s s 476 400 426 477 370 360 [478 rows x 6 columns]} <class 'dict_values'>
all_survey = pd.read_csv("C:\\Users\\ngass\\Downloads\\survey_all.txt", delimiter="\t", encoding='windows-1252')
d75_survey = pd.read_csv("C:\\Users\\ngass\\Downloads\\survey_d75.txt", delimiter="\t", encoding='windows-1252')
survey = pd.concat([all_survey, d75_survey], axis=0)
survey["DBN"] = survey["dbn"]
survey_fields = [
"DBN",
"rr_s",
"rr_t",
"rr_p",
"N_s",
"N_t",
"N_p",
"saf_p_11",
"com_p_11",
"eng_p_11",
"aca_p_11",
"saf_t_11",
"com_t_11",
"eng_t_11",
"aca_t_11",
"saf_s_11",
"com_s_11",
"eng_s_11",
"aca_s_11",
"saf_tot_11",
"com_tot_11",
"eng_tot_11",
"aca_tot_11",
]
survey = survey.loc[:,survey_fields]
data["survey"] = survey
print(data.keys())
dict_keys(['ap_2010', 'class_size', 'demographics', 'graduation', 'hs_directory', 'sat_results', 'survey'])
data["hs_directory"]["DBN"] = data["hs_directory"]["dbn"]
def pad_csd(num):
string_representation = str(num)
if len(string_representation) > 1:
return string_representation
else:
return "0" + string_representation
data["class_size"]["padded_csd"] = data["class_size"]["CSD"].apply(pad_csd)
data["class_size"]["DBN"] = data["class_size"]["padded_csd"] + data["class_size"]["SCHOOL CODE"]
print(data["class_size"].columns)
print(data["sat_results"].columns)
print(data["hs_directory"].columns)
Index(['CSD', 'BOROUGH', 'SCHOOL CODE', 'SCHOOL NAME', 'GRADE ', 'PROGRAM TYPE', 'CORE SUBJECT (MS CORE and 9-12 ONLY)', 'CORE COURSE (MS CORE and 9-12 ONLY)', 'SERVICE CATEGORY(K-9* ONLY)', 'NUMBER OF STUDENTS / SEATS FILLED', 'NUMBER OF SECTIONS', 'AVERAGE CLASS SIZE', 'SIZE OF SMALLEST CLASS', 'SIZE OF LARGEST CLASS', 'DATA SOURCE', 'SCHOOLWIDE PUPIL-TEACHER RATIO', 'padded_csd', 'DBN'], dtype='object') Index(['DBN', 'SCHOOL NAME', 'Num of SAT Test Takers', 'SAT Critical Reading Avg. Score', 'SAT Math Avg. Score', 'SAT Writing Avg. Score'], dtype='object') Index(['dbn', 'school_name', 'borough', 'building_code', 'phone_number', 'fax_number', 'grade_span_min', 'grade_span_max', 'expgrade_span_min', 'expgrade_span_max', 'bus', 'subway', 'primary_address_line_1', 'city', 'state_code', 'postcode', 'website', 'total_students', 'campus_name', 'school_type', 'overview_paragraph', 'program_highlights', 'language_classes', 'advancedplacement_courses', 'online_ap_courses', 'online_language_courses', 'extracurricular_activities', 'psal_sports_boys', 'psal_sports_girls', 'psal_sports_coed', 'school_sports', 'partner_cbo', 'partner_hospital', 'partner_highered', 'partner_cultural', 'partner_nonprofit', 'partner_corporate', 'partner_financial', 'partner_other', 'addtl_info1', 'addtl_info2', 'start_time', 'end_time', 'se_services', 'ell_programs', 'school_accessibility_description', 'number_programs', 'priority01', 'priority02', 'priority03', 'priority04', 'priority05', 'priority06', 'priority07', 'priority08', 'priority09', 'priority10', 'Location 1', 'Community Board', 'Council District', 'Census Tract', 'BIN', 'BBL', 'NTA', 'DBN'], dtype='object')
cols = ['SAT Math Avg. Score', 'SAT Critical Reading Avg. Score', 'SAT Writing Avg. Score']
for c in cols:
data["sat_results"][c] = pd.to_numeric(data["sat_results"][c], errors="coerce")
data['sat_results']['sat_score'] = data['sat_results'][cols[0]] + data['sat_results'][cols[1]] + data['sat_results'][cols[2]]
def find_lat(loc):
coords = re.findall("\(.+, .+\)", loc)
lat = coords[0].split(",")[0].replace("(", "")
return lat
def find_lon(loc):
coords = re.findall("\(.+, .+\)", loc)
lon = coords[0].split(",")[1].replace(")", "").strip()
return lon
data["hs_directory"]["lat"] = data["hs_directory"]["Location 1"].apply(find_lat)
data["hs_directory"]["lon"] = data["hs_directory"]["Location 1"].apply(find_lon)
data["hs_directory"]["lat"] = pd.to_numeric(data["hs_directory"]["lat"], errors="coerce")
data["hs_directory"]["lon"] = pd.to_numeric(data["hs_directory"]["lon"], errors="coerce")
print(data["sat_results"].columns)
print(data["hs_directory"].columns)
Index(['DBN', 'SCHOOL NAME', 'Num of SAT Test Takers', 'SAT Critical Reading Avg. Score', 'SAT Math Avg. Score', 'SAT Writing Avg. Score', 'sat_score'], dtype='object') Index(['dbn', 'school_name', 'borough', 'building_code', 'phone_number', 'fax_number', 'grade_span_min', 'grade_span_max', 'expgrade_span_min', 'expgrade_span_max', 'bus', 'subway', 'primary_address_line_1', 'city', 'state_code', 'postcode', 'website', 'total_students', 'campus_name', 'school_type', 'overview_paragraph', 'program_highlights', 'language_classes', 'advancedplacement_courses', 'online_ap_courses', 'online_language_courses', 'extracurricular_activities', 'psal_sports_boys', 'psal_sports_girls', 'psal_sports_coed', 'school_sports', 'partner_cbo', 'partner_hospital', 'partner_highered', 'partner_cultural', 'partner_nonprofit', 'partner_corporate', 'partner_financial', 'partner_other', 'addtl_info1', 'addtl_info2', 'start_time', 'end_time', 'se_services', 'ell_programs', 'school_accessibility_description', 'number_programs', 'priority01', 'priority02', 'priority03', 'priority04', 'priority05', 'priority06', 'priority07', 'priority08', 'priority09', 'priority10', 'Location 1', 'Community Board', 'Council District', 'Census Tract', 'BIN', 'BBL', 'NTA', 'DBN', 'lat', 'lon'], dtype='object')
class_size = data["class_size"]
class_size = class_size[class_size["GRADE "] == "09-12"]
class_size = class_size[class_size["PROGRAM TYPE"] == "GEN ED"]
class_size = class_size.groupby("DBN").agg(np.mean)
class_size.reset_index(inplace=True)
data["class_size"] = class_size
data["demographics"] = data["demographics"][data["demographics"]["schoolyear"] == 20112012]
data["graduation"] = data["graduation"][data["graduation"]["Cohort"] == "2006"]
data["graduation"] = data["graduation"][data["graduation"]["Demographic"] == "Total Cohort"]
cols = ['AP Test Takers ', 'Total Exams Taken', 'Number of Exams with scores 3 4 or 5']
for col in cols:
data["ap_2010"][col] = pd.to_numeric(data["ap_2010"][col], errors="coerce")
combined = data["sat_results"]
combined = combined.merge(data["ap_2010"], on="DBN", how="left")
combined = combined.merge(data["graduation"], on="DBN", how="left")
to_merge = ["class_size", "demographics", "survey", "hs_directory"]
for m in to_merge:
combined = combined.merge(data[m], on="DBN", how="inner")
combined = combined.fillna(combined.mean())
combined = combined.fillna(0)
for c in combined.columns:
print(c)
DBN SCHOOL NAME Num of SAT Test Takers SAT Critical Reading Avg. Score SAT Math Avg. Score SAT Writing Avg. Score sat_score SchoolName AP Test Takers Total Exams Taken Number of Exams with scores 3 4 or 5 Demographic School Name Cohort Total Cohort Total Grads - n Total Grads - % of cohort Total Regents - n Total Regents - % of cohort Total Regents - % of grads Advanced Regents - n Advanced Regents - % of cohort Advanced Regents - % of grads Regents w/o Advanced - n Regents w/o Advanced - % of cohort Regents w/o Advanced - % of grads Local - n Local - % of cohort Local - % of grads Still Enrolled - n Still Enrolled - % of cohort Dropped Out - n Dropped Out - % of cohort CSD NUMBER OF SECTIONS AVERAGE CLASS SIZE SIZE OF SMALLEST CLASS SIZE OF LARGEST CLASS SCHOOLWIDE PUPIL-TEACHER RATIO Name schoolyear fl_percent frl_percent total_enrollment prek k grade1 grade2 grade3 grade4 grade5 grade6 grade7 grade8 grade9 grade10 grade11 grade12 ell_num ell_percent sped_num sped_percent ctt_num selfcontained_num asian_num asian_per black_num black_per hispanic_num hispanic_per white_num white_per male_num male_per female_num female_per rr_s rr_t rr_p N_s N_t N_p saf_p_11 com_p_11 eng_p_11 aca_p_11 saf_t_11 com_t_11 eng_t_11 aca_t_11 saf_s_11 com_s_11 eng_s_11 aca_s_11 saf_tot_11 com_tot_11 eng_tot_11 aca_tot_11 dbn school_name borough building_code phone_number fax_number grade_span_min grade_span_max expgrade_span_min expgrade_span_max bus subway primary_address_line_1 city state_code postcode website total_students campus_name school_type overview_paragraph program_highlights language_classes advancedplacement_courses online_ap_courses online_language_courses extracurricular_activities psal_sports_boys psal_sports_girls psal_sports_coed school_sports partner_cbo partner_hospital partner_highered partner_cultural partner_nonprofit partner_corporate partner_financial partner_other addtl_info1 addtl_info2 start_time end_time se_services ell_programs school_accessibility_description number_programs priority01 priority02 priority03 priority04 priority05 priority06 priority07 priority08 priority09 priority10 Location 1 Community Board Council District Census Tract BIN BBL NTA lat lon
def get_first_two_chars(dbn):
return dbn[0:2]
combined["school_dist"] = combined["DBN"].apply(get_first_two_chars)
correlations = combined.corr()
correlations = correlations["sat_score"]
print(correlations)
SAT Critical Reading Avg. Score 0.986820 SAT Math Avg. Score 0.972643 SAT Writing Avg. Score 0.987771 sat_score 1.000000 AP Test Takers 0.412667 ... priority10 NaN Community Board -0.060919 Council District -0.076151 lat -0.121029 lon -0.132222 Name: sat_score, Length: 78, dtype: float64
# Remove DBN since it's a unique identifier, not a useful numerical value for correlation.
survey_fields.remove("DBN")
combined.corr()["sat_score"][survey_fields].plot.bar()
<AxesSubplot:>
combined.plot.scatter(x="saf_s_11", y="sat_score")
<AxesSubplot:xlabel='saf_s_11', ylabel='sat_score'>
it seems to me there is some positiv correlation
print(combined["borough"])
0 Manhattan 1 Manhattan 2 Manhattan 3 Manhattan 4 Manhattan ... 358 Brooklyn 359 Brooklyn 360 Brooklyn 361 Brooklyn 362 Brooklyn Name: borough, Length: 363, dtype: object
boros=combined.groupby("borough").agg(np.mean)["saf_s_11"]
print(boros)
borough Bronx 6.606577 Brooklyn 6.370755 Manhattan 6.831370 Queens 6.721875 Staten Island 6.530000 Name: saf_s_11, dtype: float64
Brooklyn hast the lowest safety score among students
cols=["white_per", "asian_per", "black_per", "hispanic_per"]
combined.corr()["sat_score"][cols].plot.bar()
<AxesSubplot:>
shools with a higher percentage of white and aisan race have a high sat_score, whereas scholls with a higher percetange of black and hispanic have a lower sat_score
combined.plot.scatter(x="hispanic_per", y="sat_score")
<AxesSubplot:xlabel='hispanic_per', ylabel='sat_score'>
majority of schools with the higher percentage of hispanic has a lower sat_score
hispanic_95=combined[combined["hispanic_per"]>95]
print(hispanic_95["SCHOOL NAME"])
44 MANHATTAN BRIDGES HIGH SCHOOL 82 WASHINGTON HEIGHTS EXPEDITIONARY LEARNING SCHOOL 89 GREGORIO LUPERON HIGH SCHOOL FOR SCIENCE AND M... 125 ACADEMY FOR LANGUAGE AND TECHNOLOGY 141 INTERNATIONAL SCHOOL FOR LIBERAL ARTS 176 PAN AMERICAN INTERNATIONAL HIGH SCHOOL AT MONROE 253 MULTICULTURAL HIGH SCHOOL 286 PAN AMERICAN INTERNATIONAL HIGH SCHOOL Name: SCHOOL NAME, dtype: object
print(combined[combined["hispanic_per"]>95]["SCHOOL NAME"])
44 MANHATTAN BRIDGES HIGH SCHOOL 82 WASHINGTON HEIGHTS EXPEDITIONARY LEARNING SCHOOL 89 GREGORIO LUPERON HIGH SCHOOL FOR SCIENCE AND M... 125 ACADEMY FOR LANGUAGE AND TECHNOLOGY 141 INTERNATIONAL SCHOOL FOR LIBERAL ARTS 176 PAN AMERICAN INTERNATIONAL HIGH SCHOOL AT MONROE 253 MULTICULTURAL HIGH SCHOOL 286 PAN AMERICAN INTERNATIONAL HIGH SCHOOL Name: SCHOOL NAME, dtype: object
print(combined[(combined["hispanic_per"]<10) & (combined["sat_score"]> 1800)]["SCHOOL NAME"])
37 STUYVESANT HIGH SCHOOL 151 BRONX HIGH SCHOOL OF SCIENCE 187 BROOKLYN TECHNICAL HIGH SCHOOL 327 QUEENS HIGH SCHOOL FOR THE SCIENCES AT YORK CO... 356 STATEN ISLAND TECHNICAL HIGH SCHOOL Name: SCHOOL NAME, dtype: object
male_female=["male_per", "female_per"]
combined.corr()["sat_score"][male_female].plot.bar()
<AxesSubplot:>
combined.plot.scatter("female_per","sat_score")
<AxesSubplot:xlabel='female_per', ylabel='sat_score'>
there is some positive correlation between female_per between 30 and 80 percentage female percentage but the higher the weaker this correlation
print(combined[(combined["female_per"]>60) & (combined["sat_score"]>1700)]["SCHOOL NAME"])
5 BARD HIGH SCHOOL EARLY COLLEGE 26 ELEANOR ROOSEVELT HIGH SCHOOL 60 BEACON HIGH SCHOOL 61 FIORELLO H. LAGUARDIA HIGH SCHOOL OF MUSIC & A... 302 TOWNSEND HARRIS HIGH SCHOOL Name: SCHOOL NAME, dtype: object
combined["ap_per"]=combined["AP Test Takers "]/combined["total_enrollment"]
print(combined["ap_per"])
0 0.254549 1 0.098985 2 0.031773 3 0.292696 4 0.158091 ... 358 0.077558 359 0.247510 360 0.190955 361 0.026616 362 0.073593 Name: ap_per, Length: 363, dtype: float64
combined.plot.scatter(x="ap_per", y="sat_score")
<AxesSubplot:xlabel='ap_per', ylabel='sat_score'>
combined.columns
Index(['DBN', 'SCHOOL NAME', 'Num of SAT Test Takers', 'SAT Critical Reading Avg. Score', 'SAT Math Avg. Score', 'SAT Writing Avg. Score', 'sat_score', 'SchoolName', 'AP Test Takers ', 'Total Exams Taken', ... 'Community Board', 'Council District', 'Census Tract', 'BIN', 'BBL', 'NTA', 'lat', 'lon', 'school_dist', 'ap_per'], dtype='object', length=166)
combined.shape
(363, 166)
for c in combined.columns:
print(c)
DBN SCHOOL NAME Num of SAT Test Takers SAT Critical Reading Avg. Score SAT Math Avg. Score SAT Writing Avg. Score sat_score SchoolName AP Test Takers Total Exams Taken Number of Exams with scores 3 4 or 5 Demographic School Name Cohort Total Cohort Total Grads - n Total Grads - % of cohort Total Regents - n Total Regents - % of cohort Total Regents - % of grads Advanced Regents - n Advanced Regents - % of cohort Advanced Regents - % of grads Regents w/o Advanced - n Regents w/o Advanced - % of cohort Regents w/o Advanced - % of grads Local - n Local - % of cohort Local - % of grads Still Enrolled - n Still Enrolled - % of cohort Dropped Out - n Dropped Out - % of cohort CSD NUMBER OF SECTIONS AVERAGE CLASS SIZE SIZE OF SMALLEST CLASS SIZE OF LARGEST CLASS SCHOOLWIDE PUPIL-TEACHER RATIO Name schoolyear fl_percent frl_percent total_enrollment prek k grade1 grade2 grade3 grade4 grade5 grade6 grade7 grade8 grade9 grade10 grade11 grade12 ell_num ell_percent sped_num sped_percent ctt_num selfcontained_num asian_num asian_per black_num black_per hispanic_num hispanic_per white_num white_per male_num male_per female_num female_per rr_s rr_t rr_p N_s N_t N_p saf_p_11 com_p_11 eng_p_11 aca_p_11 saf_t_11 com_t_11 eng_t_11 aca_t_11 saf_s_11 com_s_11 eng_s_11 aca_s_11 saf_tot_11 com_tot_11 eng_tot_11 aca_tot_11 dbn school_name borough building_code phone_number fax_number grade_span_min grade_span_max expgrade_span_min expgrade_span_max bus subway primary_address_line_1 city state_code postcode website total_students campus_name school_type overview_paragraph program_highlights language_classes advancedplacement_courses online_ap_courses online_language_courses extracurricular_activities psal_sports_boys psal_sports_girls psal_sports_coed school_sports partner_cbo partner_hospital partner_highered partner_cultural partner_nonprofit partner_corporate partner_financial partner_other addtl_info1 addtl_info2 start_time end_time se_services ell_programs school_accessibility_description number_programs priority01 priority02 priority03 priority04 priority05 priority06 priority07 priority08 priority09 priority10 Location 1 Community Board Council District Census Tract BIN BBL NTA lat lon school_dist ap_per
combined.plot.scatter(x="AVERAGE CLASS SIZE", y="sat_score")
<AxesSubplot:xlabel='AVERAGE CLASS SIZE', ylabel='sat_score'>
there seems to be a positive relationship between average class size and the sat_score
boros_best=combined.groupby("borough").agg(np.mean)["sat_score"]
print(boros_best)
borough Bronx 1157.598203 Brooklyn 1181.364461 Manhattan 1278.331410 Queens 1286.753032 Staten Island 1382.500000 Name: sat_score, dtype: float64
c=combined[combined["sat_score"]<1000]["SCHOOL NAME"]
print(c)
91 INTERNATIONAL COMMUNITY HIGH SCHOOL 125 ACADEMY FOR LANGUAGE AND TECHNOLOGY 126 BRONX INTERNATIONAL HIGH SCHOOL 139 KINGSBRIDGE INTERNATIONAL HIGH SCHOOL 141 INTERNATIONAL SCHOOL FOR LIBERAL ARTS 176 PAN AMERICAN INTERNATIONAL HIGH SCHOOL AT MONROE 179 HIGH SCHOOL OF WORLD CULTURES 188 BROOKLYN INTERNATIONAL HIGH SCHOOL 225 INTERNATIONAL HIGH SCHOOL AT PROSPECT HEIGHTS 237 IT TAKES A VILLAGE ACADEMY 253 MULTICULTURAL HIGH SCHOOL 286 PAN AMERICAN INTERNATIONAL HIGH SCHOOL Name: SCHOOL NAME, dtype: object
b=combined[combined["sat_score"]>1200]["SCHOOL NAME"]
print(b)
3 MARTA VALLE HIGH SCHOOL 4 NEW EXPLORATIONS INTO SCIENCE, TECHNOLOGY AND ... 5 BARD HIGH SCHOOL EARLY COLLEGE 10 PACE HIGH SCHOOL 11 URBAN ASSEMBLY SCHOOL OF DESIGN AND CONSTRUCTI... ... 353 TOTTENVILLE HIGH SCHOOL 354 SUSAN E. WAGNER HIGH SCHOOL 355 RALPH R. MCKEE CAREER AND TECHNICAL EDUCATION ... 356 STATEN ISLAND TECHNICAL HIGH SCHOOL 361 ALL CITY LEADERSHIP SECONDARY SCHOOL Name: SCHOOL NAME, Length: 172, dtype: object
a=combined[(combined["sat_score"]>1000) & (combined["sat_score"]<1200)]["SCHOOL NAME"]
print(a)
0 HENRY STREET SCHOOL FOR INTERNATIONAL STUDIES 1 UNIVERSITY NEIGHBORHOOD HIGH SCHOOL 2 EAST SIDE COMMUNITY SCHOOL 6 47 THE AMERICAN SIGN LANGUAGE AND ENGLISH SECO... 7 FOOD AND FINANCE HIGH SCHOOL ... 357 ACADEMY FOR ENVIRONMENTAL LEADERSHIP 358 EBC HIGH SCHOOL FOR PUBLIC SERVICE–BUSHWICK 359 BUSHWICK SCHOOL FOR SOCIAL JUSTICE 360 ACADEMY OF URBAN PLANNING 362 BUSHWICK LEADERS HIGH SCHOOL FOR ACADEMIC EXCE... Name: SCHOOL NAME, Length: 178, dtype: object
school={}
for s in c:
school[s]=1
for s in b:
school[s]=5
for s in a:
school[s]=3
print(school)
{'INTERNATIONAL COMMUNITY HIGH SCHOOL': 1, 'ACADEMY FOR LANGUAGE AND TECHNOLOGY': 1, 'BRONX INTERNATIONAL HIGH SCHOOL': 1, 'KINGSBRIDGE INTERNATIONAL HIGH SCHOOL': 1, 'INTERNATIONAL SCHOOL FOR LIBERAL ARTS': 1, 'PAN AMERICAN INTERNATIONAL HIGH SCHOOL AT MONROE': 1, 'HIGH SCHOOL OF WORLD CULTURES': 1, 'BROOKLYN INTERNATIONAL HIGH SCHOOL': 1, 'INTERNATIONAL HIGH SCHOOL AT PROSPECT HEIGHTS': 1, 'IT TAKES A VILLAGE ACADEMY': 1, 'MULTICULTURAL HIGH SCHOOL': 1, 'PAN AMERICAN INTERNATIONAL HIGH SCHOOL': 1, 'MARTA VALLE HIGH SCHOOL': 5, 'NEW EXPLORATIONS INTO SCIENCE, TECHNOLOGY AND MATH HIGH SCHOOL': 5, 'BARD HIGH SCHOOL EARLY COLLEGE': 5, 'PACE HIGH SCHOOL': 5, 'URBAN ASSEMBLY SCHOOL OF DESIGN AND CONSTRUCTION, THE': 5, 'NYC ISCHOOL': 5, 'MANHATTAN BUSINESS ACADEMY': 5, 'BUSINESS OF SPORTS SCHOOL': 5, 'THE HIGH SCHOOL FOR LANGUAGE AND DIPLOMACY': 5, 'HIGH SCHOOL FOR ENVIRONMENTAL STUDIES': 5, 'PROFESSIONAL PERFORMING ARTS HIGH SCHOOL': 5, 'BARUCH COLLEGE CAMPUS HIGH SCHOOL': 5, 'N.Y.C. LAB SCHOOL FOR COLLABORATIVE STUDIES': 5, 'N.Y.C. MUSEUM SCHOOL': 5, 'ELEANOR ROOSEVELT HIGH SCHOOL': 5, 'MILLENNIUM HIGH SCHOOL': 5, 'HIGH SCHOOL FOR HEALTH PROFESSIONS AND HUMAN SERVICES': 5, 'LEADERSHIP AND PUBLIC SERVICE HIGH SCHOOL': 5, 'MANHATTAN ACADEMY FOR ARTS & LANGUAGE': 5, 'HUDSON HIGH SCHOOL OF LEARNING TECHNOLOGIES': 5, 'INTERNATIONAL HIGH SCHOOL AT UNION SQUARE': 5, 'MANHATTAN VILLAGE ACADEMY': 5, 'MANHATTAN INTERNATIONAL HIGH SCHOOL': 5, 'STUYVESANT HIGH SCHOOL': 5, 'HIGH SCHOOL OF ECONOMICS AND FINANCE': 5, 'TALENT UNLIMITED HIGH SCHOOL': 5, 'MURRY BERGTRAUM HIGH SCHOOL FOR BUSINESS CAREERS': 5, 'JACQUELINE KENNEDY ONASSIS HIGH SCHOOL': 5, 'REPERTORY COMPANY HIGH SCHOOL FOR THEATRE ARTS': 5, 'HIGH SCHOOL FOR DUAL LANGUAGE AND ASIAN STUDIES': 5, 'THE URBAN ASSEMBLY NEW YORK HARBOR SCHOOL': 5, 'THE HIGH SCHOOL OF FASHION INDUSTRIES': 5, 'HUMANITIES PREPARATORY ACADEMY': 5, 'CHELSEA CAREER AND TECHNICAL EDUCATION HIGH SCHOOL': 5, 'ART AND DESIGN HIGH SCHOOL': 5, 'LIFE SCIENCES SECONDARY SCHOOL': 5, 'THE URBAN ASSEMBLY SCHOOL FOR GREEN CAREERS': 5, 'THE GLOBAL LEARNING COLLABORATIVE': 5, 'FRANK MCCOURT HIGH SCHOOL': 5, 'BEACON HIGH SCHOOL': 5, 'FIORELLO H. LAGUARDIA HIGH SCHOOL OF MUSIC & ART AND PERFORMING ARTS': 5, 'HIGH SCHOOL OF ARTS AND TECHNOLOGY': 5, 'MANHATTAN / HUNTER SCIENCE HIGH SCHOOL': 5, 'MANHATTAN CENTER FOR SCIENCE AND MATHEMATICS': 5, 'CENTRAL PARK EAST HIGH SCHOOL': 5, "YOUNG WOMEN'S LEADERSHIP SCHOOL": 5, 'MOTT HALL HIGH SCHOOL': 5, 'COLUMBIA SECONDARY SCHOOL FOR MATH, SCIENCE, AND ENGINEERING': 5, 'FREDERICK DOUGLASS ACADEMY': 5, 'THURGOOD MARSHALL ACADEMY FOR LEARNING AND SOCIAL CHANGE': 5, 'HIGH SCHOOL FOR MATHEMATICS, SCIENCE AND ENGINEERING AT CITY COLLEGE': 5, 'CITY COLLEGE ACADEMY OF THE ARTS': 5, 'HIGH SCHOOL FOR EXCELLENCE AND INNOVATION': 5, 'HIGH SCHOOL FOR HEALTH CAREERS AND SCIENCES': 5, 'A. PHILIP RANDOLPH CAMPUS HIGH SCHOOL': 5, 'UNIVERSITY HEIGHTS SECONDARY SCHOOL': 5, 'HOSTOSsLINCOLN ACADEMY OF SCIENCE': 5, 'BRONX ACADEMY OF LETTERS': 5, 'BRONX STUDIO SCHOOL FOR WRITERS AND ARTISTS': 5, 'ANTONIA PANTOJA PREPARATORY ACADEMY, A COLLEGE BOARD SCHOOL': 5, 'HERBERT H. LEHMAN HIGH SCHOOL': 5, 'BRONX BRIDGES HIGH SCHOOL': 5, 'URBAN ASSEMBLY SCHOOL FOR APPLIED MATH AND SCIENCE, THE': 5, 'BRONX CENTER FOR SCIENCE AND MATHEMATICS': 5, 'BRONX EARLY COLLEGE ACADEMY FOR TEACHING & LEARNING': 5, 'BRONX HIGH SCHOOL FOR MEDICAL SCIENCE': 5, 'BRONX SCHOOL FOR LAW, GOVERNMENT AND JUSTICE': 5, 'RIVERDALE / KINGSBRIDGE ACADEMY (MIDDLE SCHOOL / HIGH SCHOOL 141)': 5, 'BELMONT PREPARATORY HIGH SCHOOL': 5, 'DEWITT CLINTON HIGH SCHOOL': 5, 'CELIA CRUZ BRONX HIGH SCHOOL OF MUSIC, THE': 5, 'BRONX HIGH SCHOOL OF SCIENCE': 5, 'MARBLE HILL HIGH SCHOOL FOR INTERNATIONAL STUDIES': 5, 'HIGH SCHOOL OF AMERICAN STUDIES AT LEHMAN COLLEGE': 5, 'COLLEGIATE INSTITUTE FOR MATH AND SCIENCE': 5, 'BRONX HIGH SCHOOL FOR THE VISUAL ARTS': 5, 'PELHAM PREPARATORY ACADEMY': 5, 'BRONX LATIN': 5, 'THE CINEMA SCHOOL': 5, 'BRONX CAREER AND COLLEGE PREPARATORY HIGH SCHOOL': 5, 'SCIENCE SKILLS CENTER HIGH SCHOOL FOR SCIENCE, TECHNOLOGY AND THE CREATIVE ARTS': 5, 'BROOKLYN TECHNICAL HIGH SCHOOL': 5, 'THE URBAN ASSEMBLY SCHOOL FOR LAW AND JUSTICE': 5, 'URBAN ASSEMBLY INSTITUTE OF MATH AND SCIENCE FOR YOUNG WOMEN': 5, 'BEDFORD ACADEMY HIGH SCHOOL': 5, 'BENJAMIN BANNEKER ACADEMY': 5, 'CITY POLYTECHNIC HIGH SCHOOL OF ENGINEERING, ARCHITECTURE, AND TECHNOLOGY': 5, 'FOUNDATIONS ACADEMY': 5, 'BROOKLYN LATIN SCHOOL, THE': 5, 'SCHOOL FOR LEGAL STUDIES': 5, 'THE HIGH SCHOOL FOR ENTERPRISE, BUSINESS AND TECHNOLOGY': 5, 'SECONDARY SCHOOL FOR LAW': 5, 'SCHOOL FOR INTERNATIONAL STUDIES': 5, 'BROOKLYN HIGH SCHOOL OF THE ARTS': 5, 'SUNSET PARK HIGH SCHOOL': 5, 'THE BROOKLYN ACADEMY OF GLOBAL FINANCE': 5, 'SCIENCE, TECHNOLOGY AND RESEARCH EARLY COLLEGE HIGH SCHOOL AT ERASMUS': 5, 'HIGH SCHOOL FOR PUBLIC SERVICE: HEROES OF TOMORROW': 5, 'MEDGAR EVERS COLLEGE PREPARATORY SCHOOL': 5, 'CLARA BARTON HIGH SCHOOL': 5, 'EAST NEW YORK FAMILY ACADEMY': 5, 'NEW UTRECHT HIGH SCHOOL': 5, 'HIGH SCHOOL OF TELECOMMUNICATION ARTS AND TECHNOLOGY': 5, 'FORT HAMILTON HIGH SCHOOL': 5, 'FRANKLIN DELANO ROOSEVELT HIGH SCHOOL': 5, 'THE URBAN ASSEMBLY SCHOOL FOR CRIMINAL JUSTICE': 5, 'RACHEL CARSON HIGH SCHOOL FOR COASTAL STUDIES': 5, 'ABRAHAM LINCOLN HIGH SCHOOL': 5, 'KINGSBOROUGH EARLY COLLEGE SCHOOL': 5, 'EDWARD R. MURROW HIGH SCHOOL': 5, 'JOHN DEWEY HIGH SCHOOL': 5, 'BROOKLYN STUDIO SECONDARY SCHOOL': 5, 'MIDWOOD HIGH SCHOOL': 5, 'JAMES MADISON HIGH SCHOOL': 5, 'LEON M. GOLDSTEIN HIGH SCHOOL FOR THE SCIENCES': 5, 'BROOKLYN COLLEGE ACADEMY': 5, 'ACADEMY OF FINANCE AND ENTERPRISE': 5, 'HIGH SCHOOL OF APPLIED COMMUNICATION': 5, 'CIVIC LEADERSHIP ACADEMY': 5, 'BARD HIGH SCHOOL EARLY COLLEGE II': 5, 'NEWTOWN HIGH SCHOOL': 5, 'GROVER CLEVELAND HIGH SCHOOL': 5, 'ROBERT F. WAGNER, JR. SECONDARY SCHOOL FOR ARTS AND TECHNOLOGY': 5, 'QUEENS VOCATIONAL AND TECHNICAL HIGH SCHOOL': 5, 'AVIATION CAREER & TECHNICAL EDUCATION HIGH SCHOOL': 5, 'QUEENS SCHOOL OF INQUIRY, THE': 5, 'EASTsWEST SCHOOL OF INTERNATIONAL STUDIES': 5, 'WORLD JOURNALISM PREPARATORY: A COLLEGE BOARD SCHOOL': 5, 'JOHN BOWNE HIGH SCHOOL': 5, 'FLUSHING HIGH SCHOOL': 5, 'TOWNSEND HARRIS HIGH SCHOOL': 5, 'ROBERT F. KENNEDY COMMUNITY HIGH SCHOOL': 5, 'BENJAMIN N. CARDOZO HIGH SCHOOL': 5, 'FRANCIS LEWIS HIGH SCHOOL': 5, 'BAYSIDE HIGH SCHOOL': 5, 'QUEENS HIGH SCHOOL OF TEACHING, LIBERAL ARTS AND THE SCIENCES': 5, 'CHANNEL VIEW SCHOOL FOR RESEARCH': 5, 'ROBERT H. GODDARD HIGH SCHOOL OF COMMUNICATION ARTS AND TECHNOLOGY': 5, "SCHOLARS' ACADEMY": 5, 'HIGH SCHOOL FOR CONSTRUCTION TRADES, ENGINEERING AND ARCHITECTURE': 5, 'YORK EARLY COLLEGE ACADEMY': 5, 'QUEENS COLLEGIATE: A COLLEGE BOARD SCHOOL': 5, 'HILLSIDE ARTS & LETTERS ACADEMY': 5, 'FOREST HILLS HIGH SCHOOL': 5, 'THOMAS A. EDISON CAREER AND TECHNICAL EDUCATION HIGH SCHOOL': 5, 'QUEENS GATEWAY TO HEALTH SCIENCES SECONDARY SCHOOL': 5, 'QUEENS METROPOLITAN HIGH SCHOOL': 5, 'QUEENS HIGH SCHOOL FOR THE SCIENCES AT YORK COLLEGE': 5, "YOUNG WOMEN'S LEADERSHIP SCHOOL, QUEENS": 5, 'EXCELSIOR PREPARATORY HIGH SCHOOL': 5, 'GEORGE WASHINGTON CARVER HIGH SCHOOL FOR THE SCIENCES': 5, 'CAMBRIA HEIGHTS ACADEMY': 5, 'MATHEMATICS, SCIENCE RESEARCH AND TECHNOLOGY MAGNET HIGH SCHOOL': 5, "YOUNG WOMEN'S LEADERSHIP SCHOOL, ASTORIA": 5, 'ACADEMY FOR CAREERS IN TELEVISION AND FILM': 5, 'WILLIAM CULLEN BRYANT HIGH SCHOOL': 5, 'LONG ISLAND CITY HIGH SCHOOL': 5, 'FRANK SINATRA SCHOOL OF THE ARTS HIGH SCHOOL': 5, 'INFORMATION TECHNOLOGY HIGH SCHOOL': 5, 'ACADEMY OF AMERICAN STUDIES': 5, 'BACCALAUREATE SCHOOL FOR GLOBAL EDUCATION': 5, 'CSI HIGH SCHOOL FOR INTERNATIONAL STUDIES': 5, 'THE MICHAEL J. PETRIDES SCHOOL': 5, 'NEW DORP HIGH SCHOOL': 5, 'PORT RICHMOND HIGH SCHOOL': 5, 'CURTIS HIGH SCHOOL': 5, 'TOTTENVILLE HIGH SCHOOL': 5, 'SUSAN E. WAGNER HIGH SCHOOL': 5, 'RALPH R. MCKEE CAREER AND TECHNICAL EDUCATION HIGH SCHOOL': 5, 'STATEN ISLAND TECHNICAL HIGH SCHOOL': 5, 'ALL CITY LEADERSHIP SECONDARY SCHOOL': 5, 'HENRY STREET SCHOOL FOR INTERNATIONAL STUDIES': 3, 'UNIVERSITY NEIGHBORHOOD HIGH SCHOOL': 3, 'EAST SIDE COMMUNITY SCHOOL': 3, '47 THE AMERICAN SIGN LANGUAGE AND ENGLISH SECONDARY SCHOOL': 3, 'FOOD AND FINANCE HIGH SCHOOL': 3, 'ESSEX STREET ACADEMY': 3, 'HIGH SCHOOL OF HOSPITALITY MANAGEMENT': 3, 'FACING HISTORY SCHOOL, THE': 3, 'URBAN ASSEMBLY ACADEMY OF GOVERNMENT AND LAW, THE': 3, 'URBAN ASSEMBLY SCHOOL OF BUSINESS FOR YOUNG WOMEN, THE': 3, 'GRAMERCY ARTS HIGH SCHOOL': 3, 'LANDMARK HIGH SCHOOL': 3, 'VANGUARD HIGH SCHOOL': 3, 'UNITY CENTER FOR URBAN TECHNOLOGIES': 3, 'MANHATTAN BRIDGES HIGH SCHOOL': 3, 'NEW DESIGN HIGH SCHOOL': 3, 'RICHARD R. GREEN HIGH SCHOOL OF TEACHING': 3, 'HIGH SCHOOL FOR ARTS, IMAGINATION AND INQUIRY': 3, 'URBAN ASSEMBLY SCHOOL FOR MEDIA STUDIES, THE': 3, 'WADLEIGH SECONDARY SCHOOL FOR THE PERFORMING & VISUAL ARTS': 3, 'HIGH SCHOOL FOR LAW, ADVOCACY AND COMMUNITY JUSTICE': 3, 'FREDERICK DOUGLASS ACADEMY II SECONDARY SCHOOL': 3, 'COALITION SCHOOL FOR SOCIAL CHANGE': 3, 'PARK EAST HIGH SCHOOL': 3, 'HERITAGE SCHOOL, THE': 3, 'ACADEMY FOR SOCIAL ACTION: A COLLEGE BOARD SCHOOL': 3, 'URBAN ASSEMBLY SCHOOL FOR THE PERFORMING ARTS': 3, 'COMMUNITY HEALTH ACADEMY OF THE HEIGHTS': 3, 'WASHINGTON HEIGHTS EXPEDITIONARY LEARNING SCHOOL': 3, 'HIGH SCHOOL FOR INTERNATIONAL BUSINESS AND FINANCE': 3, 'HIGH SCHOOL FOR MEDIA AND COMMUNICATIONS': 3, 'HIGH SCHOOL FOR LAW AND PUBLIC SERVICE': 3, 'GREGORIO LUPERON HIGH SCHOOL FOR SCIENCE AND MATHEMATICS': 3, 'SOUTH BRONX PREPARATORY: A COLLEGE BOARD SCHOOL': 3, 'COMMUNITY SCHOOL FOR SOCIAL JUSTICE': 3, 'MOTT HAVEN VILLAGE PREPARATORY HIGH SCHOOL': 3, 'FOREIGN LANGUAGE ACADEMY OF GLOBAL STUDIES': 3, 'BRONX LEADERSHIP ACADEMY II HIGH SCHOOL': 3, 'NEW EXPLORERS HIGH SCHOOL': 3, 'URBAN ASSEMBLY SCHOOL FOR CAREERS IN SPORTS': 3, 'ALFRED E. SMITH CAREER AND TECHNICAL EDUCATION HIGH SCHOOL': 3, 'HEALTH OPPORTUNITIES HIGH SCHOOL': 3, "WOMEN'S ACADEMY OF EXCELLENCE": 3, 'RENAISSANCE HIGH SCHOOL FOR MUSICAL THEATER & TECHNOLOGY': 3, 'PABLO NERUDA ACADEMY FOR ARCHITECTURE AND WORLD STUDIES': 3, 'MILLENNIUM ART ACADEMY': 3, 'HOLCOMBE L. RUCKER SCHOOL OF COMMUNITY RESEARCH': 3, 'FELISA RINCON DE GAUTIER INSTITUTE FOR LAW AND PUBLIC POLICY, THE': 3, 'BANANA KELLY HIGH SCHOOL': 3, 'BRONX EXPEDITIONARY LEARNING HIGH SCHOOL': 3, 'EAGLE ACADEMY FOR YOUNG MEN': 3, 'EXIMIUS COLLEGE PREPARATORY ACADEMY: A COLLEGE BOARD SCHOOL': 3, 'MOTT HALL BRONX HIGH SCHOOL': 3, 'VALIDUS PREPARATORY ACADEMY: AN EXPEDITIONARY LEARNING SCHOOL': 3, 'LEADERSHIP INSTITUTE': 3, 'MORRIS ACADEMY FOR COLLABORATIVE STUDIES': 3, 'DREAMYARD PREPARATORY SCHOOL': 3, 'SCHOOL FOR EXCELLENCE': 3, 'BRONX HIGH SCHOOL OF BUSINESS': 3, 'FREDERICK DOUGLASS ACADEMY III SECONDARY SCHOOL': 3, 'BRONX LEADERSHIP ACADEMY HIGH SCHOOL': 3, 'HIGH SCHOOL FOR VIOLIN AND DANCE': 3, 'BRONX ENGINEERING AND TECHNOLOGY ACADEMY': 3, 'THEATRE ARTS PRODUCTION COMPANY SCHOOL': 3, 'THE MARIE CURIE SCHOOL FOR MEDICINE, NURSING, AND HEALTH PROFESSIONS': 3, 'WEST BRONX ACADEMY FOR THE FUTURE': 3, 'BRONX SCHOOL OF LAW AND FINANCE': 3, 'INsTECH ACADEMY (M.S. / HIGH SCHOOL 368)': 3, 'KNOWLEDGE AND POWER PREPARATORY ACADEMY INTERNATIONAL HIGH SCHOOL (KAPPA)': 3, 'HIGH SCHOOL FOR TEACHING AND THE PROFESSIONS': 3, 'FORDHAM HIGH SCHOOL FOR THE ARTS': 3, 'FORDHAM LEADERSHIP ACADEMY FOR BUSINESS AND TECHNOLOGY': 3, 'BRONX HIGH SCHOOL FOR LAW AND COMMUNITY SERVICE': 3, 'BRONX THEATRE HIGH SCHOOL': 3, 'DISCOVERY HIGH SCHOOL': 3, 'BRONX HEALTH SCIENCES HIGH SCHOOL': 3, 'BRONX HIGH SCHOOL FOR WRITING AND COMMUNICATION ARTS': 3, 'BRONX LAB SCHOOL': 3, 'ACADEMY FOR SCHOLARSHIP AND ENTREPRENEURSHIP: A COLLEGE BOARD SCHOOL': 3, 'HIGH SCHOOL OF COMPUTERS AND TECHNOLOGY': 3, 'BRONX ACADEMY OF HEALTH CAREERS': 3, 'ASTOR COLLEGIATE ACADEMY': 3, 'HARRY S TRUMAN HIGH SCHOOL': 3, 'NEW WORLD HIGH SCHOOL': 3, 'THE BRONXWOOD PREPARATORY ACADEMY': 3, 'HIGH SCHOOL FOR CONTEMPORARY ARTS': 3, 'BRONX AEROSPACE HIGH SCHOOL': 3, 'METROPOLITAN HIGH SCHOOL, THE': 3, 'EXPLORATIONS ACADEMY': 3, 'EAST BRONX ACADEMY FOR THE FUTURE': 3, 'PEACE AND DIVERSITY ACADEMY': 3, 'FANNIE LOU HAMER FREEDOM HIGH SCHOOL': 3, 'WINGS ACADEMY': 3, 'MONROE ACADEMY FOR VISUAL ARTS & DESIGN': 3, 'DR. SUSAN S. MCKINNEY SECONDARY SCHOOL OF THE ARTS': 3, 'URBAN ASSEMBLY HIGH SCHOOL OF MUSIC AND ART': 3, 'BROOKLYN COMMUNITY HIGH SCHOOL OF COMMUNICATION, ARTS AND MEDIA': 3, 'ACORN COMMUNITY HIGH SCHOOL': 3, 'GEORGE WESTINGHOUSE CAREER AND TECHNICAL EDUCATION HIGH SCHOOL': 3, 'JUAN MOREL CAMPOS SECONDARY SCHOOL': 3, 'GREEN SCHOOL: AN ACADEMY FOR ENVIRONMENTAL CAREERS': 3, 'PROGRESS HIGH SCHOOL FOR PROFESSIONAL CAREERS': 3, 'BROOKLYN PREPARATORY HIGH SCHOOL': 3, 'WILLIAMSBURG HIGH SCHOOL FOR ARCHITECTURE AND DESIGN': 3, 'WILLIAMSBURG PREPARATORY SCHOOL': 3, 'LYONS COMMUNITY SCHOOL': 3, 'AUTOMOTIVE HIGH SCHOOL': 3, 'FRANCES PERKINS ACADEMY': 3, 'EL PUENTE ACADEMY FOR PEACE AND JUSTICE': 3, 'BROOKLYN SCHOOL FOR GLOBAL STUDIES': 3, 'BROOKLYN SECONDARY SCHOOL FOR COLLABORATIVE STUDIES': 3, 'SECONDARY SCHOOL FOR JOURNALISM': 3, 'PARK SLOPE COLLEGIATE': 3, 'COBBLE HILL SCHOOL OF AMERICAN STUDIES': 3, 'FREDERICK DOUGLASS ACADEMY IV SECONDARY SCHOOL': 3, 'BOYS AND GIRLS HIGH SCHOOL': 3, 'BROOKLYN HIGH SCHOOL FOR LAW AND TECHNOLOGY': 3, 'ACADEMY FOR COLLEGE PREPARATION AND CAREER EXPLORATION: A COLLEGE BOARD SCHOOL': 3, 'ACADEMY OF HOSPITALITY AND TOURISM': 3, 'THE HIGH SCHOOL FOR GLOBAL CITIZENSHIP': 3, 'SCHOOL FOR HUMAN RIGHTS, THE': 3, 'SCHOOL FOR DEMOCRACY AND LEADERSHIP': 3, 'HIGH SCHOOL FOR YOUTH AND COMMUNITY DEVELOPMENT AT ERASMUS': 3, 'HIGH SCHOOL FOR SERVICE & LEARNING AT ERASMUS': 3, 'BROOKLYN ACADEMY OF SCIENCE AND THE ENVIRONMENT': 3, 'BROOKLYN SCHOOL FOR MUSIC & THEATRE': 3, 'BROOKLYN GENERATION SCHOOL': 3, 'BROOKLYN THEATRE ARTS HIGH SCHOOL': 3, 'KURT HAHN EXPEDITIONARY LEARNING SCHOOL': 3, 'VICTORY COLLEGIATE HIGH SCHOOL': 3, 'ARTS & MEDIA PREPARATORY ACADEMY': 3, 'HIGH SCHOOL FOR INNOVATION IN ADVERTISING AND MEDIA': 3, 'CULTURAL ACADEMY FOR THE ARTS AND SCIENCES': 3, 'HIGH SCHOOL FOR MEDICAL PROFESSIONS': 3, 'ACADEMY FOR CONSERVATION AND THE ENVIRONMENT': 3, 'URBAN ACTION ACADEMY': 3, 'FDNY HIGH SCHOOL FOR FIRE AND LIFE SAFETY': 3, 'HIGH SCHOOL FOR CIVIL RIGHTS': 3, 'PERFORMING ARTS AND TECHNOLOGY HIGH SCHOOL': 3, 'WORLD ACADEMY FOR TOTAL COMMUNITY HEALTH HIGH SCHOOL': 3, 'TRANSIT TECH CAREER AND TECHNICAL EDUCATION HIGH SCHOOL': 3, 'ACADEMY OF INNOVATIVE TECHNOLOGY': 3, 'BROOKLYN LAB SCHOOL': 3, 'CYPRESS HILLS COLLEGIATE PREPARATORY SCHOOL': 3, 'W. H. MAXWELL CAREER AND TECHNICAL EDUCATION HIGH SCHOOL': 3, 'THE SCHOOL FOR CLASSICS: AN ACADEMY OF THINKERS, WRITERS AND PERFORMERS': 3, 'INTERNATIONAL HIGH SCHOOL AT LAFAYETTE': 3, 'HIGH SCHOOL OF SPORTS MANAGEMENT': 3, 'LIFE ACADEMY HIGH SCHOOL FOR FILM AND MUSIC': 3, 'EXPEDITIONARY LEARNING SCHOOL FOR COMMUNITY LEADERS': 3, 'WILLIAM E. GRADY CAREER AND TECHNICAL EDUCATION HIGH SCHOOL': 3, 'BROOKLYN COLLEGIATE: A COLLEGE BOARD SCHOOL': 3, 'FREDERICK DOUGLASS ACADEMY VII HIGH SCHOOL': 3, 'TEACHERS PREPARATORY HIGH SCHOOL': 3, 'MIDDLE COLLEGE HIGH SCHOOL AT LAGUARDIA COMMUNITY COLLEGE': 3, 'INTERNATIONAL HIGH SCHOOL AT LAGUARDIA COMMUNITY COLLEGE': 3, 'HIGH SCHOOL FOR ARTS AND BUSINESS': 3, 'FLUSHING INTERNATIONAL HIGH SCHOOL': 3, 'MARTIN VAN BUREN HIGH SCHOOL': 3, 'FREDERICK DOUGLASS ACADEMY VI HIGH SCHOOL': 3, 'QUEENS HIGH SCHOOL FOR INFORMATION, RESEARCH, AND TECHNOLOGY': 3, 'ACADEMY OF MEDICAL TECHNOLOGY: A COLLEGE BOARD SCHOOL': 3, 'AUGUST MARTIN HIGH SCHOOL': 3, 'RICHMOND HILL HIGH SCHOOL': 3, 'JOHN ADAMS HIGH SCHOOL': 3, 'HILLCREST HIGH SCHOOL': 3, 'HIGH SCHOOL FOR LAW ENFORCEMENT AND PUBLIC SAFETY': 3, 'QUEENS PREPARATORY ACADEMY': 3, 'PATHWAYS COLLEGE PREPARATORY SCHOOL: A COLLEGE BOARD SCHOOL': 3, 'PREPARATORY ACADEMY FOR WRITERS: A COLLEGE BOARD SCHOOL': 3, 'HUMANITIES & ARTS MAGNET HIGH SCHOOL': 3, 'NEWCOMERS HIGH SCHOOL': 3, 'GAYNOR MCCOWN EXPEDITIONARY LEARNING SCHOOL': 3, 'ACADEMY FOR ENVIRONMENTAL LEADERSHIP': 3, 'EBC HIGH SCHOOL FOR PUBLIC SERVICE–BUSHWICK': 3, 'BUSHWICK SCHOOL FOR SOCIAL JUSTICE': 3, 'ACADEMY OF URBAN PLANNING': 3, 'BUSHWICK LEADERS HIGH SCHOOL FOR ACADEMIC EXCELLENCE': 3}
combined1=pd.DataFrame.from_dict(school, orient="index", columns=["school_score"])
print(combined1)
school_score INTERNATIONAL COMMUNITY HIGH SCHOOL 1 ACADEMY FOR LANGUAGE AND TECHNOLOGY 1 BRONX INTERNATIONAL HIGH SCHOOL 1 KINGSBRIDGE INTERNATIONAL HIGH SCHOOL 1 INTERNATIONAL SCHOOL FOR LIBERAL ARTS 1 ... ... ACADEMY FOR ENVIRONMENTAL LEADERSHIP 3 EBC HIGH SCHOOL FOR PUBLIC SERVICE–BUSHWICK 3 BUSHWICK SCHOOL FOR SOCIAL JUSTICE 3 ACADEMY OF URBAN PLANNING 3 BUSHWICK LEADERS HIGH SCHOOL FOR ACADEMIC EXCEL... 3 [361 rows x 1 columns]
combined1.reset_index(inplace = True)
combined1.rename(columns={"index":"SCHOOL NAME","school_score":"SCHOOL SCORE"}, inplace=True)
combined2=pd.merge(combined, combined1, how="left", on="SCHOOL NAME")
combined1["SCHOOL NAME"]
0 INTERNATIONAL COMMUNITY HIGH SCHOOL 1 ACADEMY FOR LANGUAGE AND TECHNOLOGY 2 BRONX INTERNATIONAL HIGH SCHOOL 3 KINGSBRIDGE INTERNATIONAL HIGH SCHOOL 4 INTERNATIONAL SCHOOL FOR LIBERAL ARTS ... 356 ACADEMY FOR ENVIRONMENTAL LEADERSHIP 357 EBC HIGH SCHOOL FOR PUBLIC SERVICE–BUSHWICK 358 BUSHWICK SCHOOL FOR SOCIAL JUSTICE 359 ACADEMY OF URBAN PLANNING 360 BUSHWICK LEADERS HIGH SCHOOL FOR ACADEMIC EXCE... Name: SCHOOL NAME, Length: 361, dtype: object
print(combined2)
DBN SCHOOL NAME \ 0 01M292 HENRY STREET SCHOOL FOR INTERNATIONAL STUDIES 1 01M448 UNIVERSITY NEIGHBORHOOD HIGH SCHOOL 2 01M450 EAST SIDE COMMUNITY SCHOOL 3 01M509 MARTA VALLE HIGH SCHOOL 4 01M539 NEW EXPLORATIONS INTO SCIENCE, TECHNOLOGY AND ... .. ... ... 358 32K545 EBC HIGH SCHOOL FOR PUBLIC SERVICE–BUSHWICK 359 32K549 BUSHWICK SCHOOL FOR SOCIAL JUSTICE 360 32K552 ACADEMY OF URBAN PLANNING 361 32K554 ALL CITY LEADERSHIP SECONDARY SCHOOL 362 32K556 BUSHWICK LEADERS HIGH SCHOOL FOR ACADEMIC EXCE... Num of SAT Test Takers SAT Critical Reading Avg. Score \ 0 29 355.0 1 91 383.0 2 70 377.0 3 44 390.0 4 159 522.0 .. ... ... 358 88 384.0 359 75 348.0 360 67 342.0 361 39 428.0 362 23 347.0 SAT Math Avg. Score SAT Writing Avg. Score sat_score \ 0 404.0 363.0 1122.0 1 423.0 366.0 1172.0 2 402.0 370.0 1149.0 3 433.0 384.0 1207.0 4 574.0 525.0 1621.0 .. ... ... ... 358 409.0 361.0 1154.0 359 353.0 334.0 1035.0 360 364.0 354.0 1060.0 361 465.0 422.0 1315.0 362 358.0 350.0 1055.0 SchoolName AP Test Takers \ 0 0 107.419512 1 UNIVERSITY NEIGHBORHOOD H.S. 39.000000 2 EAST SIDE COMMUNITY HS 19.000000 3 0 107.419512 4 NEW EXPLORATIONS SCI,TECH,MATH 255.000000 .. ... ... 358 EBC-HS FOR PUB SERVICE (BUSH) 47.000000 359 0 107.419512 360 Academy of Urban Planning 76.000000 361 All City Leadership Secondary School 7.000000 362 Bushwick Leaders High School for Academic Exce... 34.000000 Total Exams Taken ... Council District Census Tract BIN \ 0 131.425 ... 1.0 201 1,003,223 1 49.000 ... 1.0 202 1,003,214 2 21.000 ... 2.0 34 1,005,974 3 131.425 ... 1.0 3,001 1,004,323 4 377.000 ... 2.0 2,201 1,004,070 .. ... ... ... ... ... 358 64.000 ... 34.0 393 3,073,462 359 131.425 ... 37.0 435 3,076,908 360 100.000 ... 37.0 435 3,076,908 361 8.000 ... 37.0 433 3,413,715 362 35.000 ... 34.0 393 3,073,761 BBL NTA \ 0 1,002,690,041 Lower East Side ... 1 1,002,590,044 Lower East Side ... 2 1,004,390,017 East Village ... 3 1,003,540,080 Chinatown ... 4 1,003,350,001 Lower East Side ... .. ... ... 358 3,032,300,027 Bushwick South ... 359 3,033,650,098 Bushwick North ... 360 3,033,650,098 Bushwick North ... 361 3,033,440,136 Bushwick North ... 362 3,032,420,001 Bushwick South ... lat lon school_dist ap_per SCHOOL SCORE 0 40.713764 -73.985260 01 0.254549 3.0 1 40.712332 -73.984797 01 0.098985 3.0 2 40.729783 -73.983041 01 0.031773 3.0 3 40.720569 -73.985673 01 0.292696 5.0 4 40.718725 -73.979426 01 0.158091 5.0 .. ... ... ... ... ... 358 40.694482 -73.929154 32 0.077558 3.0 359 40.696962 -73.910816 32 0.247510 3.0 360 40.696962 -73.910816 32 0.190955 3.0 361 40.697408 -73.913153 32 0.026616 5.0 362 40.694996 -73.927986 32 0.073593 3.0 [363 rows x 167 columns]
combined=combined2
print(combined)
DBN SCHOOL NAME \ 0 01M292 HENRY STREET SCHOOL FOR INTERNATIONAL STUDIES 1 01M448 UNIVERSITY NEIGHBORHOOD HIGH SCHOOL 2 01M450 EAST SIDE COMMUNITY SCHOOL 3 01M509 MARTA VALLE HIGH SCHOOL 4 01M539 NEW EXPLORATIONS INTO SCIENCE, TECHNOLOGY AND ... .. ... ... 358 32K545 EBC HIGH SCHOOL FOR PUBLIC SERVICE–BUSHWICK 359 32K549 BUSHWICK SCHOOL FOR SOCIAL JUSTICE 360 32K552 ACADEMY OF URBAN PLANNING 361 32K554 ALL CITY LEADERSHIP SECONDARY SCHOOL 362 32K556 BUSHWICK LEADERS HIGH SCHOOL FOR ACADEMIC EXCE... Num of SAT Test Takers SAT Critical Reading Avg. Score \ 0 29 355.0 1 91 383.0 2 70 377.0 3 44 390.0 4 159 522.0 .. ... ... 358 88 384.0 359 75 348.0 360 67 342.0 361 39 428.0 362 23 347.0 SAT Math Avg. Score SAT Writing Avg. Score sat_score \ 0 404.0 363.0 1122.0 1 423.0 366.0 1172.0 2 402.0 370.0 1149.0 3 433.0 384.0 1207.0 4 574.0 525.0 1621.0 .. ... ... ... 358 409.0 361.0 1154.0 359 353.0 334.0 1035.0 360 364.0 354.0 1060.0 361 465.0 422.0 1315.0 362 358.0 350.0 1055.0 SchoolName AP Test Takers \ 0 0 107.419512 1 UNIVERSITY NEIGHBORHOOD H.S. 39.000000 2 EAST SIDE COMMUNITY HS 19.000000 3 0 107.419512 4 NEW EXPLORATIONS SCI,TECH,MATH 255.000000 .. ... ... 358 EBC-HS FOR PUB SERVICE (BUSH) 47.000000 359 0 107.419512 360 Academy of Urban Planning 76.000000 361 All City Leadership Secondary School 7.000000 362 Bushwick Leaders High School for Academic Exce... 34.000000 Total Exams Taken ... Council District Census Tract BIN \ 0 131.425 ... 1.0 201 1,003,223 1 49.000 ... 1.0 202 1,003,214 2 21.000 ... 2.0 34 1,005,974 3 131.425 ... 1.0 3,001 1,004,323 4 377.000 ... 2.0 2,201 1,004,070 .. ... ... ... ... ... 358 64.000 ... 34.0 393 3,073,462 359 131.425 ... 37.0 435 3,076,908 360 100.000 ... 37.0 435 3,076,908 361 8.000 ... 37.0 433 3,413,715 362 35.000 ... 34.0 393 3,073,761 BBL NTA \ 0 1,002,690,041 Lower East Side ... 1 1,002,590,044 Lower East Side ... 2 1,004,390,017 East Village ... 3 1,003,540,080 Chinatown ... 4 1,003,350,001 Lower East Side ... .. ... ... 358 3,032,300,027 Bushwick South ... 359 3,033,650,098 Bushwick North ... 360 3,033,650,098 Bushwick North ... 361 3,033,440,136 Bushwick North ... 362 3,032,420,001 Bushwick South ... lat lon school_dist ap_per SCHOOL SCORE 0 40.713764 -73.985260 01 0.254549 3.0 1 40.712332 -73.984797 01 0.098985 3.0 2 40.729783 -73.983041 01 0.031773 3.0 3 40.720569 -73.985673 01 0.292696 5.0 4 40.718725 -73.979426 01 0.158091 5.0 .. ... ... ... ... ... 358 40.694482 -73.929154 32 0.077558 3.0 359 40.696962 -73.910816 32 0.247510 3.0 360 40.696962 -73.910816 32 0.190955 3.0 361 40.697408 -73.913153 32 0.026616 5.0 362 40.694996 -73.927986 32 0.073593 3.0 [363 rows x 167 columns]
combined[["rr_s","rr_t","rr_p","N_p","N_t","N_s"]].apply([np.mean,np.max,np.min])
rr_s | rr_t | rr_p | N_p | N_t | N_s | |
---|---|---|---|---|---|---|
mean | 80.936639 | 83.269972 | 38.947658 | 247.721763 | 39.603306 | 598.208333 |
amax | 100.000000 | 100.000000 | 93.000000 | 2804.000000 | 244.000000 | 4768.000000 |
amin | 0.000000 | 20.000000 | 4.000000 | 12.000000 | 5.000000 | 70.000000 |
combined.plot.scatter(x="N_s", y="sat_score")
<AxesSubplot:xlabel='N_s', ylabel='sat_score'>
the higher the number of student-respondents the higher the sat_score, which was expected
combined.plot.scatter(x="rr_s", y="sat_score")
<AxesSubplot:xlabel='rr_s', ylabel='sat_score'>
there is a weak correlation between student response rate and sat_score only above 60 there is a positive correlation
columns=["rr_s","rr_t","rr_p","N_p","N_t","N_s"]
combined.corr()["sat_score"][columns].plot.bar()
<AxesSubplot:>