In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

DATA SOURCE

· From World University Ranking 2012-2016,We download the data of 2012-2016.

https://data.world/hhaveliw/world-university-ranking-2016

· Then from Times Higher Education:world university rankings 2017-2018,We manually copy-and-paste the data we need of 2017-2018 as supplementary.

https://www.timeshighereducation.com/cn/world-university-rankings

In [2]:
df = pd.read_csv('school ranking.csv',)
df
Out[2]:
World_Rank University_Name Country %_Female_Students Year
0 1 California Institute of Technology United States of America 33.0 2012
1 2 Harvard University United States of America NaN 2012
2 2 Stanford University United States of America 42.0 2012
3 4 University of Oxford United Kingdom 46.0 2012
4 5 Princeton University United States of America 45.0 2012
5 6 University of Cambridge United Kingdom 46.0 2012
6 7 Massachusetts Institute of Technology United States of America 37.0 2012
7 8 Imperial College London United Kingdom 37.0 2012
8 9 University of Chicago United States of America 42.0 2012
9 10 University of California, Berkeley United States of America 50.0 2012
10 11 Yale University United States of America 50.0 2012
11 12 Columbia University United States of America NaN 2012
12 13 University of California, Los Angeles United States of America 52.0 2012
13 14 Johns Hopkins University United States of America 50.0 2012
14 15 ETH Zurich – Swiss Federal Institute of Tech... Switzerland 31.0 2012
15 16 University of Pennsylvania United States of America 51.0 2012
16 17 University College London United Kingdom 56.0 2012
17 18 University of Michigan United States of America 48.0 2012
18 19 University of Toronto Canada NaN 2012
19 20 Cornell University United States of America 48.0 2012
20 21 Carnegie Mellon University United States of America 39.0 2012
21 22 Duke University United States of America 49.0 2012
22 22 University of British Columbia Canada 54.0 2012
23 24 Georgia Institute of Technology United States of America 31.0 2012
24 25 University of Washington United States of America 53.0 2012
25 26 Northwestern University United States of America 48.0 2012
26 27 University of Wisconsin-Madison United States of America 51.0 2012
27 28 McGill University Canada 56.0 2012
28 29 University of Texas at Austin United States of America 51.0 2012
29 30 University of Tokyo Japan NaN 2012
... ... ... ... ... ...
2777 350-400 Justus Liebig University Giessen Germany 61.0 2018
2778 350-400 University of Kansas United States 51.0 2018
2779 350-400 Kyushu University Japan 29.0 2018
2780 350-400 La Trobe University Australia 63.0 2018
2781 350-400 Leibniz University of Hanover Germany 41.0 2018
2782 350-400 Linköping University Sweden 53.0 2018
2783 350-400 University of Macau Macao 58.0 2018
2784 350-400 University of Malaya Malaysia 66.0 2018
2785 350-400 Montpellier University France 53.0 2018
2786 350-400 Örebro University Sweden 61.0 2018
2787 350-400 University of Padua Italy 55.0 2018
2788 350-400 University of Pavia Italy 56.0 2018
2789 350-400 University of Pisa Italy 52.0 2018
2790 350-400 Royal Veterinary College United Kingdom 75.0 2018
2791 350-400 Sabancı University Turkey 41.0 2018
2792 350-400 University of Salerno Italy 60.0 2018
2793 350-400 University of South Carolina United States 55.0 2018
2794 350-400 Stellenbosch University South Africa 54.0 2018
2795 350-400 University of Strasbourg France 58.0 2018
2796 350-400 Sun Yat-sen University China 51.0 2018
2797 350-400 Temple University United States 52.0 2018
2798 350-400 University of Trieste Italy 56.0 2018
2799 350-400 Tulane University United States 56.0 2018
2800 350-400 University of Turku Finland 62.0 2018
2801 350-400 University College Cork Ireland 56.0 2018
2802 350-400 University of Vermont United States 56.0 2018
2803 350-400 Université de Versailles Saint-Quentin-en-Yvel... France 59.0 2018
2804 350-400 University of Waikato New Zealand 56.0 2018
2805 350-400 Wayne State University United States 58.0 2018
2806 350-400 York University Canada 56.0 2018

2807 rows × 5 columns

In [3]:
def clean_us_name(s):
    if s == 'United States of America':
        return 'United States'
    else:
        return s
df['Country'] = df['Country'].apply(clean_us_name)

World‘s Mean

selecting female data & see means of each year.

and then use the pivot table to come up with a new chart.

In [4]:
df['%_Female_Students'].mean()
Out[4]:
50.15897634742148
In [5]:
female_data_group_by_year = df.groupby('Year').mean()
female_data_group_by_year.plot()
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x10496f550>
In [12]:
pd.pivot_table(df, values='%_Female_Students', index='Country', columns='Year', aggfunc=np.min).T
Out[12]:
Country Australia Austria Belgium Brazil Canada Chile China Colombia Cyprus Czech Republic ... Spain Sweden Switzerland Taiwan Thailand Turkey Unisted States of America United Arab Emirates United Kingdom United States
Year
2012 44.0 27.0 54.0 48.0 45.0 NaN 29.0 NaN NaN 62.0 ... 12.0 30.0 27.0 27.0 63.0 34.0 NaN NaN 37.0 25.0
2013 47.0 27.0 54.0 48.0 45.0 NaN 32.0 44.0 NaN 62.0 ... 36.0 30.0 27.0 27.0 42.0 34.0 NaN NaN 37.0 26.0
2014 44.0 27.0 54.0 48.0 45.0 NaN 31.0 44.0 NaN 62.0 ... 36.0 30.0 27.0 27.0 42.0 34.0 NaN NaN 37.0 26.0
2015 44.0 27.0 54.0 48.0 45.0 24.0 31.0 44.0 NaN 62.0 ... 52.0 30.0 27.0 27.0 42.0 34.0 NaN NaN 37.0 26.0
2016 44.0 22.0 54.0 48.0 45.0 NaN 32.0 NaN 69.0 49.0 ... 52.0 30.0 27.0 27.0 NaN 39.0 NaN NaN 37.0 26.0
2017 39.0 23.0 49.0 48.0 45.0 NaN 32.0 NaN 69.0 NaN ... 52.0 31.0 27.0 33.0 NaN 39.0 NaN NaN 37.0 27.0
2018 43.0 29.0 50.0 48.0 46.0 NaN 22.0 NaN 57.0 NaN ... 53.0 31.0 28.0 33.0 NaN 41.0 NaN 46.0 37.0 28.0

7 rows × 54 columns

Find out the relationship between rank and female ratio

Firstly, we use the def fuction to delete the universities ranked in 201-400,

and select the ratio and the rank top 200 university to draw a chart

then use the diffrent colors to stress the top 20 universities.

In [7]:
def convert_rank(s):
    try:
        return int(s)
    except:
        return 201
df['rank'] = df['World_Rank'].apply(convert_rank)
In [8]:
myax = df[
    (df['rank'] < 201)
].plot('rank', '%_Female_Students', kind='scatter', color='#B2FFDE', alpha=0.5)

df[
    (df['rank'] < 15)
].plot('rank', '%_Female_Students', kind='scatter', color='#FF5B3E', ax=myax, alpha=0.3)
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x1041a66d8>
In [9]:
df[df['University_Name'].apply(lambda x: 'Technology'in x)].mean()
Out[9]:
%_Female_Students      30.846154
Year                 2014.967391
rank                  144.576087
dtype: float64
In [10]:
df[df['University_Name'].apply(lambda x: 'Technology'not in x)].mean()
Out[10]:
%_Female_Students      51.513278
Year                 2015.003050
rank                  151.125048
dtype: float64

Continental

set up groups by continents

see means of specific continents by years

The line chart shows Asia is the continental with the lowest ratio, while the Austrialian has the highest ratio.

In [13]:
state_mapping = {
    'China': 'Asia',
    'India': 'Asia',
    'Japan': 'Asia',
    'South Korea': 'Asia',
    'Taiwan': 'Asia',
    'Hong Kong': 'Asia',
    'Macau': 'Asia',
    'Macao': 'Asia',
    'Thailand': 'Asia',
    'Turkey': 'Asia',
    'Singapore': 'Asia',
    'Malaysia': 'Asia',
    'Saudi Arabia': 'Asia',
    'United Arab Emirates': 'Asia',
    'Maoco': 'Asia',
    'Canada': 'Americas',
    'United States': 'Americas',
    'Unisted States of America': 'Americas',
    'Canada': 'Americas',
    'Colombia': 'Americas',
    'Chile': 'Americas',
    'Mexico': 'Americas',
    'Brazil': 'Americas',
    'United Kingdom':'Europe',
    'Germany':'Europe',
    'Italy':'Europe',
    'Netherlands':'Europe',
    'France':'Europe', 
    'Sweden':'Europe',
    'Belgium':'Europe',
    'Switzerland':'Europe',
    'Spain':'Europe',
    'Finland':'Europe',
    'Denmark':'Europe',
    'Norway':'Europe',
    'Republic of Ireland':'Europe',
    'Russian Federation':'Europe',
    'Portugal':'Europe',
    'Ireland':'Europe',
    'Greece':'Europe',
    'Iceland':'Europe',
    'Estonia':'Europe',
    'Poland':'Europe',
    'Czech Republic':'Europe',
    'Cyprus':'Europe',
    'Luxembourg':'Europe',
    'Danmark':'Europe', 
    'Hungary':'Europe',
    'Austria':'Europe',
    'Australia':'Australian',
    'New Zealand':'Australian',
    'South Africa':'Africa',
    'Morocco':'Africa',
    'Egypt':'Africa',
    'Israel':'Middle_East',
    'Iran':'Middle_East'    
}
df['State'] = df['Country'].apply(lambda x: state_mapping.get(x, x))
df
Out[13]:
World_Rank University_Name Country %_Female_Students Year rank State
0 1 California Institute of Technology United States 33.0 2012 1 Americas
1 2 Harvard University United States NaN 2012 2 Americas
2 2 Stanford University United States 42.0 2012 2 Americas
3 4 University of Oxford United Kingdom 46.0 2012 4 Europe
4 5 Princeton University United States 45.0 2012 5 Americas
5 6 University of Cambridge United Kingdom 46.0 2012 6 Europe
6 7 Massachusetts Institute of Technology United States 37.0 2012 7 Americas
7 8 Imperial College London United Kingdom 37.0 2012 8 Europe
8 9 University of Chicago United States 42.0 2012 9 Americas
9 10 University of California, Berkeley United States 50.0 2012 10 Americas
10 11 Yale University United States 50.0 2012 11 Americas
11 12 Columbia University United States NaN 2012 12 Americas
12 13 University of California, Los Angeles United States 52.0 2012 13 Americas
13 14 Johns Hopkins University United States 50.0 2012 14 Americas
14 15 ETH Zurich – Swiss Federal Institute of Tech... Switzerland 31.0 2012 15 Europe
15 16 University of Pennsylvania United States 51.0 2012 16 Americas
16 17 University College London United Kingdom 56.0 2012 17 Europe
17 18 University of Michigan United States 48.0 2012 18 Americas
18 19 University of Toronto Canada NaN 2012 19 Americas
19 20 Cornell University United States 48.0 2012 20 Americas
20 21 Carnegie Mellon University United States 39.0 2012 21 Americas
21 22 Duke University United States 49.0 2012 22 Americas
22 22 University of British Columbia Canada 54.0 2012 22 Americas
23 24 Georgia Institute of Technology United States 31.0 2012 24 Americas
24 25 University of Washington United States 53.0 2012 25 Americas
25 26 Northwestern University United States 48.0 2012 26 Americas
26 27 University of Wisconsin-Madison United States 51.0 2012 27 Americas
27 28 McGill University Canada 56.0 2012 28 Americas
28 29 University of Texas at Austin United States 51.0 2012 29 Americas
29 30 University of Tokyo Japan NaN 2012 30 Asia
... ... ... ... ... ... ... ...
2777 350-400 Justus Liebig University Giessen Germany 61.0 2018 201 Europe
2778 350-400 University of Kansas United States 51.0 2018 201 Americas
2779 350-400 Kyushu University Japan 29.0 2018 201 Asia
2780 350-400 La Trobe University Australia 63.0 2018 201 Australian
2781 350-400 Leibniz University of Hanover Germany 41.0 2018 201 Europe
2782 350-400 Linköping University Sweden 53.0 2018 201 Europe
2783 350-400 University of Macau Macao 58.0 2018 201 Asia
2784 350-400 University of Malaya Malaysia 66.0 2018 201 Asia
2785 350-400 Montpellier University France 53.0 2018 201 Europe
2786 350-400 Örebro University Sweden 61.0 2018 201 Europe
2787 350-400 University of Padua Italy 55.0 2018 201 Europe
2788 350-400 University of Pavia Italy 56.0 2018 201 Europe
2789 350-400 University of Pisa Italy 52.0 2018 201 Europe
2790 350-400 Royal Veterinary College United Kingdom 75.0 2018 201 Europe
2791 350-400 Sabancı University Turkey 41.0 2018 201 Asia
2792 350-400 University of Salerno Italy 60.0 2018 201 Europe
2793 350-400 University of South Carolina United States 55.0 2018 201 Americas
2794 350-400 Stellenbosch University South Africa 54.0 2018 201 Africa
2795 350-400 University of Strasbourg France 58.0 2018 201 Europe
2796 350-400 Sun Yat-sen University China 51.0 2018 201 Asia
2797 350-400 Temple University United States 52.0 2018 201 Americas
2798 350-400 University of Trieste Italy 56.0 2018 201 Europe
2799 350-400 Tulane University United States 56.0 2018 201 Americas
2800 350-400 University of Turku Finland 62.0 2018 201 Europe
2801 350-400 University College Cork Ireland 56.0 2018 201 Europe
2802 350-400 University of Vermont United States 56.0 2018 201 Americas
2803 350-400 Université de Versailles Saint-Quentin-en-Yvel... France 59.0 2018 201 Europe
2804 350-400 University of Waikato New Zealand 56.0 2018 201 Australian
2805 350-400 Wayne State University United States 58.0 2018 201 Americas
2806 350-400 York University Canada 56.0 2018 201 Americas

2807 rows × 7 columns

In [15]:
pd.pivot_table(df, values='%_Female_Students', index='State', columns='Year', aggfunc=np.min).T
Out[15]:
State Africa Americas Asia Australian Europe Middle_East
Year
2012 46.0 25.0 13.0 44.0 12.0 27.0
2013 53.0 26.0 13.0 47.0 18.0 27.0
2014 53.0 26.0 13.0 44.0 18.0 27.0
2015 46.0 24.0 13.0 44.0 18.0 27.0
2016 53.0 26.0 13.0 44.0 18.0 35.0
2017 53.0 27.0 7.0 39.0 18.0 35.0
2018 53.0 28.0 14.0 43.0 20.0 32.0
In [14]:
ax = pd.pivot_table(df, values='%_Female_Students', index='State', columns='Year').T.plot()

patches, labels = ax.get_legend_handles_labels()
ax.legend(patches, labels, loc='upper right')
Out[14]:
<matplotlib.legend.Legend at 0x102974940>

Asia

we can find in the bar chart of Asia, means of the second country and the eight country dropped, meanwhile, the fourth country kept the lowest means all these years. in terms of that, we found out those three countries are Japan, Thailand and India

In [16]:
Asia_data= df[df['State'] =='Asia']
Asia_data.sort_values(by='%_Female_Students', ascending=True)
Out[16]:
World_Rank University_Name Country %_Female_Students Year rank State
2396 201-400 Toyota Technological Institute Japan 7.0 2017 201 Asia
530 128 Tokyo Institute of Technology Japan 13.0 2013 128 Asia
926 125 Tokyo Institute of Technology Japan 13.0 2014 125 Asia
1850 201-250 Tokyo Institute of Technology Japan 13.0 2016 201 Asia
107 108 Tokyo Institute of Technology Japan 13.0 2012 108 Asia
1161 351-400 Indian Institute of Technology Kanpur India 13.0 2014 201 Asia
2297 201-400 Tokyo Institute of Technology Japan 13.0 2017 201 Asia
1344 141 Tokyo Institute of Technology Japan 13.0 2015 141 Asia
2699 251–300 Tokyo Institute of Technology Japan 14.0 2018 201 Asia
632 226-250 Indian Institute of Technology Kharagpur India 15.0 2013 201 Asia
1162 351-400 Indian Institute of Technology Kharagpur India 15.0 2014 201 Asia
1966 351-400 Indian Institute of Technology Bombay India 16.0 2016 201 Asia
656 251-275 Indian Institute of Technology Bombay India 16.0 2013 201 Asia
310 301-350 Indian Institute of Technology Bombay India 16.0 2012 201 Asia
1564 351-400 Indian Institute of Technology Bombay India 16.0 2015 201 Asia
2776 350-400 Indian Institute of Technology Bombay India 17.0 2018 201 Asia
1565 351-400 Indian Institute of Technology Roorkee India 17.0 2015 201 Asia
1163 351-400 Indian Institute of Technology Roorkee India 17.0 2014 201 Asia
2373 201-400 Indian Institute of Technology Bombay India 17.0 2017 201 Asia
759 351-400 Indian Institute of Technology Roorkee India 17.0 2013 201 Asia
1160 351-400 Indian Institute of Technology Delhi India 18.0 2014 201 Asia
1479 276-300 Indian Institute of Science India 19.0 2015 201 Asia
1870 251-300 Indian Institute of Science India 19.0 2016 201 Asia
451 50 Pohang University of Science and Technology South Korea 20.0 2013 50 Asia
469 68 Korea Advanced Institute of Science and Techno... South Korea 20.0 2013 68 Asia
52 53 Pohang University of Science and Technology South Korea 20.0 2012 53 Asia
857 56 Korea Advanced Institute of Science and Techno... South Korea 20.0 2014 56 Asia
1267 66 Pohang University of Science and Technology South Korea 20.0 2015 66 Asia
2092 89 Korea Advanced Institute of Science and Techno... South Korea 20.0 2017 89 Asia
93 94 Korea Advanced Institute of Science and Techno... South Korea 20.0 2012 94 Asia
... ... ... ... ... ... ... ...
1408 201-225 Korea University South Korea NaN 2015 201 Asia
1418 201-225 University of Science and Technology of China China NaN 2015 201 Asia
1427 201-225 Yonsei University South Korea NaN 2015 201 Asia
1520 301-350 Renmin University of China China NaN 2015 201 Asia
1562 351-400 Hanyang University South Korea NaN 2015 201 Asia
1601 351-400 Wuhan University China NaN 2015 201 Asia
1644 42 Peking University China NaN 2016 42 Asia
1645 43 University of Tokyo Japan NaN 2016 43 Asia
1661 59 Hong Kong University of Science and Technology Hong Kong NaN 2016 59 Asia
1687 85 Seoul National University South Korea NaN 2016 85 Asia
1690 88 Kyoto University Japan NaN 2016 88 Asia
1839 201-250 University of Science and Technology of China China NaN 2016 201 Asia
1876 251-300 Korea University South Korea NaN 2016 201 Asia
1951 301-350 Yonsei University South Korea NaN 2016 201 Asia
1953 351-400 Bilkent University Turkey NaN 2016 201 Asia
1963 351-400 Hanyang University South Korea NaN 2016 201 Asia
2042 39 The University of Tokyo Japan NaN 2017 39 Asia
2052 49 Hong Kong University of Science and Technology Hong Kong NaN 2017 49 Asia
2076 72 Seoul National University South Korea NaN 2017 72 Asia
2079 76 Chinese University of Hong Kong Hong Kong NaN 2017 76 Asia
2095 91 Kyoto University Japan NaN 2017 91 Asia
2144 137 Sungkyunkwan University (SKKU) South Korea NaN 2017 137 Asia
2157 153 University of Science and Technology of China China NaN 2017 153 Asia
2303 201-400 Yonsei University South Korea NaN 2017 201 Asia
2369 201-400 Hanyang University South Korea NaN 2017 201 Asia
2449 44 Hong Kong University of Science and Technology Hong Kong NaN 2018 44 Asia
2451 46 The University of Tokyo Japan NaN 2018 46 Asia
2463 58 Chinese University of Hong Kong Hong Kong NaN 2018 58 Asia
2480 75 Seoul National University South Korea NaN 2018 75 Asia
2773 350-400 Hanyang University South Korea NaN 2018 201 Asia

345 rows × 7 columns

In [17]:
ax = pd.pivot_table(Asia_data, values='%_Female_Students', index='Country', columns='Year').T.plot(figsize=(10,10))
ax.axis([2009, 2019, 10, 60])
Out[17]:
[2009, 2019, 10, 60]

Australian

we can find in the bar chart of Australian that there are only 2 countries,while the New Zealand is always the higher one.The reason may be that Austiralia has more universities than New Zealand which bring down the mean.

In [18]:
Australian_data= df[df['State'] =='Australian']
Australian_data.sort_values(by='%_Female_Students', ascending=True)
Out[18]:
World_Rank University_Name Country %_Female_Students Year rank State
2394 201-400 Swinburne University of Technology Australia 39.0 2017 201 Australian
2539 134 University of Adelaide Australia 43.0 2018 134 Australian
1577 351-400 Swinburne University of Technology Australia 44.0 2015 201 Australian
1992 351-400 Swinburne University of Technology Australia 44.0 2016 201 Australian
385 350-400 Swinburne University of Technology Australia 44.0 2012 201 Australian
1178 351-400 Swinburne University of Technology Australia 44.0 2014 201 Australian
2490 85 University of New South Wales Australia 46.0 2018 85 Australian
2081 78 University of New South Wales Australia 46.0 2017 78 Australian
917 114 University of New South Wales Australia 47.0 2014 114 Australian
2146 142 University of Adelaide Australia 47.0 2017 142 Australian
486 85 University of New South Wales Australia 47.0 2013 85 Australian
1685 82 University of New South Wales Australia 47.0 2016 82 Australian
1311 109 University of New South Wales Australia 47.0 2015 109 Australian
174 173 University of New South Wales Australia 47.0 2012 173 Australian
1365 164 University of Adelaide Australia 48.0 2015 164 Australian
1017 201-225 University of Adelaide Australia 48.0 2014 201 Australian
1752 149 University of Adelaide Australia 48.0 2016 149 Australian
578 176 University of Adelaide Australia 48.0 2013 176 Australian
212 201-225 University of Adelaide Australia 48.0 2012 201 Australian
2517 112 University of Western Australia Australia 49.0 2018 112 Australian
1502 276-300 University of Wollongong Australia 50.0 2015 201 Australian
2302 201-400 University of Wollongong Australia 50.0 2017 201 Australian
2359 201-400 University of Canterbury New Zealand 50.0 2017 201 Australian
746 301-350 University of Wollongong Australia 50.0 2013 201 Australian
969 168 University of Western Australia Australia 50.0 2014 168 Australian
1903 251-300 University of Wollongong Australia 50.0 2016 201 Australian
1098 276-300 University of Wollongong Australia 50.0 2014 201 Australian
591 190 University of Western Australia Australia 50.0 2013 190 Australian
1360 157 University of Western Australia Australia 50.0 2015 157 Australian
2761 350-400 University of Canterbury New Zealand 50.0 2018 201 Australian
... ... ... ... ... ... ... ...
755 351-400 Deakin University Australia 60.0 2013 201 Australian
1510 301-350 Deakin University Australia 60.0 2015 201 Australian
2744 301-350 University of Tasmania Australia 60.0 2018 201 Australian
355 350-400 Deakin University Australia 60.0 2012 201 Australian
1110 301-350 Deakin University Australia 60.0 2014 201 Australian
1912 301-350 Deakin University Australia 60.0 2016 201 Australian
2643 201–250 University of South Australia Australia 62.0 2018 201 Australian
2626 201–250 James Cook University Australia 62.0 2018 201 Australian
370 350-400 Massey University New Zealand 62.0 2012 201 Australian
767 351-400 Massey University New Zealand 62.0 2013 201 Australian
2780 350-400 La Trobe University Australia 63.0 2018 201 Australian
756 351-400 Flinders University Australia 63.0 2013 201 Australian
365 350-400 La Trobe University Australia 63.0 2012 201 Australian
356 350-400 Flinders University Australia 63.0 2012 201 Australian
2273 201-400 James Cook University Australia 63.0 2017 201 Australian
2366 201-400 Flinders University Australia 63.0 2017 201 Australian
1872 251-300 James Cook University Australia 63.0 2016 201 Australian
1969 351-400 La Trobe University Australia 63.0 2016 201 Australian
2715 301-350 Charles Darwin University Australia 63.0 2018 201 Australian
2719 301-350 Flinders University Australia 63.0 2018 201 Australian
2380 201-400 La Trobe University Australia 63.0 2017 201 Australian
1866 251-300 Flinders University Australia 63.0 2016 201 Australian
2264 201-400 Charles Darwin University Australia 66.0 2017 201 Australian
1155 351-400 Charles Darwin University Australia 67.0 2014 201 Australian
754 351-400 Charles Darwin University Australia 67.0 2013 201 Australian
1507 301-350 Charles Darwin University Australia 67.0 2015 201 Australian
1862 251-300 Charles Darwin University Australia 67.0 2016 201 Australian
307 301-350 Charles Darwin University Australia 67.0 2012 201 Australian
358 350-400 Griffith University Australia NaN 2012 201 Australian
1868 251-300 Griffith University Australia NaN 2016 201 Australian

180 rows × 7 columns

In [19]:
Australian=Australian_data[Australian_data['State'] == 'Australian']
pd.pivot_table(Australian, values='%_Female_Students', index='Country', columns='Year', aggfunc=np.min).T.plot.bar()
Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a55d400>

Middle East

In [35]:
ME_data= df[df['State'] =='Middle_East']
ME_data.sort_values(by='%_Female_Students', ascending=True)
Out[35]:
World_Rank University_Name Country %_Female_Students Year rank State
322 301-350 Sharif University of Technology Iran 27.0 2012 201 Middle_East
723 301-350 Sharif University of Technology Iran 27.0 2013 201 Middle_East
1065 251-275 Sharif University of Technology Iran 27.0 2014 201 Middle_East
1523 301-350 Sharif University of Technology Iran 27.0 2015 201 Middle_East
2709 301-350 Babol Noshirvani University of Technology Iran 32.0 2018 201 Middle_East
2746 301-350 Technion Israel Institute of Technology Israel 34.0 2018 201 Middle_East
207 201-225 Technion Israel Institute of Technology Israel 35.0 2012 201 Middle_East
595 193 Technion Israel Institute of Technology Israel 35.0 2013 193 Middle_East
2349 201-400 Technion Israel Institute of Technology Israel 35.0 2017 201 Middle_East
1014 201-225 Technion Israel Institute of Technology Israel 35.0 2014 201 Middle_East
1410 201-225 Technion Israel Institute of Technology Israel 35.0 2015 201 Middle_East
1944 301-350 Technion Israel Institute of Technology Israel 35.0 2016 201 Middle_East
1566 351-400 Isfahan University of Technology Iran 39.0 2015 201 Middle_East
2649 201–250 Tel Aviv University Israel 54.0 2018 201 Middle_East
2623 201–250 Hebrew University of Jerusalem Israel 55.0 2018 201 Middle_East
120 121 Hebrew University of Jerusalem Israel 55.0 2012 121 Middle_East
992 191 Hebrew University of Jerusalem Israel 55.0 2014 191 Middle_East
538 137 Hebrew University of Jerusalem Israel 55.0 2013 137 Middle_East
1780 178 Hebrew University of Jerusalem Israel 55.0 2016 178 Middle_East
1406 201-225 Hebrew University of Jerusalem Israel 55.0 2015 201 Middle_East
2190 186 Hebrew University of Jerusalem Israel 57.0 2017 186 Middle_East
304 301-350 Bar-Ilan University Israel 59.0 2012 201 Middle_East
165 166 Tel Aviv University Israel NaN 2012 166 Middle_East
559 158 Tel Aviv University Israel NaN 2013 158 Middle_East
1001 199 Tel Aviv University Israel NaN 2014 199 Middle_East
1390 188 Tel Aviv University Israel NaN 2015 188 Middle_East
1846 201-250 Tel Aviv University Israel NaN 2016 201 Middle_East
2250 201-400 Tel Aviv University Israel NaN 2017 201 Middle_East
In [36]:
Iran=ME_data[ME_data['Country'] == 'Iran']
pd.pivot_table(Iran, values='%_Female_Students', index='University_Name', columns='Year', aggfunc=np.min).T.plot.bar()
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a6e35f8>

female ratio rank

In [23]:
pd.pivot_table(df, values='%_Female_Students', index='Country', columns='Year').T.mean().sort_values(ascending=False)
Out[23]:
Country
Poland                       67.000000
Malaysia                     66.000000
Iceland                      66.000000
Estonia                      66.000000
Cyprus                       65.000000
Greece                       62.714286
Czech Republic               59.400000
Macau                        58.000000
Macao                        58.000000
Finland                      56.767347
Ireland                      56.625000
Saudi Arabia                 56.000000
Spain                        55.985771
Republic of Ireland          55.720000
Norway                       55.071429
New Zealand                  55.026190
Canada                       54.967737
Australia                    54.885981
Belgium                      54.408163
South Africa                 54.261905
Sweden                       53.947619
Hong Kong                    53.821429
Italy                        53.379991
United Kingdom               53.213112
Mexico                       51.000000
Hungary                      51.000000
Luxembourg                   50.333333
United States                49.657508
Austria                      49.445238
Germany                      49.404508
Denmark                      49.342857
Singapore                    48.785714
France                       48.596226
Netherlands                  48.549451
Switzerland                  48.215986
Brazil                       48.000000
Danmark                      48.000000
Thailand                     47.250000
Portugal                     47.216667
Israel                       46.190476
United Arab Emirates         46.000000
Egypt                        46.000000
Morocco                      46.000000
Turkey                       44.876190
Colombia                     44.000000
Russian Federation           43.095238
China                        42.002551
Taiwan                       34.991497
South Korea                  30.195918
Iran                         29.200000
Japan                        28.251330
Chile                        24.000000
India                        19.042857
Unisted States of America          NaN
dtype: float64

Canada

We pick Canada as an example to demonstrate that why this country have that kind of high female ratio at the same time have the base of so much universities.

And we find out the following possible reasons.

First is the high eduacation investment of the Cannada, in addition, there are many policy which are benifitial for the immigration and for the well-eduacted foreign woman to study in Canada. The last reason is the feminist movement which incresase the right of woman.

In [29]:
Canada_data= df[df['Country'] =='Canada']
ax = pd.pivot_table(Canada_data, values='%_Female_Students', index='Country', columns='Year').T.plot()
ax
Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0x10b1d2780>

India

We sort the value of lowest ratio, find out the India is constantly the lowest one.

The reasons are follow, First, India has most the technology university which have lower rate of female students. It is also true for the whole world.

Another reason is the weak education for the Indian woman. And the prejudice hinder the Indian woman to get the higher education.

In [24]:
pd.pivot_table(df, values='%_Female_Students', index='Country', columns='Year').T.mean().sort_values(ascending=True)
Out[24]:
Country
India                        19.042857
Chile                        24.000000
Japan                        28.251330
Iran                         29.200000
South Korea                  30.195918
Taiwan                       34.991497
China                        42.002551
Russian Federation           43.095238
Colombia                     44.000000
Turkey                       44.876190
Morocco                      46.000000
Egypt                        46.000000
United Arab Emirates         46.000000
Israel                       46.190476
Portugal                     47.216667
Thailand                     47.250000
Danmark                      48.000000
Brazil                       48.000000
Switzerland                  48.215986
Netherlands                  48.549451
France                       48.596226
Singapore                    48.785714
Denmark                      49.342857
Germany                      49.404508
Austria                      49.445238
United States                49.657508
Luxembourg                   50.333333
Hungary                      51.000000
Mexico                       51.000000
United Kingdom               53.213112
Italy                        53.379991
Hong Kong                    53.821429
Sweden                       53.947619
South Africa                 54.261905
Belgium                      54.408163
Australia                    54.885981
Canada                       54.967737
New Zealand                  55.026190
Norway                       55.071429
Republic of Ireland          55.720000
Spain                        55.985771
Saudi Arabia                 56.000000
Ireland                      56.625000
Finland                      56.767347
Macao                        58.000000
Macau                        58.000000
Czech Republic               59.400000
Greece                       62.714286
Cyprus                       65.000000
Estonia                      66.000000
Iceland                      66.000000
Malaysia                     66.000000
Poland                       67.000000
Unisted States of America          NaN
dtype: float64
In [28]:
India_data= df[df['Country'] =='India']
ax = pd.pivot_table(India_data, values='%_Female_Students', index='Country', columns='Year').T.plot()
ax
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a7a05f8>
In [31]:
India_data[India_data['University_Name'].apply(lambda x: 'Technology'in x)].mean()
Out[31]:
%_Female_Students      16.153846
Year                 2014.461538
rank                  201.000000
dtype: float64
In [34]:
India_data[India_data['University_Name'].apply(lambda x: 'Technology'not in x)].mean()
Out[34]:
%_Female_Students      28.500000
Year                 2015.833333
rank                  201.000000
dtype: float64
In [37]:
df[df['University_Name'].apply(lambda x: 'Technology'in x)].mean()
Out[37]:
%_Female_Students      30.846154
Year                 2014.967391
rank                  144.576087
dtype: float64
In [38]:
df[df['University_Name'].apply(lambda x: 'Technology'not in x)].mean()
Out[38]:
%_Female_Students      51.513278
Year                 2015.003050
rank                  151.125048
dtype: float64
In [ ]: