import pandas as pd
# Parse the HTML tables on the ranking page; the result is indexed like a list below
tb = pd.read_html(io='http://ranking.promisingedu.com/qs')
# A page may contain several tables, so pick the one that is needed
df = tb[0]
# Keep a local CSV copy of the table
df.to_csv(path_or_buf='universityRank.csv')
# (rows, columns)
df.shape
(500, 11)
# Preview the first five rows
df.head(n=5)
Ranking | University English Name | Country/Region | Academic Reputation | Employer Reputation | Faculty Student | International Faculty | International Students | Citations per Faculty | Overall Score | Free | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 麻省理工学院Massachusetts Institute of Technology | United States | 100.0 | 100.0 | 100.0 | 100 | 94.1 | 99.8 | 100.0 | 免费评估 |
1 | 2 | 斯坦福大学Stanford University | United States | 100.0 | 100.0 | 100.0 | 99.8 | 67.7 | 98.6 | 98.4 | 免费评估 |
2 | 3 | 哈佛大学Harvard University | United States | 100.0 | 100.0 | 98.7 | 86.3 | 62.2 | 99.6 | 97.4 | 免费评估 |
3 | 4 | 牛津大学University of Oxford | United Kingdom | 100.0 | 100.0 | 100.0 | 99.7 | 98.5 | 84.7 | 97.2 | 免费评估 |
4 | 5 | 加利福尼亚理工学院California Institute of Technology | United States | 97.8 | 81.2 | 100.0 | 99.4 | 87.3 | 100.0 | 96.9 | 免费评估 |
# Preview the last three rows
df.iloc[-3:]
Ranking | University English Name | Country/Region | Academic Reputation | Employer Reputation | Faculty Student | International Faculty | International Students | Citations per Faculty | Overall Score | Free | |
---|---|---|---|---|---|---|---|---|---|---|---|
497 | 498 | Czech Technical University in Prague | Czech Republic | 18.7 | 39.7 | 33.9 | 18.2 | 57.5 | 10.6 | 24.2 | 免费评估 |
498 | 498 | Université de Montpellier | France | 20.3 | 18.8 | 13.1 | 20.1 | 39.3 | 42.8 | 24.2 | 免费评估 |
499 | 498 | University of Eastern Finland | Finland | 14.5 | 10.5 | 43.8 | 27.9 | 9.1 | 33.5 | 24.2 | 免费评估 |
# Column labels of the table
df.columns
Index(['Ranking', 'University English Name', 'Country/Region', 'Academic Reputation', 'Employer Reputation', 'Faculty Student', 'International Faculty', 'International Students', 'Citations per Faculty', 'Overall Score', 'Free'], dtype='object')
# Transpose: rows become columns and columns become rows
df.transpose()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Ranking | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ... | 491 | 491 | 491 | 491 | 491 | 491 | 491 | 498 | 498 | 498 |
University English Name | 麻省理工学院Massachusetts Institute of Technology | 斯坦福大学Stanford University | 哈佛大学Harvard University | 牛津大学University of Oxford | 加利福尼亚理工学院California Institute of Technology | ETH Zurich - Swiss Federal Institute of Techno... | 剑桥大学University of Cambridge | 伦敦大学学院University College London | 帝国理工学院Imperial College London | 芝加哥大学The University of Chicago | ... | Indian Institute of Technology Guwahati (IITG) | Lappeenranta-Lahti University of Technology LUT | Auezov South Kazakhstan State University (SKSU) | Universidad de Montevideo (UM) | Aix-Marseille University | 特拉华大学University of Delaware | V. N. Karazin Kharkiv National University | Czech Technical University in Prague | Université de Montpellier | University of Eastern Finland |
Country/Region | United States | United States | United States | United Kingdom | United States | Switzerland | United Kingdom | United Kingdom | United Kingdom | United States | ... | India | Finland | Kazakhstan | Uruguay | France | United States | Ukraine | Czech Republic | France | Finland |
Academic Reputation | 100 | 100 | 100 | 100 | 97.8 | 98.4 | 100 | 99.3 | 98.6 | 99.5 | ... | 11.7 | 6.6 | 7 | 13.4 | 33.9 | 11 | 10.8 | 18.7 | 20.3 | 14.5 |
Employer Reputation | 100 | 100 | 100 | 100 | 81.2 | 96.7 | 100 | 98.7 | 99.9 | 93.7 | ... | 15.2 | 17.2 | 6.1 | 17.3 | 4.9 | 10.2 | 11.4 | 39.7 | 18.8 | 10.5 |
Faculty Student | 100 | 100 | 98.7 | 100 | 100 | 85 | 100 | 98.1 | 99.8 | 96.5 | ... | 18.7 | 23.6 | 94.1 | 79.1 | 13.5 | 11.6 | 72.2 | 33.9 | 13.1 | 43.8 |
International Faculty | 100 | 99.8 | 86.3 | 99.7 | 99.4 | 100 | 100 | 99.1 | 100 | 70.2 | ... | - | 49.4 | 17.1 | 18.1 | 23.6 | 39.3 | 1.3 | 18.2 | 20.1 | 27.9 |
International Students | 94.1 | 67.7 | 62.2 | 98.5 | 87.3 | 98 | 97.6 | 100 | 100 | 81 | ... | 1.5 | 28.6 | 17.7 | 3.7 | 26.6 | 22.5 | 78.4 | 57.5 | 39.3 | 9.1 |
Citations per Faculty | 99.8 | 98.6 | 99.6 | 84.7 | 100 | 98.4 | 74.2 | 76.7 | 72.1 | 78.5 | ... | 71.2 | 56.3 | 1 | 1.3 | 25 | 67.1 | 1.8 | 10.6 | 42.8 | 33.5 |
Overall Score | 100 | 98.4 | 97.4 | 97.2 | 96.9 | 95.9 | 95 | 94.8 | 94.1 | 92 | ... | 24.3 | 24.3 | 24.3 | 24.3 | 24.3 | 24.3 | 24.3 | 24.2 | 24.2 | 24.2 |
Free | 免费评估 | 免费评估 | 免费评估 | 免费评估 | 免费评估 | 免费评估 | 免费评估 | 免费评估 | 免费评估 | 免费评估 | ... | 免费评估 | 免费评估 | 免费评估 | 免费评估 | 免费评估 | 免费评估 | 免费评估 | 免费评估 | 免费评估 | 免费评估 |
11 rows × 500 columns
# Sort rows by the values of one column, then preview the first five
by_country = df.sort_values(by='Country/Region')
by_country.head()
Ranking | University English Name | Country/Region | Academic Reputation | Employer Reputation | Faculty Student | International Faculty | International Students | Citations per Faculty | Overall Score | Free | |
---|---|---|---|---|---|---|---|---|---|---|---|
385 | 383 | Universidad de Palermo (UP) | Argentina | 9.8 | 9.6 | 84.6 | 35.1 | 97.5 | 1.2 | 28.8 | 免费评估 |
437 | 438 | Universidad de Belgrano | Argentina | 7.7 | 19.0 | 89.2 | 6.9 | 55.8 | 1.1 | 26.3 | 免费评估 |
73 | 74 | Universidad de Buenos Aires (UBA) | Argentina | 87.2 | 91.3 | 77.4 | 50.7 | 64.7 | 2.4 | 66.0 | 免费评估 |
402 | 400 | Universidad Austral | Argentina | 15.6 | 36.9 | 84.4 | 6.1 | 12.2 | 1.9 | 28.2 | 免费评估 |
344 | 344 | Pontificia Universidad Católica Argentina | Argentina | 17.7 | 44.3 | 95.3 | 2.9 | 13.2 | 1.1 | 31.7 | 免费评估 |
# Row slice: the first two rows by position
df.iloc[:2]
Ranking | University English Name | Country/Region | Academic Reputation | Employer Reputation | Faculty Student | International Faculty | International Students | Citations per Faculty | Overall Score | Free | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 麻省理工学院Massachusetts Institute of Technology | United States | 100.0 | 100.0 | 100.0 | 100 | 94.1 | 99.8 | 100.0 | 免费评估 |
1 | 2 | 斯坦福大学Stanford University | United States | 100.0 | 100.0 | 100.0 | 99.8 | 67.7 | 98.6 | 98.4 | 免费评估 |
# Row and column slice: first two rows, first four columns (by position)
df.iloc[0:2, 0:4]
Ranking | University English Name | Country/Region | Academic Reputation | |
---|---|---|---|---|
0 | 1 | 麻省理工学院Massachusetts Institute of Technology | United States | 100.0 |
1 | 2 | 斯坦福大学Stanford University | United States | 100.0 |
# Select specific columns by position; -1 refers to the last column
wanted_columns = [1, 2, -1]
df.iloc[:2, wanted_columns]
University English Name | Country/Region | Free | |
---|---|---|---|
0 | 麻省理工学院Massachusetts Institute of Technology | United States | 免费评估 |
1 | 斯坦福大学Stanford University | United States | 免费评估 |
# Combine several row ranges of the table into a single frame
row_ranges = [slice(None, 3), slice(7, 9), slice(17, 22)]
pieces = [df[rows] for rows in row_ranges]
df_new = pd.concat(pieces)
df_new
Ranking | University English Name | Country/Region | Academic Reputation | Employer Reputation | Faculty Student | International Faculty | International Students | Citations per Faculty | Overall Score | Free | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 麻省理工学院Massachusetts Institute of Technology | United States | 100.0 | 100.0 | 100.0 | 100 | 94.1 | 99.8 | 100.0 | 免费评估 |
1 | 2 | 斯坦福大学Stanford University | United States | 100.0 | 100.0 | 100.0 | 99.8 | 67.7 | 98.6 | 98.4 | 免费评估 |
2 | 3 | 哈佛大学Harvard University | United States | 100.0 | 100.0 | 98.7 | 86.3 | 62.2 | 99.6 | 97.4 | 免费评估 |
7 | 8 | 伦敦大学学院University College London | United Kingdom | 99.3 | 98.7 | 98.1 | 99.1 | 100 | 76.7 | 94.8 | 免费评估 |
8 | 9 | 帝国理工学院Imperial College London | United Kingdom | 98.6 | 99.9 | 99.8 | 100 | 100 | 72.1 | 94.1 | 免费评估 |
17 | 18 | 哥伦比亚大学Columbia University | United States | 99.7 | 97.6 | 100.0 | 40.1 | 96.9 | 53.0 | 87.4 | 免费评估 |
18 | 18 | EPFL - Ecole Polytechnique Federale de Lausanne | Switzerland | 75.6 | 77.6 | 96.6 | 100 | 100 | 98.9 | 87.4 | 免费评估 |
19 | 20 | 爱丁堡大学The University of Edinburgh | United Kingdom | 97.5 | 93.7 | 85.6 | 98 | 98.6 | 53.0 | 86.2 | 免费评估 |
20 | 21 | 密歇根大学安娜堡分校University of Michigan,Ann Arbor | United States | 98.9 | 94.4 | 91.0 | 79.7 | 44.1 | 61.6 | 86.0 | 免费评估 |
21 | 22 | Peking University | China | 99.1 | 99.6 | 72.3 | 70.7 | 36.9 | 73.0 | 84.3 | 免费评估 |
# Filtering: any number of conditions can be combined into ONE boolean mask
# with & (and) / | (or); each comparison must be wrapped or named separately.
# Chaining df[cond1][cond2] instead applies a full-length mask to an already
# filtered frame, which makes pandas warn
# "Boolean Series key will be reindexed to match DataFrame index".
high_citations = df['Citations per Faculty'] > 99
in_united_states = df['Country/Region'] == 'United States'
df[high_citations & in_united_states]
Ranking | University English Name | Country/Region | Academic Reputation | Employer Reputation | Faculty Student | International Faculty | International Students | Citations per Faculty | Overall Score | Free | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 麻省理工学院Massachusetts Institute of Technology | United States | 100.0 | 100.0 | 100.0 | 100 | 94.1 | 99.8 | 100.0 | 免费评估 |
2 | 3 | 哈佛大学Harvard University | United States | 100.0 | 100.0 | 98.7 | 86.3 | 62.2 | 99.6 | 97.4 | 免费评估 |
4 | 5 | 加利福尼亚理工学院California Institute of Technology | United States | 97.8 | 81.2 | 100.0 | 99.4 | 87.3 | 100.0 | 96.9 | 免费评估 |
12 | 13 | 普林斯顿大学Princeton University | United States | 99.9 | 98.6 | 70.7 | 64.9 | 67.6 | 100.0 | 90.9 | 免费评估 |
67 | 68 | 西雅图华盛顿大学University of Washington,Seattle Campus | United States | 86.1 | 51.4 | 17.4 | 55.8 | 40.5 | 99.2 | 67.9 | 免费评估 |
71 | 72 | 佐治亚理工学院Georgia Institute of Technology | United States | 71.1 | 81.7 | 15.1 | 60.4 | 65.8 | 100.0 | 66.2 | 免费评估 |
# Grouping: count how many universities each country/region has.
# size() yields the row count per group directly, so no per-group frame
# needs to be materialized just to take its length.
for name, count in df.groupby('Country/Region').size().items():
    print(name, count)
Argentina 5 Australia 26 Austria 5 Belarus 1 Belgium 7 Brazil 5 Brunei 2 Canada 17 Chile 3 China 24 Colombia 4 Czech Republic 3 Denmark 5 Egypt 1 Estonia 1 Finland 8 France 17 Germany 29 Greece 1 Hong Kong 6 India 9 Indonesia 3 Iran, Islamic Republic of 2 Ireland 5 Israel 4 Italy 12 Japan 17 Kazakhstan 3 Lebanon 1 Lithuania 1 Macao 1 Malaysia 7 Mexico 2 Netherlands 13 New Zealand 8 Norway 4 Oman 1 Pakistan 2 Peru 1 Philippines 1 Poland 2 Portugal 4 Qatar 1 Russia 16 Saudi Arabia 3 Singapore 3 South Africa 3 South Korea 15 Spain 12 Sweden 8 Switzerland 8 Taiwan 11 Thailand 2 Turkey 1 Ukraine 1 United Arab Emirates 3 United Kingdom 50 United States 89 Uruguay 1
# Convert to a NumPy array; the columns mix numbers and strings, so the
# resulting array has dtype=object (see the output below)
df.to_numpy()
array([[1, '麻省理工学院Massachusetts Institute of Technology', 'United States', ..., 99.8, 100.0, '免费评估'], [2, '斯坦福大学Stanford University', 'United States', ..., 98.6, 98.4, '免费评估'], [3, '哈佛大学Harvard University', 'United States', ..., 99.6, 97.4, '免费评估'], ..., [498, 'Czech Technical University in Prague', 'Czech Republic', ..., 10.6, 24.2, '免费评估'], [498, 'Université de Montpellier', 'France', ..., 42.8, 24.2, '免费评估'], [498, 'University of Eastern Finland', 'Finland', ..., 33.5, 24.2, '免费评估']], dtype=object)