require 'daru'
true
require 'open-uri'
# Download the WHO dataset. URI.open is called explicitly because opening a
# URL through the bare Kernel#open (as patched by open-uri) was deprecated in
# Ruby 2.7 and removed in Ruby 3.0.
content = URI.open('https://d37djvu3ytnwxt.cloudfront.net/asset-v1:MITx+15.071x_3+1T2016+type@asset+block/WHO.csv')
# Parse the downloaded CSV into a DataFrame.
df = Daru::DataFrame.from_csv content
# Keep only the vectors at positions 0 through 6 (seven columns).
df = df.at 0..6
# Preview the leading rows of the frame.
df.first
Daru::DataFrame(10x7) | |||||||
---|---|---|---|---|---|---|---|
Country | Region | Population | Under15 | Over60 | FertilityRate | LifeExpectancy | |
0 | Afghanistan | Eastern Mediterranean | 29825 | 47.42 | 3.82 | 5.4 | 60 |
1 | Albania | Europe | 3162 | 21.33 | 14.93 | 1.75 | 74 |
2 | Algeria | Africa | 38482 | 27.42 | 7.17 | 2.83 | 73 |
3 | Andorra | Europe | 78 | 15.2 | 22.86 | | 82 |
4 | Angola | Africa | 20821 | 47.58 | 3.84 | 6.1 | 51 |
5 | Antigua and Barbuda | Americas | 89 | 25.96 | 12.35 | 2.12 | 75 |
6 | Argentina | Americas | 41087 | 24.42 | 14.97 | 2.2 | 76 |
7 | Armenia | Europe | 2969 | 20.34 | 14.06 | 1.74 | 71 |
8 | Australia | Western Pacific | 23050 | 18.95 | 19.46 | 1.89 | 82 |
9 | Austria | Europe | 8464 | 14.51 | 23.52 | 1.44 | 81 |
# Replace the default integer row index with a categorical index built from
# the values of the 'Region' vector, so rows can be selected by region name.
df.index = Daru::CategoricalIndex.new df['Region'].to_a
#<Daru::CategoricalIndex(194): {Eastern Mediterranean, Europe, Africa, Europe, Africa, Americas, Americas, Europe, Western Pacific, Europe, Europe, Americas, Eastern Mediterranean, South-East Asia, Americas, Europe, Europe, Americas, Africa, South-East Asia ... Africa}>
# Show the first 5 rows; each row is now labelled with its region.
df.first 5
Daru::DataFrame(5x7) | |||||||
---|---|---|---|---|---|---|---|
Country | Region | Population | Under15 | Over60 | FertilityRate | LifeExpectancy | |
Eastern Mediterranean | Afghanistan | Eastern Mediterranean | 29825 | 47.42 | 3.82 | 5.4 | 60 |
Europe | Albania | Europe | 3162 | 21.33 | 14.93 | 1.75 | 74 |
Africa | Algeria | Africa | 38482 | 27.42 | 7.17 | 2.83 | 73 |
Europe | Andorra | Europe | 78 | 15.2 | 22.86 | | 82 |
Africa | Angola | Africa | 20821 | 47.58 | 3.84 | 6.1 | 51 |
Say we want to know about regions as a whole. To do that, we index our dataset by the 'Region' vector, as done above with `Daru::CategoricalIndex`.
List all regions
# The distinct categories of the categorical index, i.e. the region names.
df.index.categories
["Eastern Mediterranean", "Europe", "Africa", "Americas", "Western Pacific", "South-East Asia"]
Let's find out how many countries lie in the Africa region.
# Select every row labelled 'Africa' through the categorical index and count them.
df.row['Africa'].size
46
Finding the mean life expectancy of Europe is as easy as:
# Take the rows labelled 'Europe', pick their 'LifeExpectancy' vector and average it.
df.row['Europe']['LifeExpectancy'].mean
76.73584905660377
Let's see the maximum life expectancy of South-East Asia
# Take the rows labelled 'South-East Asia' and find the largest 'LifeExpectancy' value.
df.row['South-East Asia']['LifeExpectancy'].max
77
Let's see the countries in Europe that top the list of LifeExpectancy.
# European rows ordered by 'LifeExpectancy' from highest to lowest,
# keeping only the top five.
df.row['Europe']
  .sort(['LifeExpectancy'], ascending: false)
  .first(5)
Daru::DataFrame(5x7) | |||||||
---|---|---|---|---|---|---|---|
Country | Region | Population | Under15 | Over60 | FertilityRate | LifeExpectancy | |
Europe | San Marino | Europe | 31 | 14.04 | 26.97 | | 83 |
Europe | Switzerland | Europe | 7997 | 14.79 | 23.25 | 1.51 | 83 |
Europe | Andorra | Europe | 78 | 15.2 | 22.86 | | 82 |
Europe | France | Europe | 63937 | 18.26 | 23.82 | 1.98 | 82 |
Europe | Iceland | Europe | 326 | 20.71 | 17.62 | 2.11 | 82 |
Let's see the countries in South-East Asia that have a high FertilityRate.
# South-East Asian rows sorted by 'FertilityRate' in the default order,
# then the last ten positions of that ordering are taken.
df.row['South-East Asia']
  .sort(['FertilityRate'])
  .row
  .at(-10..-1)
Daru::DataFrame(10x7) | |||||||
---|---|---|---|---|---|---|---|
Country | Region | Population | Under15 | Over60 | FertilityRate | LifeExpectancy | |
South-East Asia | Myanmar | South-East Asia | 52797 | 25.28 | 8.15 | 1.98 | 65 |
South-East Asia | Democratic People's Republic of Korea | South-East Asia | 24763 | 21.98 | 12.74 | 2 | 69 |
South-East Asia | Bangladesh | South-East Asia | 155000 | 30.57 | 6.89 | 2.24 | 70 |
South-East Asia | Maldives | South-East Asia | 338 | 29.03 | 6.65 | 2.31 | 77 |
South-East Asia | Bhutan | South-East Asia | 742 | 28.53 | 6.9 | 2.32 | 67 |
South-East Asia | Sri Lanka | South-East Asia | 21098 | 25.15 | 12.4 | 2.35 | 75 |
South-East Asia | Indonesia | South-East Asia | 247000 | 29.27 | 7.86 | 2.4 | 69 |
South-East Asia | Nepal | South-East Asia | 27474 | 35.58 | 7.65 | 2.5 | 68 |
South-East Asia | India | South-East Asia | 1240000 | 29.43 | 8.1 | 2.53 | 65 |
South-East Asia | Timor-Leste | South-East Asia | 1114 | 46.33 | 5.16 | 6.11 | 64 |