import pandas
# Download the pharmaceutical drug spending CSV from DataHub into a DataFrame.
csvfile = pandas.read_csv(
    "https://pkgstore.datahub.io/core/pharmaceutical-drug-spending/data_csv/data/db46fb3c420e7100e1d2b1f973e2cbcd/data_csv.csv"
)
# Preview the first rows to check that the columns parsed as expected.
csvfile.head()
| | LOCATION | TIME | PC_HEALTHXP | PC_GDP | USD_CAP | FLAG_CODES | TOTAL_SPEND |
---|---|---|---|---|---|---|---|
0 | AUS | 1971 | 15.992 | 0.727 | 35.720 | NaN | 462.11 |
1 | AUS | 1972 | 15.091 | 0.686 | 36.056 | NaN | 475.11 |
2 | AUS | 1973 | 15.117 | 0.681 | 39.871 | NaN | 533.47 |
3 | AUS | 1974 | 14.771 | 0.755 | 47.559 | NaN | 652.65 |
4 | AUS | 1975 | 11.849 | 0.682 | 47.561 | NaN | 660.76 |
# Inspect the type of the object that read_csv returned.
type(csvfile)
pandas.core.frame.DataFrame
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
csvfile.describe()
| | TIME | PC_HEALTHXP | PC_GDP | USD_CAP | TOTAL_SPEND |
---|---|---|---|---|---|
count | 1036.000000 | 1036.000000 | 1036.000000 | 1036.000000 | 1036.000000 |
mean | 1996.809846 | 16.406307 | 1.170948 | 295.046989 | 11765.423118 |
std | 12.498330 | 6.389064 | 0.465635 | 209.152134 | 34878.286123 |
min | 1970.000000 | 5.545000 | 0.207000 | 3.160000 | 6.290000 |
25% | 1987.000000 | 11.516250 | 0.768250 | 111.098500 | 728.180000 |
50% | 1999.000000 | 14.967500 | 1.141500 | 266.332000 | 2349.570000 |
75% | 2007.000000 | 20.590500 | 1.523250 | 446.109500 | 7778.180000 |
max | 2016.000000 | 40.239000 | 2.797000 | 1162.399000 | 373009.910000 |
# Slice rows by position: rows 5 through 14 (the end bound 15 is excluded).
csvfile[5:15]
| | LOCATION | TIME | PC_HEALTHXP | PC_GDP | USD_CAP | FLAG_CODES | TOTAL_SPEND |
---|---|---|---|---|---|---|---|
5 | AUS | 1976 | 10.920 | 0.630 | 46.908 | NaN | 658.26 |
6 | AUS | 1977 | 10.087 | 0.613 | 47.649 | NaN | 676.23 |
7 | AUS | 1978 | 9.958 | 0.591 | 50.799 | NaN | 729.37 |
8 | AUS | 1979 | 8.981 | 0.523 | 49.766 | NaN | 722.30 |
9 | AUS | 1980 | 9.263 | 0.540 | 56.972 | NaN | 837.03 |
10 | AUS | 1981 | 9.388 | 0.548 | 65.390 | NaN | 976.08 |
11 | AUS | 1982 | 9.248 | 0.564 | 67.606 | NaN | 1026.12 |
12 | AUS | 1983 | 9.589 | 0.580 | 74.083 | NaN | 1138.58 |
13 | AUS | 1984 | 9.465 | 0.569 | 77.270 | NaN | 1201.08 |
14 | AUS | 1985 | 9.560 | 0.580 | 84.134 | NaN | 1325.78 |
# Keep only the rows whose PC_HEALTHXP is below 10, then take positions 1-4 of
# that filtered result (so the first matching row is skipped).
csvfile.query('PC_HEALTHXP < 10')[1:5]
| | LOCATION | TIME | PC_HEALTHXP | PC_GDP | USD_CAP | FLAG_CODES | TOTAL_SPEND |
---|---|---|---|---|---|---|---|
8 | AUS | 1979 | 8.981 | 0.523 | 49.766 | NaN | 722.30 |
9 | AUS | 1980 | 9.263 | 0.540 | 56.972 | NaN | 837.03 |
10 | AUS | 1981 | 9.388 | 0.548 | 65.390 | NaN | 976.08 |
11 | AUS | 1982 | 9.248 | 0.564 | 67.606 | NaN | 1026.12 |
import altair
# Print the elements of `a` that sit at even positions (indices 0, 2, 4).
a = [1, 2, 3, 4, 5]
for i, v in enumerate(a):
    # enumerate() yields (index, value) pairs; only even indices are printed.
    if i % 2 == 0:
        print(v)
1 3 5
# Show the first rows of the data again.
csvfile.head()
| | LOCATION | TIME | PC_HEALTHXP | PC_GDP | USD_CAP | FLAG_CODES | TOTAL_SPEND |
---|---|---|---|---|---|---|---|
0 | AUS | 1971 | 15.992 | 0.727 | 35.720 | NaN | 462.11 |
1 | AUS | 1972 | 15.091 | 0.686 | 36.056 | NaN | 475.11 |
2 | AUS | 1973 | 15.117 | 0.681 | 39.871 | NaN | 533.47 |
3 | AUS | 1974 | 14.771 | 0.755 | 47.559 | NaN | 652.65 |
4 | AUS | 1975 | 11.849 | 0.682 | 47.561 | NaN | 660.76 |
Now we will plot the data as a chart to visualise it, because it is a good thing to do!
# Scatter plot: one point per row, PC_HEALTHXP on the x axis and TOTAL_SPEND on the y axis.
altair.Chart(csvfile).mark_point().encode(x='PC_HEALTHXP', y='TOTAL_SPEND')
# Report the installed Altair version.
altair.__version__
'2.0.0rc2'