In [2]:

import pandas

In [3]:

# CSV snapshot of the "pharmaceutical-drug-spending" dataset hosted on DataHub.
DATA_URL = "https://pkgstore.datahub.io/core/pharmaceutical-drug-spending/data_csv/data/db46fb3c420e7100e1d2b1f973e2cbcd/data_csv.csv"

# Download the CSV and parse it into a DataFrame used by the rest of the notebook.
csvfile = pandas.read_csv(DATA_URL)

In [4]:

# Preview the first five rows to confirm the CSV parsed into the expected columns.
csvfile.head()

Out[4]:

	LOCATION	TIME	PC_HEALTHXP	PC_GDP	USD_CAP	FLAG_CODES	TOTAL_SPEND
0	AUS	1971	15.992	0.727	35.720	NaN	462.11
1	AUS	1972	15.091	0.686	36.056	NaN	475.11
2	AUS	1973	15.117	0.681	39.871	NaN	533.47
3	AUS	1974	14.771	0.755	47.559	NaN	652.65
4	AUS	1975	11.849	0.682	47.561	NaN	660.76

In [5]:

# Confirm that read_csv produced a pandas DataFrame.
type(csvfile)

Out[5]:

pandas.core.frame.DataFrame

In [6]:

# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
csvfile.describe()

Out[6]:

	TIME	PC_HEALTHXP	PC_GDP	USD_CAP	TOTAL_SPEND
count	1036.000000	1036.000000	1036.000000	1036.000000	1036.000000
mean	1996.809846	16.406307	1.170948	295.046989	11765.423118
std	12.498330	6.389064	0.465635	209.152134	34878.286123
min	1970.000000	5.545000	0.207000	3.160000	6.290000
25%	1987.000000	11.516250	0.768250	111.098500	728.180000
50%	1999.000000	14.967500	1.141500	266.332000	2349.570000
75%	2007.000000	20.590500	1.523250	446.109500	7778.180000
max	2016.000000	40.239000	2.797000	1162.399000	373009.910000

In [7]:

# Rows at positions 5 through 14 (the end of the slice is exclusive).
csvfile.iloc[5:15]

Out[7]:

	LOCATION	TIME	PC_HEALTHXP	PC_GDP	USD_CAP	FLAG_CODES	TOTAL_SPEND
5	AUS	1976	10.920	0.630	46.908	NaN	658.26
6	AUS	1977	10.087	0.613	47.649	NaN	676.23
7	AUS	1978	9.958	0.591	50.799	NaN	729.37
8	AUS	1979	8.981	0.523	49.766	NaN	722.30
9	AUS	1980	9.263	0.540	56.972	NaN	837.03
10	AUS	1981	9.388	0.548	65.390	NaN	976.08
11	AUS	1982	9.248	0.564	67.606	NaN	1026.12
12	AUS	1983	9.589	0.580	74.083	NaN	1138.58
13	AUS	1984	9.465	0.569	77.270	NaN	1201.08
14	AUS	1985	9.560	0.580	84.134	NaN	1325.78

In [8]:

# Keep rows whose PC_HEALTHXP is below 10, then show the 2nd through 5th matches.
csvfile.loc[csvfile['PC_HEALTHXP'] < 10].iloc[1:5]

Out[8]:

	LOCATION	TIME	PC_HEALTHXP	PC_GDP	USD_CAP	FLAG_CODES	TOTAL_SPEND
8	AUS	1979	8.981	0.523	49.766	NaN	722.30
9	AUS	1980	9.263	0.540	56.972	NaN	837.03
10	AUS	1981	9.388	0.548	65.390	NaN	976.08
11	AUS	1982	9.248	0.564	67.606	NaN	1026.12

In [9]:

import altair

In [10]:

a = [1, 2, 3, 4, 5]

# Print every other element, starting with the first (positions 0, 2, 4).
for v in a[::2]:
    print(v)

1
3
5

In [11]:

# Same five-row preview as earlier in the notebook, repeated so the column names are at hand.
csvfile.head()

Out[11]:

	LOCATION	TIME	PC_HEALTHXP	PC_GDP	USD_CAP	FLAG_CODES	TOTAL_SPEND
0	AUS	1971	15.992	0.727	35.720	NaN	462.11
1	AUS	1972	15.091	0.686	36.056	NaN	475.11
2	AUS	1973	15.117	0.681	39.871	NaN	533.47
3	AUS	1974	14.771	0.755	47.559	NaN	652.65
4	AUS	1975	11.849	0.682	47.561	NaN	660.76

Next, we build a scatter plot of `TOTAL_SPEND` against `PC_HEALTHXP` with Altair, so we can see at a glance how the two columns relate.

In [12]:

# Scatter plot: one point per row, PC_HEALTHXP on the x-axis and TOTAL_SPEND on the y-axis.
(
    altair.Chart(csvfile)
    .mark_point()
    .encode(x='PC_HEALTHXP', y='TOTAL_SPEND')
)

Out[12]:

In [13]:

# Record the installed Altair version so the environment behind the chart can be reproduced.
altair.__version__

Out[13]:

'2.0.0rc2'