In [5]:
# Import the libraries and create the sample DataFrame
import pandas as pd
import numpy as np
from pandas import DataFrame, Series
# Sample data, one list per column; the 'string' column deliberately
# contains a missing value (None) at position 1.
sample_columns = {
    'integer': [1, 2, 3, 6, 7, 23, 8, 3],
    'float': [2, 3.4, 5, 6, 2, 4.7, 4, 8],
    'string': ['saya', None, 'aku', 'cinta', 'kamu', 'a', 'b', 'jika'],
}
df = DataFrame(sample_columns)
In [6]:
# Display the whole DataFrame: a bare expression on the last line of a
# cell is rendered as that cell's output.
df
Out[6]:
float integer string
0 2.0 1 saya
1 3.4 2 None
2 5.0 3 aku
3 6.0 6 cinta
4 2.0 7 kamu
5 4.7 23 a
6 4.0 8 b
7 8.0 3 jika
In [7]:
# Show all rows for the columns selected by label ('string', then 'float').
# `.loc` is used rather than `.ix`: `.ix` was deprecated in pandas 0.20 and
# removed in pandas 1.0, so it raises AttributeError on current versions.
df.loc[:, ['string', 'float']]
Out[7]:
string float
0 saya 2.0
1 None 3.4
2 aku 5.0
3 cinta 6.0
4 kamu 2.0
5 a 4.7
6 b 4.0
7 jika 8.0
In [8]:
# Show only the selected columns: indexing the frame with a list of
# column labels returns those columns in the order listed.
selected_columns = ['string', 'float']
df[selected_columns]
Out[8]:
string float
0 saya 2.0
1 None 3.4
2 aku 5.0
3 cinta 6.0
4 kamu 2.0
5 a 4.7
6 b 4.0
7 jika 8.0
In [9]:
# Show only the rows that satisfy a condition: build a boolean mask that
# is True where 'float' is greater than 4, then index the frame with it.
float_above_four = df['float'] > 4
df[float_above_four]
Out[9]:
float integer string
2 5.0 3 aku
3 6.0 6 cinta
5 4.7 23 a
7 8.0 3 jika
In [10]:
# Rename the 'string' column to 'characters'. The renamed frame is bound
# to df2, which the following cells work with.
column_renames = {'string': 'characters'}
df2 = df.rename(columns=column_renames)
In [11]:
# Display the DataFrame after the column rename to confirm that the
# 'string' column now appears as 'characters'.
df2
Out[11]:
float integer characters
0 2.0 1 saya
1 3.4 2 None
2 5.0 3 aku
3 6.0 6 cinta
4 2.0 7 kamu
5 4.7 23 a
6 4.0 8 b
7 8.0 3 jika
In [12]:
# Show df2 without the rows that contain any missing value.
# The result is only displayed, not assigned, so df2 itself still holds
# the row with the missing 'characters' entry.
df2.dropna(axis=0, how='any')
Out[12]:
float integer characters
0 2.0 1 saya
2 5.0 3 aku
3 6.0 6 cinta
4 2.0 7 kamu
5 4.7 23 a
6 4.0 8 b
7 8.0 3 jika
In [13]:
# Show the mean, median and maximum of the 'float' column.
# print(...) with a single argument behaves the same on Python 2 and
# Python 3, whereas the bare `print "..."` statement is a SyntaxError on
# Python 3.
float_column = df2['float']

mean = float_column.mean()
print("mean %f" % mean)

median = float_column.median()
print("median %f" % median)

# Named `maximum` rather than `max` so the built-in max() is not shadowed
# for the remainder of the kernel session.
maximum = float_column.max()
print("max %f" % maximum)
mean 4.387500
median 4.350000
max 8.000000
In [ ]: