In [24]:
import pandas as pd
from pandas import DataFrame, Series
In [25]:
# Column-oriented sample data: each key is a column name and each list holds
# that column's values, one entry per person.
data_dict = dict(
    name=['Caleb', 'Whitney', 'Hastin', 'Rafe'],
    age=[27, 26, 3, 1],
    sex=['M', 'F', 'M', 'M'],
)
In [26]:
# Build the working DataFrame from the dict of columns, then display it.
frame = pd.DataFrame(data=data_dict)
frame
Out[26]:
age name sex
0 27 Caleb M
1 26 Whitney F
2 3 Hastin M
3 1 Rafe M
In [27]:
# Boolean-mask row selection: keep only the rows where age is greater than 18.
frame.loc[frame['age'] > 18]
Out[27]:
age name sex
0 27 Caleb M
1 26 Whitney F
In [28]:
# Summary statistics for the age column.
# print() with str.format parses and runs under both Python 2 and Python 3;
# the original print statements are a SyntaxError on Python 3.
print("Average age: {}".format(frame.age.mean()))
print("Age standard deviation: {}".format(frame.age.std()))
Average age: 14.25
Age standard deviation: 14.174507634
In [29]:
frame['name']  # Looking up a single column yields a Series
Out[29]:
0      Caleb
1    Whitney
2     Hastin
3       Rafe
Name: name, dtype: object
In [30]:
# Selecting rows by index label.
# .loc replaces .ix, which was deprecated and later removed from pandas; the
# index labels here are the integers 0..3, so label-based lookup picks the
# same rows .ix did.
frame.loc[[0, 1]]
Out[30]:
age name sex
0 27 Caleb M
1 26 Whitney F
In [31]:
# Selecting just one row by its index label returns a Series.
# .loc replaces .ix, which was deprecated and later removed from pandas.
frame.loc[2]
Out[31]:
age          3
name    Hastin
sex          M
Name: 2, dtype: object
In [32]:
# Assigning a scalar to a new column label broadcasts it to every row.
frame.loc[:, 'state'] = 'Wisconsin'
frame.loc[:, 'country'] = 'USA'
frame
Out[32]:
age name sex state country
0 27 Caleb M Wisconsin USA
1 26 Whitney F Wisconsin USA
2 3 Hastin M Wisconsin USA
3 1 Rafe M Wisconsin USA
In [33]:
# drop returns a new DataFrame without the column; frame itself is unchanged.
frame.drop(labels=['country'], axis=1)
Out[33]:
age name sex state
0 27 Caleb M Wisconsin
1 26 Whitney F Wisconsin
2 3 Hastin M Wisconsin
3 1 Rafe M Wisconsin
In [34]:
# Add one row. 'state' and 'country' are not supplied for it, so they are
# filled with NaN. pd.concat is used because DataFrame.append was deprecated
# and later removed from pandas; ignore_index=True renumbers the rows 0..n-1.
frame = pd.concat(
    [frame, DataFrame([{'age': 22, 'name': 'Wes', 'sex': 'M'}])],
    ignore_index=True
)
frame
Out[34]:
age name sex state country
0 27 Caleb M Wisconsin USA
1 26 Whitney F Wisconsin USA
2 3 Hastin M Wisconsin USA
3 1 Rafe M Wisconsin USA
4 22 Wes M NaN NaN
In [35]:
# Rebind frame to the result of drop so the column removal persists.
frame = frame.drop(labels=['country'], axis=1)
frame
Out[35]:
age name sex state
0 27 Caleb M Wisconsin
1 26 Whitney F Wisconsin
2 3 Hastin M Wisconsin
3 1 Rafe M Wisconsin
4 22 Wes M NaN
In [36]:
# Set one cell with a single .loc[row_label, column_label] assignment.
# The chained form frame.ix[4]['state'] = ... first extracts row 4 as a
# separate object and then assigns into that object, so the write is not
# guaranteed to reach frame (and .ix itself has been removed from pandas).
frame.loc[4, 'state'] = 'Illinois'
frame
Out[36]:
age name sex state
0 27 Caleb M Wisconsin
1 26 Whitney F Wisconsin
2 3 Hastin M Wisconsin
3 1 Rafe M Wisconsin
4 22 Wes M Illinois
In [37]:
# Reshape to one row per name and one column per state, with age as the cell
# value; name/state combinations that do not occur come out as NaN.
frame.pivot(
    index='name',
    columns='state',
    values='age'
)
Out[37]:
state Illinois Wisconsin
name
Caleb NaN 27
Hastin NaN 3
Rafe NaN 1
Wes 22 NaN
Whitney NaN 26
In [38]:
# Select rows labelled 2 and 3 and, from those, only the name and age columns.
# .loc replaces .ix, which was deprecated and later removed from pandas.
frame.loc[[2, 3], ['name', 'age']]
Out[38]:
name age
2 Hastin 3
3 Rafe 1
In [39]:
# Redisplay the full frame; the row/column selection in the previous cell
# returned a new object and left frame as it was.
frame
Out[39]:
age name sex state
0 27 Caleb M Wisconsin
1 26 Whitney F Wisconsin
2 3 Hastin M Wisconsin
3 1 Rafe M Wisconsin
4 22 Wes M Illinois
In [40]:
# Nice way to do column reindexing: take every row (:) and list the columns
# in the order they should appear. The result is a new DataFrame.
# .loc replaces .ix, which was deprecated and later removed from pandas.
frame.loc[:, ['name', 'age', 'sex', 'state']]
Out[40]:
name age sex state
0 Caleb 27 M Wisconsin
1 Whitney 26 F Wisconsin
2 Hastin 3 M Wisconsin
3 Rafe 1 M Wisconsin
4 Wes 22 M Illinois