import pandas as pd
from pandas import DataFrame, Series
# Build a small example frame from a dict mapping column name -> values.
data_dict = {
    'name': ['Caleb', 'Whitney', 'Hastin', 'Rafe'],
    'age': [27, 26, 3, 1],
    'sex': ['M', 'F', 'M', 'M'],
}
# Pin the column order explicitly: older pandas sorted dict keys while newer
# versions keep insertion order, so the default order is not reproducible.
frame = DataFrame(data_dict, columns=['age', 'name', 'sex'])
frame
| | age | name | sex |
---|---|---|---|
0 | 27 | Caleb | M |
1 | 26 | Whitney | F |
2 | 3 | Hastin | M |
3 | 1 | Rafe | M |
frame.loc[frame['age'] > 18]  # Boolean mask keeps only the rows where age exceeds 18
| | age | name | sex |
---|---|---|---|
0 | 27 | Caleb | M |
1 | 26 | Whitney | F |
# Call print() as a function: the Python 2 statement form is a SyntaxError on Python 3.
print("Average age:", frame.age.mean())
print("Age standard deviation:", frame.age.std())
Average age: 14.25
Age standard deviation: 14.174507634
frame['name']  # Selecting a single column returns a Series
0      Caleb
1    Whitney
2     Hastin
3       Rafe
Name: name, dtype: object
frame.loc[[0, 1]]  # Selecting by index label (.ix was removed in pandas 1.0)
| | age | name | sex |
---|---|---|---|
0 | 27 | Caleb | M |
1 | 26 | Whitney | F |
frame.loc[2]  # Selecting just 1 row returns a Series (.ix was removed in pandas 1.0)
age          3
name    Hastin
sex          M
Name: 2, dtype: object
# Assigning a scalar to a new column label adds that column in place,
# with the same value repeated on every existing row.
frame['state'] = 'Wisconsin'
frame['country'] = 'USA'
frame
| | age | name | sex | state | country |
---|---|---|---|---|---|
0 | 27 | Caleb | M | Wisconsin | USA |
1 | 26 | Whitney | F | Wisconsin | USA |
2 | 3 | Hastin | M | Wisconsin | USA |
3 | 1 | Rafe | M | Wisconsin | USA |
frame.drop(columns=['country'])  # Returns a new frame; `frame` itself is left unchanged
| | age | name | sex | state |
---|---|---|---|---|
0 | 27 | Caleb | M | Wisconsin |
1 | 26 | Whitney | F | Wisconsin |
2 | 3 | Hastin | M | Wisconsin |
3 | 1 | Rafe | M | Wisconsin |
# DataFrame.append was removed in pandas 2.0; concatenate a one-row frame instead.
# Columns absent from the new row (state, country) are filled with NaN, and
# ignore_index=True renumbers the rows so the new one gets the next integer label.
frame = pd.concat(
    [frame, DataFrame([{'age': 22, 'name': 'Wes', 'sex': 'M'}])],
    ignore_index=True,
)
frame
| | age | name | sex | state | country |
---|---|---|---|---|---|
0 | 27 | Caleb | M | Wisconsin | USA |
1 | 26 | Whitney | F | Wisconsin | USA |
2 | 3 | Hastin | M | Wisconsin | USA |
3 | 1 | Rafe | M | Wisconsin | USA |
4 | 22 | Wes | M | NaN | NaN |
# drop() does not modify its receiver, so rebind `frame` to keep the result.
frame = frame.drop(columns=['country'])
frame
| | age | name | sex | state |
---|---|---|---|---|
0 | 27 | Caleb | M | Wisconsin |
1 | 26 | Whitney | F | Wisconsin |
2 | 3 | Hastin | M | Wisconsin |
3 | 1 | Rafe | M | Wisconsin |
4 | 22 | Wes | M | NaN |
# Set the cell with a single .loc[row, column] assignment. The chained form
# (select row 4, then assign into the result) can write to a temporary copy
# and leave `frame` unchanged; .ix itself was also removed in pandas 1.0.
frame.loc[4, 'state'] = 'Illinois'
frame
| | age | name | sex | state |
---|---|---|---|---|
0 | 27 | Caleb | M | Wisconsin |
1 | 26 | Whitney | F | Wisconsin |
2 | 3 | Hastin | M | Wisconsin |
3 | 1 | Rafe | M | Wisconsin |
4 | 22 | Wes | M | Illinois |
# Reshape to wide form: one row per name, one column per state, with age as
# the cell value; name/state combinations that do not occur show up as NaN.
frame.pivot(index='name', columns='state', values='age')
state | Illinois | Wisconsin |
---|---|---|
name | ||
Caleb | NaN | 27 |
Hastin | NaN | 3 |
Rafe | NaN | 1 |
Wes | 22 | NaN |
Whitney | NaN | 26 |
# Label-based selection of rows 2 and 3 and the name/age columns, in that order
# (.loc replaces .ix, which was removed in pandas 1.0).
frame.loc[[2, 3], ['name', 'age']]
| | name | age |
---|---|---|
2 | Hastin | 3 |
3 | Rafe | 1 |
frame
| | age | name | sex | state |
---|---|---|---|---|
0 | 27 | Caleb | M | Wisconsin |
1 | 26 | Whitney | F | Wisconsin |
2 | 3 | Hastin | M | Wisconsin |
3 | 1 | Rafe | M | Wisconsin |
4 | 22 | Wes | M | Illinois |
# Nice way to do column reindexing: keep every row and list the columns in the
# desired order (.loc replaces .ix, which was removed in pandas 1.0).
frame.loc[:, ['name', 'age', 'sex', 'state']]
| | name | age | sex | state |
---|---|---|---|---|
0 | Caleb | 27 | M | Wisconsin |
1 | Whitney | 26 | F | Wisconsin |
2 | Hastin | 3 | M | Wisconsin |
3 | Rafe | 1 | M | Wisconsin |
4 | Wes | 22 | M | Illinois |