Short verison:
# import the pandas module
import pandas as pd
# Create an example dataframe about a fictional army
raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'],
'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'],
'deaths': [523, 52, 25, 616, 43, 234, 523, 62, 62, 73, 37, 35],
'battles': [5, 42, 2, 2, 4, 7, 8, 3, 4, 7, 8, 9],
'size': [1045, 957, 1099, 1400, 1592, 1006, 987, 849, 973, 1005, 1099, 1523],
'veterans': [1, 5, 62, 26, 73, 37, 949, 48, 48, 435, 63, 345],
'readiness': [1, 2, 3, 3, 2, 1, 2, 3, 2, 1, 2, 3],
'armored': [1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1],
'deserters': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],
'origin': ['Arizona', 'California', 'Texas', 'Florida', 'Maine', 'Iowa', 'Alaska', 'Washington', 'Oregon', 'Wyoming', 'Louisana', 'Georgia']}
df = pd.DataFrame(raw_data, columns = ['regiment', 'company', 'deaths', 'battles', 'size', 'veterans', 'readiness', 'armored', 'deserters', 'origin'])
df = df.set_index('origin')
df.head()
regiment | company | deaths | battles | size | veterans | readiness | armored | deserters | |
---|---|---|---|---|---|---|---|---|---|
origin | |||||||||
Arizona | Nighthawks | 1st | 523 | 5 | 1045 | 1 | 1 | 1 | 4 |
California | Nighthawks | 1st | 52 | 42 | 957 | 5 | 2 | 0 | 24 |
Texas | Nighthawks | 2nd | 25 | 2 | 1099 | 62 | 3 | 1 | 31 |
Florida | Nighthawks | 2nd | 616 | 2 | 1400 | 26 | 3 | 1 | 2 |
Maine | Dragoons | 1st | 43 | 4 | 1592 | 73 | 2 | 0 | 3 |
df['size']
origin Arizona 1045 California 957 Texas 1099 Florida 1400 Maine 1592 Iowa 1006 Alaska 987 Washington 849 Oregon 973 Wyoming 1005 Louisana 1099 Georgia 1523 Name: size, dtype: int64
df[['size', 'veterans']]
size | veterans | |
---|---|---|
origin | ||
Arizona | 1045 | 1 |
California | 957 | 5 |
Texas | 1099 | 62 |
Florida | 1400 | 26 |
Maine | 1592 | 73 |
Iowa | 1006 | 37 |
Alaska | 987 | 949 |
Washington | 849 | 48 |
Oregon | 973 | 48 |
Wyoming | 1005 | 435 |
Louisana | 1099 | 63 |
Georgia | 1523 | 345 |
# Select all rows with the index label "Arizona"
df.loc[:'Arizona']
regiment | company | deaths | battles | size | veterans | readiness | armored | deserters | |
---|---|---|---|---|---|---|---|---|---|
origin | |||||||||
Arizona | Nighthawks | 1st | 523 | 5 | 1045 | 1 | 1 | 1 | 4 |
# Select every row up to 3
df.iloc[:2]
regiment | company | deaths | battles | size | veterans | readiness | armored | deserters | |
---|---|---|---|---|---|---|---|---|---|
origin | |||||||||
Arizona | Nighthawks | 1st | 523 | 5 | 1045 | 1 | 1 | 1 | 4 |
California | Nighthawks | 1st | 52 | 42 | 957 | 5 | 2 | 0 | 24 |
# Select the second and third row
df.iloc[1:2]
regiment | company | deaths | battles | size | veterans | readiness | armored | deserters | |
---|---|---|---|---|---|---|---|---|---|
origin | |||||||||
California | Nighthawks | 1st | 52 | 42 | 957 | 5 | 2 | 0 | 24 |
# Select every row after the third row
df.iloc[2:]
regiment | company | deaths | battles | size | veterans | readiness | armored | deserters | |
---|---|---|---|---|---|---|---|---|---|
origin | |||||||||
Texas | Nighthawks | 2nd | 25 | 2 | 1099 | 62 | 3 | 1 | 31 |
Florida | Nighthawks | 2nd | 616 | 2 | 1400 | 26 | 3 | 1 | 2 |
Maine | Dragoons | 1st | 43 | 4 | 1592 | 73 | 2 | 0 | 3 |
Iowa | Dragoons | 1st | 234 | 7 | 1006 | 37 | 1 | 1 | 4 |
Alaska | Dragoons | 2nd | 523 | 8 | 987 | 949 | 2 | 0 | 24 |
Washington | Dragoons | 2nd | 62 | 3 | 849 | 48 | 3 | 1 | 31 |
Oregon | Scouts | 1st | 62 | 4 | 973 | 48 | 2 | 0 | 2 |
Wyoming | Scouts | 1st | 73 | 7 | 1005 | 435 | 1 | 0 | 3 |
Louisana | Scouts | 2nd | 37 | 8 | 1099 | 63 | 2 | 1 | 2 |
Georgia | Scouts | 2nd | 35 | 9 | 1523 | 345 | 3 | 1 | 3 |
# Select the first 2 columns
df.iloc[:,:2]
regiment | company | |
---|---|---|
origin | ||
Arizona | Nighthawks | 1st |
California | Nighthawks | 1st |
Texas | Nighthawks | 2nd |
Florida | Nighthawks | 2nd |
Maine | Dragoons | 1st |
Iowa | Dragoons | 1st |
Alaska | Dragoons | 2nd |
Washington | Dragoons | 2nd |
Oregon | Scouts | 1st |
Wyoming | Scouts | 1st |
Louisana | Scouts | 2nd |
Georgia | Scouts | 2nd |
# Select rows where df.deaths is greater than 50
df[df['deaths'] > 50]
regiment | company | deaths | battles | size | veterans | readiness | armored | deserters | |
---|---|---|---|---|---|---|---|---|---|
origin | |||||||||
Arizona | Nighthawks | 1st | 523 | 5 | 1045 | 1 | 1 | 1 | 4 |
California | Nighthawks | 1st | 52 | 42 | 957 | 5 | 2 | 0 | 24 |
Florida | Nighthawks | 2nd | 616 | 2 | 1400 | 26 | 3 | 1 | 2 |
Iowa | Dragoons | 1st | 234 | 7 | 1006 | 37 | 1 | 1 | 4 |
Alaska | Dragoons | 2nd | 523 | 8 | 987 | 949 | 2 | 0 | 24 |
Washington | Dragoons | 2nd | 62 | 3 | 849 | 48 | 3 | 1 | 31 |
Oregon | Scouts | 1st | 62 | 4 | 973 | 48 | 2 | 0 | 2 |
Wyoming | Scouts | 1st | 73 | 7 | 1005 | 435 | 1 | 0 | 3 |
# Select rows where df.deaths is greater than 500 or less than 50
df[(df['deaths'] > 500) | (df['deaths'] < 50)]
regiment | company | deaths | battles | size | veterans | readiness | armored | deserters | |
---|---|---|---|---|---|---|---|---|---|
origin | |||||||||
Arizona | Nighthawks | 1st | 523 | 5 | 1045 | 1 | 1 | 1 | 4 |
Texas | Nighthawks | 2nd | 25 | 2 | 1099 | 62 | 3 | 1 | 31 |
Florida | Nighthawks | 2nd | 616 | 2 | 1400 | 26 | 3 | 1 | 2 |
Maine | Dragoons | 1st | 43 | 4 | 1592 | 73 | 2 | 0 | 3 |
Alaska | Dragoons | 2nd | 523 | 8 | 987 | 949 | 2 | 0 | 24 |
Louisana | Scouts | 2nd | 37 | 8 | 1099 | 63 | 2 | 1 | 2 |
Georgia | Scouts | 2nd | 35 | 9 | 1523 | 345 | 3 | 1 | 3 |
# Select all the regiments not named "Dragoons"
df[~(df['regiment'] == 'Dragoons')]
regiment | company | deaths | battles | size | veterans | readiness | armored | deserters | |
---|---|---|---|---|---|---|---|---|---|
origin | |||||||||
Arizona | Nighthawks | 1st | 523 | 5 | 1045 | 1 | 1 | 1 | 4 |
California | Nighthawks | 1st | 52 | 42 | 957 | 5 | 2 | 0 | 24 |
Texas | Nighthawks | 2nd | 25 | 2 | 1099 | 62 | 3 | 1 | 31 |
Florida | Nighthawks | 2nd | 616 | 2 | 1400 | 26 | 3 | 1 | 2 |
Oregon | Scouts | 1st | 62 | 4 | 973 | 48 | 2 | 0 | 2 |
Wyoming | Scouts | 1st | 73 | 7 | 1005 | 435 | 1 | 0 | 3 |
Louisana | Scouts | 2nd | 37 | 8 | 1099 | 63 | 2 | 1 | 2 |
Georgia | Scouts | 2nd | 35 | 9 | 1523 | 345 | 3 | 1 | 3 |
.ix is the combination of both .loc and .iloc. Integers are first considered labels, but if not found, falls back on positional indexing
# Select the rows called Texas and Arizona
df.ix[['Arizona', 'Texas']]
regiment | company | deaths | battles | size | veterans | readiness | armored | deserters | |
---|---|---|---|---|---|---|---|---|---|
Arizona | Nighthawks | 1st | 523 | 5 | 1045 | 1 | 1 | 1 | 4 |
Texas | Nighthawks | 2nd | 25 | 2 | 1099 | 62 | 3 | 1 | 31 |
# Select the third cell in the row named Arizona
df.ix['Arizona', 'deaths']
523
# Select the third cell in the row named Arizona
df.ix['Arizona', 2]
523
# Select the third cell down in the column named deaths
df.ix[2, 'deaths']
25