import pandas as pd
import numpy as np
from platform import python_version
# Show the Python, pandas and NumPy versions this notebook was run with.
python_version(), pd.__version__, np.__version__
('3.6.9', '1.0.5', '1.19.0')
Locate rows by label (`loc`)
# Five-person sample frame built row by row.  No index is passed, so the
# rows carry the default integer labels 0..4.
df_default_index = pd.DataFrame(
    [
        ('john', 22, 'AK'),
        ('mary', 33, 'DC'),
        ('peter', 27, 'CA'),
        ('nancy', 22, 'CA'),
        ('gary', 31, 'NY'),
    ],
    columns=['name', 'age', 'state'],
)
df_default_index
name | age | state | |
---|---|---|---|
0 | john | 22 | AK |
1 | mary | 33 | DC |
2 | peter | 27 | CA |
3 | nancy | 22 | CA |
4 | gary | 31 | NY |
# Select the row whose index label is 0; the list selector [0] gives back a
# one-row DataFrame (see the output below).
df_default_index.loc[[0]]
name | age | state | |
---|---|---|---|
0 | john | 22 | AK |
# Several labels can be requested at once by listing them.
df_default_index.loc[[2,3]]
name | age | state | |
---|---|---|---|
2 | peter | 27 | CA |
3 | nancy | 22 | CA |
# Same people as before, but the names are used as the row labels (index)
# instead of being stored in a regular column.
df_name_index = pd.DataFrame(
    [(22, 'AK'), (33, 'DC'), (27, 'CA'), (22, 'CA'), (31, 'NY')],
    index=['john', 'mary', 'peter', 'nancy', 'gary'],
    columns=['age', 'state'],
)
df_name_index
age | state | |
---|---|---|
john | 22 | AK |
mary | 33 | DC |
peter | 27 | CA |
nancy | 22 | CA |
gary | 31 | NY |
# With names as the index, .loc looks rows up by name.
df_name_index.loc[['peter']]
age | state | |
---|---|---|
peter | 27 | CA |
# Same label-based lookup for a different name.
df_name_index.loc[['john']]
age | state | |
---|---|---|
john | 22 | AK |
Locate rows by position (`iloc`)
# .iloc selects by integer position, independent of the string labels:
# position 0 is the first row ('john').
df_name_index.iloc[[0]]
age | state | |
---|---|---|
john | 22 | AK |
# Positions 2, 3 and 4 are the third to fifth rows, whatever their labels.
df_name_index.iloc[[2,3,4]]
age | state | |
---|---|---|
peter | 27 | CA |
nancy | 22 | CA |
gary | 31 | NY |
# With the default integer index, position 0 and label 0 are the same row.
df_default_index.iloc[[0]]
name | age | state | |
---|---|---|---|
0 | john | 22 | AK |
# Positions 2-4 coincide with labels 2-4 under the default integer index.
df_default_index.iloc[[2,3,4]]
name | age | state | |
---|---|---|---|
2 | peter | 27 | CA |
3 | nancy | 22 | CA |
4 | gary | 31 | NY |
Set cell values: must use `loc`
# Fresh copy of the sample data, used below to demonstrate writing
# individual cells.
df_set = pd.DataFrame(
    dict(
        name=['john', 'mary', 'peter', 'nancy', 'gary'],
        age=[22, 33, 27, 22, 31],
        state=['AK', 'DC', 'CA', 'CA', 'NY'],
    )
)
df_set
name | age | state | |
---|---|---|---|
0 | john | 22 | AK |
1 | mary | 33 | DC |
2 | peter | 27 | CA |
3 | nancy | 22 | CA |
4 | gary | 31 | NY |
# Write single cells with .loc[row_label, column_name] = value.
# Row 0 gets a new name, row 3 a new age.
df_set.loc[0,'name'] = 'bartholomew'
df_set.loc[3, 'age'] = 39
df_set
name | age | state | |
---|---|---|---|
0 | bartholomew | 22 | AK |
1 | mary | 33 | DC |
2 | peter | 27 | CA |
3 | nancy | 39 | CA |
4 | gary | 31 | NY |
# With a string index the row selector is the name itself: set john's age.
df_name_index.loc['john','age'] = 99
df_name_index
age | state | |
---|---|---|
john | 99 | AK |
mary | 33 | DC |
peter | 27 | CA |
nancy | 22 | CA |
gary | 31 | NY |
# Sample data given as one record per person; `columns` fixes the column
# order explicitly.
df = pd.DataFrame(
    [
        {'name': 'john', 'age': 22, 'state': 'AK'},
        {'name': 'mary', 'age': 33, 'state': 'DC'},
        {'name': 'peter', 'age': 27, 'state': 'CA'},
        {'name': 'nancy', 'age': 22, 'state': 'CA'},
        {'name': 'gary', 'age': 31, 'state': 'NY'},
    ],
    columns=['name', 'age', 'state'],
)
df
name | age | state | |
---|---|---|---|
0 | john | 22 | AK |
1 | mary | 33 | DC |
2 | peter | 27 | CA |
3 | nancy | 22 | CA |
4 | gary | 31 | NY |
# Use the 'name' column as the row index.  The result is only displayed here,
# not assigned back to `df`.  Per the pandas docs, verify_integrity=True makes
# pandas check the new index for duplicate labels.
df.set_index('name', verify_integrity=True)
age | state | |
---|---|---|
name | ||
john | 22 | AK |
mary | 33 | DC |
peter | 27 | CA |
nancy | 22 | CA |
gary | 31 | NY |
# Sample data plus a boolean flag column that starts out False for everyone.
df = pd.DataFrame(
    dict(
        name=['john', 'mary', 'peter', 'nancy', 'gary'],
        age=[22, 33, 27, 22, 31],
        state=['AK', 'DC', 'CA', 'CA', 'NY'],
        lives_in_ca=[False] * 5,
    )
)
df
name | age | state | lives_in_ca | |
---|---|---|---|---|
0 | john | 22 | AK | False |
1 | mary | 33 | DC | False |
2 | peter | 27 | CA | False |
3 | nancy | 22 | CA | False |
4 | gary | 31 | NY | False |
# Boolean mask marking the rows whose state is 'CA'.  Passing the mask
# straight to .loc updates exactly those rows, without building a filtered
# frame just to read its index labels (labels would also match unrelated
# rows if the index contained duplicates).
in_california = df['state'] == 'CA'
df.loc[in_california, 'lives_in_ca'] = True
df
name | age | state | lives_in_ca | |
---|---|---|---|---|
0 | john | 22 | AK | False |
1 | mary | 33 | DC | False |
2 | peter | 27 | CA | True |
3 | nancy | 22 | CA | True |
4 | gary | 31 | NY | False |
# Rebuild the sample frame, then keep only the people older than 30.
# The filtered frame is derived directly from `df` by boolean indexing.
df = pd.DataFrame(
    [
        ('john', 22, 'AK'),
        ('mary', 33, 'DC'),
        ('peter', 27, 'CA'),
        ('nancy', 22, 'CA'),
        ('gary', 31, 'NY'),
    ],
    columns=['name', 'age', 'state'],
)
df_over_30_years = df[df['age'].gt(30)]
df_over_30_years
name | age | state | |
---|---|---|---|
1 | mary | 33 | DC |
4 | gary | 31 | NY |
# Adding a column to the filtered frame triggers the SettingWithCopyWarning
# shown below.
df_over_30_years['new_column'] = 'some_value'
/home/felipe/jekyll-utils/jekyll-venv/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
# Filter first, then take an explicit copy of the filtered rows.  Calling
# .copy() on the filtered result (rather than on `df` before filtering)
# copies only the matching rows and leaves a frame that is independent of
# `df`, so columns can be added to it without a SettingWithCopyWarning.
df_over_30_years = df[df['age'] > 30].copy()
df_over_30_years
name | age | state | |
---|---|---|---|
1 | mary | 33 | DC |
4 | gary | 31 | NY |
# Same assignment as before, now on a frame built from a copy: the column is
# added and no warning appears in the output.
df_over_30_years['new_column'] = 'some_value'
df_over_30_years
name | age | state | new_column | |
---|---|---|---|---|
1 | mary | 33 | DC | some_value |
4 | gary | 31 | NY | some_value |