In [37]:
import pandas as pd
import numpy as np
from platform import python_version
In [38]:
# Record the Python, pandas and numpy versions that produced the outputs in this notebook.
python_version(), pd.__version__, np.__version__
Out[38]:
('3.6.9', '1.0.5', '1.19.0')

loc

locate by label

In [3]:
# Sample table of people. No index is passed, so the rows get the default
# integer labels 0..4, which the `loc` examples below select on.
df_default_index = pd.DataFrame(
    data=[
        ('john', 22, 'AK'),
        ('mary', 33, 'DC'),
        ('peter', 27, 'CA'),
        ('nancy', 22, 'CA'),
        ('gary', 31, 'NY'),
    ],
    columns=['name', 'age', 'state'],
)
df_default_index
Out[3]:
name age state
0 john 22 AK
1 mary 33 DC
2 peter 27 CA
3 nancy 22 CA
4 gary 31 NY
In [4]:
# `loc` selects rows by index label. Passing a list (even with a single label) displays a DataFrame.
df_default_index.loc[[0]]
Out[4]:
name age state
0 john 22 AK
In [5]:
# Several labels at once: the rows labelled 2 and 3.
df_default_index.loc[[2,3]]
Out[5]:
name age state
2 peter 27 CA
3 nancy 22 CA
In [6]:
# Same people, but the names are the row labels (the index) instead of a
# column, so rows can be looked up by name with `loc`.
df_name_index = pd.DataFrame(
    {'age': [22, 33, 27, 22, 31], 'state': ['AK', 'DC', 'CA', 'CA', 'NY']},
    index=['john', 'mary', 'peter', 'nancy', 'gary'],
)
In [7]:
# Display the name-indexed frame: the index column now holds the names.
df_name_index
Out[7]:
age state
john 22 AK
mary 33 DC
peter 27 CA
nancy 22 CA
gary 31 NY
In [8]:
# With a string index, the label passed to `loc` is the name itself.
df_name_index.loc[['peter']]
Out[8]:
age state
peter 27 CA
In [9]:
# Same lookup with a different label.
df_name_index.loc[['john']]
Out[9]:
age state
john 22 AK

iloc

locate by position

In [10]:
# `iloc` selects by integer position, whatever the index labels are: position 0 is the first row.
df_name_index.iloc[[0]]
Out[10]:
age state
john 22 AK
In [11]:
# Positions 2, 3 and 4, i.e. the last three rows of this five-row frame.
df_name_index.iloc[[2,3,4]]
Out[11]:
age state
peter 27 CA
nancy 22 CA
gary 31 NY
In [12]:
# With the default integer index, position 0 and label 0 happen to be the same row.
df_default_index.iloc[[0]]
Out[12]:
name age state
0 john 22 AK
In [13]:
# Positions 2-4; in this frame they coincide with the labels 2-4.
df_default_index.iloc[[2,3,4]]
Out[13]:
name age state
2 peter 27 CA
3 nancy 22 CA
4 gary 31 NY

Set the value of an individual cell

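Use `loc[row_label, column_label]` to assign to a single cell (`at` also works for one scalar value); avoid chained indexing such as `df['age'][3] = 39`.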

In [14]:
# A separate frame for the assignment examples in this section.
df_set = pd.DataFrame(dict(
    name=['john', 'mary', 'peter', 'nancy', 'gary'],
    age=[22, 33, 27, 22, 31],
    state=['AK', 'DC', 'CA', 'CA', 'NY'],
))
df_set
Out[14]:
name age state
0 john 22 AK
1 mary 33 DC
2 peter 27 CA
3 nancy 22 CA
4 gary 31 NY
In [15]:
# Each assignment addresses exactly one cell as (row label, column name).
# Row 0: replace the name.
df_set.loc[0,'name'] = 'bartholomew'

# Row 3: replace the age.
df_set.loc[3, 'age'] = 39

# Display the frame to confirm that only those two cells changed.
df_set
Out[15]:
name age state
0 bartholomew 22 AK
1 mary 33 DC
2 peter 27 CA
3 nancy 39 CA
4 gary 31 NY
In [16]:
# Same pattern on the name-indexed frame: the row label is now the string 'john'.
# NOTE(review): this modifies `df_name_index` in place, so re-running the earlier
# cells that display it will show 99 until the frame is rebuilt.
df_name_index.loc['john','age'] = 99
df_name_index
Out[16]:
age state
john 99 AK
mary 33 DC
peter 27 CA
nancy 22 CA
gary 31 NY

Use column as index

In [26]:
# Start from the default integer index; `name` is still an ordinary column here.
df = pd.DataFrame(
    [
        {'name': 'john', 'age': 22, 'state': 'AK'},
        {'name': 'mary', 'age': 33, 'state': 'DC'},
        {'name': 'peter', 'age': 27, 'state': 'CA'},
        {'name': 'nancy', 'age': 22, 'state': 'CA'},
        {'name': 'gary', 'age': 31, 'state': 'NY'},
    ],
    columns=['name', 'age', 'state'],  # pin the column order explicitly
)
df
Out[26]:
name age state
0 john 22 AK
1 mary 33 DC
2 peter 27 CA
3 nancy 22 CA
4 gary 31 NY
In [30]:
# Promote the `name` column to the row index. verify_integrity=True asks pandas to
# check the new index for duplicate labels. The result is only displayed here, not
# assigned back, so `df` keeps its integer index.
df.set_index('name', verify_integrity=True)
Out[30]:
age state
name
john 22 AK
mary 33 DC
peter 27 CA
nancy 22 CA
gary 31 NY

Set multiple values

In [63]:
# `lives_in_ca` starts out False for every row; the cells below switch it on
# for the matching rows with a single assignment.
df = pd.DataFrame({
    'name': ['john', 'mary', 'peter', 'nancy', 'gary'],
    'age': [22, 33, 27, 22, 31],
    'state': ['AK', 'DC', 'CA', 'CA', 'NY'],
}).assign(lives_in_ca=False)
df
Out[63]:
name age state lives_in_ca
0 john 22 AK False
1 mary 33 DC False
2 peter 27 CA False
3 nancy 22 CA False
4 gary 31 NY False
In [64]:
# Row labels of everyone whose state is 'CA', taken straight from the index
# with a boolean mask.
index = df.index[df['state'] == 'CA']
In [65]:
# One `loc` assignment updates `lives_in_ca` for every row whose label is in `index`.
df.loc[index,'lives_in_ca'] = True
df
Out[65]:
name age state lives_in_ca
0 john 22 AK False
1 mary 33 DC False
2 peter 27 CA True
3 nancy 22 CA True
4 gary 31 NY False

SettingWithCopyWarning

In [19]:
# Reset `df` to the plain three-column sample for the examples in this section.
df = pd.DataFrame(
    data={
        'name': ['john', 'mary', 'peter', 'nancy', 'gary'],
        'age': [22, 33, 27, 22, 31],
        'state': ['AK', 'DC', 'CA', 'CA', 'NY'],
    },
    columns=['name', 'age', 'state'],
)

bad: adding a column to a filtered DataFrame that pandas still treats as derived from `df`:

In [20]:
# Keep only the rows with age > 30. The result comes straight from a boolean
# filter on `df`, with no explicit copy taken.
df_over_30_years = df[df['age']>30]
df_over_30_years
Out[20]:
name age state
1 mary 33 DC
4 gary 31 NY
In [21]:
# Adding a column to the filtered frame is what triggers the SettingWithCopyWarning shown below.
df_over_30_years['new_column'] = 'some_value'
/home/felipe/jekyll-utils/jekyll-venv/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.

good: work on an explicit copy, so the new column is unambiguously added to an independent DataFrame:

In [22]:
# Filter first, then take an explicit copy of the (smaller) result. The copy is
# an independent DataFrame, so adding a column to it afterwards does not raise
# SettingWithCopyWarning. Copying after the filter also avoids duplicating all
# of `df` only to throw most of the rows away.
df_over_30_years = df[df['age'] > 30].copy()
df_over_30_years
Out[22]:
name age state
1 mary 33 DC
4 gary 31 NY
In [23]:
# Same assignment as in the "bad" example; no warning is shown for this cell.
df_over_30_years['new_column'] = 'some_value'
In [24]:
# Display the result: the new column is present on the filtered rows.
df_over_30_years
Out[24]:
name age state new_column
1 mary 33 DC some_value
4 gary 31 NY some_value
In [ ]: