#!/usr/bin/env python
# coding: utf-8

# Notebook export: short pandas demos of label-based (`loc`) and
# position-based (`iloc`) selection, assigning to cells, using a column as
# the index, and avoiding SettingWithCopyWarning.
#
# Bare expressions (e.g. a lone `df`) are notebook cell outputs; they have
# no effect when the file is run as a plain script.

# In[37]:


import pandas as pd
import numpy as np
from platform import python_version


# In[38]:


python_version(), pd.__version__, np.__version__


# ## loc
#
# > locate by label

# In[3]:


df_default_index = pd.DataFrame({
    'name': ['john', 'mary', 'peter', 'nancy', 'gary'],
    'age': [22, 33, 27, 22, 31],
    'state': ['AK', 'DC', 'CA', 'CA', 'NY'],
})
df_default_index


# In[4]:


# With the default RangeIndex the labels are the integers 0..n-1.
# Passing a list keeps the result a DataFrame.
df_default_index.loc[[0]]


# In[5]:


df_default_index.loc[[2, 3]]


# In[6]:


df_name_index = pd.DataFrame(
    index=['john', 'mary', 'peter', 'nancy', 'gary'],
    data={
        'age': [22, 33, 27, 22, 31],
        'state': ['AK', 'DC', 'CA', 'CA', 'NY'],
    },
)


# In[7]:


df_name_index


# In[8]:


# Here the labels are the names, so `loc` is addressed by name.
df_name_index.loc[['peter']]


# In[9]:


df_name_index.loc[['john']]


# ## iloc
#
# > locate by position

# In[10]:


# `iloc` ignores the labels and always counts rows from 0.
df_name_index.iloc[[0]]


# In[11]:


df_name_index.iloc[[2, 3, 4]]


# In[12]:


df_default_index.iloc[[0]]


# In[13]:


df_default_index.iloc[[2, 3, 4]]


# ## set value to individual cell
#
# > use `loc[row_label, column]` so the assignment hits the frame itself
# > rather than an intermediate selection

# In[14]:


df_set = pd.DataFrame({
    'name': ['john', 'mary', 'peter', 'nancy', 'gary'],
    'age': [22, 33, 27, 22, 31],
    'state': ['AK', 'DC', 'CA', 'CA', 'NY'],
})
df_set


# In[15]:


df_set.loc[0, 'name'] = 'bartholomew'
df_set.loc[3, 'age'] = 39
df_set


# In[16]:


df_name_index.loc['john', 'age'] = 99
df_name_index


# ## Use column as index

# In[26]:


df = pd.DataFrame({
    'name': ['john', 'mary', 'peter', 'nancy', 'gary'],
    'age': [22, 33, 27, 22, 31],
    'state': ['AK', 'DC', 'CA', 'CA', 'NY'],
})
df


# In[30]:


# Returns a new frame (`df` itself is untouched); `verify_integrity=True`
# makes pandas raise if the new index would contain duplicates.
df.set_index('name', verify_integrity=True)


# ## Set multiple values

# In[63]:


df = pd.DataFrame({
    'name': ['john', 'mary', 'peter', 'nancy', 'gary'],
    'age': [22, 33, 27, 22, 31],
    'state': ['AK', 'DC', 'CA', 'CA', 'NY'],
    'lives_in_ca': [False, False, False, False, False],
})
df


# In[64]:


# Index labels of every row whose state is CA.
index = df[df['state'] == 'CA'].index


# In[65]:


df.loc[index, 'lives_in_ca'] = True
df


# ## SettingWithCopyWarning

# In[19]:


df = pd.DataFrame({
    'name': ['john', 'mary', 'peter', 'nancy', 'gary'],
    'age': [22, 33, 27, 22, 31],
    'state': ['AK', 'DC', 'CA', 'CA', 'NY'],
})


# ### bad:

# In[20]:


# Intentional anti-pattern: the filtered frame is still tied to `df`.
df_over_30_years = df[df['age'] > 30]
df_over_30_years


# In[21]:


# Adding a column to that selection is what provokes the warning on pandas
# versions that emit SettingWithCopyWarning.
df_over_30_years['new_column'] = 'some_value'


# ### good:

# In[22]:


# Filter first, then copy: only the matching rows are duplicated and the
# result is explicitly independent of `df`.
df_over_30_years = df[df['age'] > 30].copy()
df_over_30_years


# In[23]:


df_over_30_years['new_column'] = 'some_value'


# In[24]:


df_over_30_years


# In[ ]: