In [1]:
from platform import python_version

# Show the version of the Python interpreter running this notebook.
python_version()
Out[1]:
'3.6.9'
In [2]:
import pandas as pd
import numpy as np

# Show the installed library versions as a (pandas, numpy) tuple.
pd.__version__, np.__version__
Out[2]:
('1.0.5', '1.19.0')
In [7]:
# Small demo frame; `columns=` pins the column order to name, age.
df = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie', 'david'],
        'age': [25, 26, 27, 22],
    },
    columns=['name', 'age'],
)
df
Out[7]:
name age
0 alice 25
1 bob 26
2 charlie 27
3 david 22

Apply example: transform each element of a column

In [4]:
import pandas as pd

df = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie', 'david'],
        'age': [25, 26, 27, 22],
    },
    columns=['name', 'age'],
)

# every element of the name column is a str, so apply can call
# that element's .upper() method, one element at a time
df['name_uppercase'] = df['name'].apply(lambda name: name.upper())

df
Out[4]:
name age name_uppercase
0 alice 25 ALICE
1 bob 26 BOB
2 charlie 27 CHARLIE
3 david 22 DAVID

Apply a custom (named) function

In [5]:
import pandas as pd

df = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie', 'david'],
        'age': [25, 26, 27, 22],
    },
    columns=['name', 'age'],
)


def first_letter(input_str):
    """Return the first character of input_str (empty string if it is empty)."""
    return input_str[0:1]


# every element of the name column is a str, so the named function
# can be handed straight to apply and is called once per element
df['first_letter'] = df['name'].apply(first_letter)

df
Out[5]:
name age first_letter
0 alice 25 a
1 bob 26 b
2 charlie 27 c
3 david 22 d

Take multiple columns as parameters

In [11]:
import pandas as pd

df = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie', 'david'],
        'age': [25, 26, 27, 22],
    },
    columns=['name', 'age'],
)


def concatenate(value_1, value_2):
    """Return both values converted with str() and joined by '--'."""
    return "--".join([str(value_1), str(value_2)])


# DOUBLE SQUARE BRACKETS select a sub-frame (not a single column);
# axis=1 then passes each row to the lambda, which picks fields by label
df['concatenated'] = df[['name', 'age']].apply(
    lambda record: concatenate(record['name'], record['age']),
    axis=1,
)

df
Out[11]:
name age concatenated
0 alice 25 alice--25
1 bob 26 bob--26
2 charlie 27 charlie--27
3 david 22 david--22

Apply function to row

In [16]:
import pandas as pd
import numpy as np  # this cell uses np.nan and np.sum, so import numpy here too

df = pd.DataFrame({
    'value1': [1,2,3,4,5],
    'value2': [5,4,3,2,1],
    'value3': [10,20,30,40,50],
    'value4': [99,99,99,99,np.nan],
})

def sum_all(row):
    """Return the sum of every value in ``row``.

    ``row`` is the Series that ``df.apply(..., axis=1)`` passes in for each
    row of the frame. NaN entries are skipped by the sum, which is why the
    last row (value4 is NaN) still gets a numeric total instead of NaN.
    """
    return np.sum(row)

# note that apply was called on the dataframe itself, not on columns;
# axis=1 means sum_all receives one row at a time
df['sum_all'] = df.apply(sum_all, axis=1)

df
Out[16]:
value1 value2 value3 value4 sum_all
0 1 5 10 99.0 115.0
1 2 4 20 99.0 125.0
2 3 3 30 99.0 135.0
3 4 2 40 99.0 145.0
4 5 1 50 NaN 56.0

Return multiple values (fill several columns at once)

In [48]:
# Demo frame with one missing age; `columns=` pins the order to name, age.
df = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie', 'david', 'edward'],
        'age': [25, 26, 27, 22, np.nan],
    },
    columns=['name', 'age'],
)
df
Out[48]:
name age
0 alice 25.0
1 bob 26.0
2 charlie 27.0
3 david 22.0
4 edward NaN
In [45]:
import pandas as pd
import numpy as np  # this cell uses np.nan, so import numpy here too

df = pd.DataFrame({
    'name': ['alice','bob','charlie','david','edward'],
    'age': [25,26,27,22,np.nan],
})[['name', 'age']]

def times_two_times_three(value):
    """Return ``value`` doubled and tripled as a two-element Series.

    Returning a Series (rather than a tuple) makes ``Series.apply`` build a
    DataFrame with one column per returned element, which can then be
    assigned to two new columns in a single statement.
    """
    value_times_2 = value*2
    value_times_3 = value*3

    return pd.Series([value_times_2,value_times_3])

# note that apply was called on age column; the two result columns are
# matched to 'times_2' and 'times_3' by position
df[['times_2','times_3']]= df['age'].apply(times_two_times_three)
df
Out[45]:
name age times_2 times_3
0 alice 25.0 50.0 75.0
1 bob 26.0 52.0 78.0
2 charlie 27.0 54.0 81.0
3 david 22.0 44.0 66.0
4 edward NaN NaN NaN
In [46]:
# Display df again: it still carries the times_2 / times_3 columns.
df
Out[46]:
name age times_2 times_3
0 alice 25.0 50.0 75.0
1 bob 26.0 52.0 78.0
2 charlie 27.0 54.0 81.0
3 david 22.0 44.0 66.0
4 edward NaN NaN NaN
In [ ]: