# Report the interpreter and library versions this walkthrough runs against.
from platform import python_version
# Bare expression: the value is echoed only by an interactive session
# (REPL / notebook cell); when run as a plain script it is discarded.
python_version()
import pandas as pd
import numpy as np
# Tuple of (pandas version, numpy version); also a display-only expression.
pd.__version__, np.__version__
# Base sample frame used throughout: one text column and one integer column.
# Passing columns= fixes the column order explicitly.
df = pd.DataFrame(
    dict(
        name=['alice', 'bob', 'charlie', 'david'],
        age=[25, 26, 27, 22],
    ),
    columns=['name', 'age'],
)
df
import pandas as pd
# Sample frame with a text column ('name') and a numeric column ('age').
df = pd.DataFrame(
    {
        'name': ['alice', 'bob', 'charlie', 'david'],
        'age': [25, 26, 27, 22],
    },
    columns=['name', 'age'],
)
# Series.apply calls the given function once per element. Every element of
# the 'name' column is a str, so the unbound method str.upper can be passed
# directly to upper-case each value.
df['name_uppercase'] = df['name'].apply(str.upper)
df
import pandas as pd
df = pd.DataFrame({
    'name': ['alice', 'bob', 'charlie', 'david'],
    'age': [25, 26, 27, 22],
})[['name', 'age']]


def first_letter(input_str):
    """Return the first character of ``input_str``.

    Slicing with ``[:1]`` (rather than indexing with ``[0]``) means an empty
    string yields ``''`` instead of raising ``IndexError``.
    """
    return input_str[:1]


# Each element of the 'name' column is a string, so a named function that
# takes a single string can be handed straight to Series.apply.
df['first_letter'] = df['name'].apply(first_letter)
df
import pandas as pd
df = pd.DataFrame({
    'name': ['alice', 'bob', 'charlie', 'david'],
    'age': [25, 26, 27, 22],
})[['name', 'age']]


def concatenate(value_1, value_2):
    """Return both values rendered as text and joined by ``"--"``."""
    return f"{value_1}--{value_2}"


# Note the DOUBLE SQUARE BRACKETS: df[['name', 'age']] selects a DataFrame
# (not a Series), so apply(..., axis=1) hands each row to the lambda as a
# Series that can be indexed by column name.
df['concatenated'] = df[['name', 'age']].apply(
    lambda row: concatenate(row['name'], row['age']),
    axis=1,
)
df
import pandas as pd
df = pd.DataFrame({
    'value1': [1, 2, 3, 4, 5],
    'value2': [5, 4, 3, 2, 1],
    'value3': [10, 20, 30, 40, 50],
    'value4': [99, 99, 99, 99, np.nan],
})


def sum_all(row):
    """Return the sum of all values in ``row``.

    When ``row`` is a pandas Series, ``np.sum`` delegates to ``Series.sum``,
    which skips NaN by default, so a missing value does not turn the whole
    total into NaN.
    """
    return np.sum(row)


# Note that apply is called on the DataFrame itself, not on a single column;
# axis=1 passes each row to sum_all as a Series.
df['sum_all'] = df.apply(sum_all, axis=1)
df
# Sample frame whose last row has a missing age (NaN).
df = pd.DataFrame(
    dict(
        name=['alice', 'bob', 'charlie', 'david', 'edward'],
        age=[25, 26, 27, 22, np.nan],
    ),
    columns=['name', 'age'],
)
df
import pandas as pd
df = pd.DataFrame({
    'name': ['alice', 'bob', 'charlie', 'david', 'edward'],
    'age': [25, 26, 27, 22, np.nan],
})[['name', 'age']]


def times_two_times_three(value):
    """Return a two-element Series holding ``value * 2`` and ``value * 3``.

    Because the result is a Series, applying this function to a column
    produces one output column per element instead of a single column.
    """
    return pd.Series([value * 2, value * 3])


# Note that apply is called on the 'age' column. Each call returns a
# two-element Series, so the overall result is a two-column DataFrame whose
# columns are assigned, in order, to 'times_2' and 'times_3'.
df[['times_2', 'times_3']] = df['age'].apply(times_two_times_three)
df
df