from platform import python_version
python_version()
'3.6.9'
import pandas as pd
import numpy as np
pd.__version__, np.__version__
('1.0.5', '1.19.0')
# Sample data: four people, with the column order pinned to name, age.
df = pd.DataFrame(
    data={
        'name': ['alice', 'bob', 'charlie', 'david'],
        'age': [25, 26, 27, 22],
    },
    columns=['name', 'age'],
)
df
name | age | |
---|---|---|
0 | alice | 25 |
1 | bob | 26 |
2 | charlie | 27 |
3 | david | 22 |
import pandas as pd

# Sample data: four people, with the column order pinned to name, age.
df = pd.DataFrame(
    data={
        'name': ['alice', 'bob', 'charlie', 'david'],
        'age': [25, 26, 27, 22],
    },
    columns=['name', 'age'],
)

# Series.apply calls the given function once per element of the column.
# Every element of the name column is a string, so str.upper works on it.
df['name_uppercase'] = df['name'].apply(str.upper)
df
name | age | name_uppercase | |
---|---|---|---|
0 | alice | 25 | ALICE |
1 | bob | 26 | BOB |
2 | charlie | 27 | CHARLIE |
3 | david | 22 | DAVID |
import pandas as pd

df = pd.DataFrame({
    'name': ['alice', 'bob', 'charlie', 'david'],
    'age': [25, 26, 27, 22],
})[['name', 'age']]


def first_letter(input_str):
    """Return the first character of input_str, or '' if it is empty."""
    # Slicing (rather than input_str[0]) avoids an IndexError on ''.
    return input_str[:1]


# each element of the name column is a string,
# so it can be passed straight to a named function such as first_letter
df['first_letter'] = df['name'].apply(first_letter)
df
name | age | first_letter | |
---|---|---|---|
0 | alice | 25 | a |
1 | bob | 26 | b |
2 | charlie | 27 | c |
3 | david | 22 | d |
import pandas as pd

df = pd.DataFrame({
    'name': ['alice', 'bob', 'charlie', 'david'],
    'age': [25, 26, 27, 22],
})[['name', 'age']]


def concatenate(value_1, value_2):
    """Return str(value_1) and str(value_2) joined by "--"."""
    return f"{value_1!s}--{value_2!s}"


# note the use of DOUBLE SQUARE BRACKETS: df[['name', 'age']] selects a
# sub-DataFrame, and axis=1 hands each of its rows to the lambda as a Series
df['concatenated'] = df[['name', 'age']].apply(
    lambda row: concatenate(row['name'], row['age']),
    axis=1,
)
df
name | age | concatenated | |
---|---|---|---|
0 | alice | 25 | alice--25 |
1 | bob | 26 | bob--26 |
2 | charlie | 27 | charlie--27 |
3 | david | 22 | david--22 |
import numpy as np
import pandas as pd

df = pd.DataFrame({
    'value1': [1, 2, 3, 4, 5],
    'value2': [5, 4, 3, 2, 1],
    'value3': [10, 20, 30, 40, 50],
    'value4': [99, 99, 99, 99, np.nan],
})


def sum_all(row):
    """Return the sum of the values in row.

    For a pandas Series row (what DataFrame.apply passes with axis=1),
    NaN entries are skipped rather than propagated.
    """
    return np.sum(row)


# note that apply was called on the dataframe itself, not on columns;
# axis=1 passes each row to sum_all as a Series
df['sum_all'] = df.apply(sum_all, axis=1)
df
value1 | value2 | value3 | value4 | sum_all | |
---|---|---|---|---|---|
0 | 1 | 5 | 10 | 99.0 | 115.0 |
1 | 2 | 4 | 20 | 99.0 | 125.0 |
2 | 3 | 3 | 30 | 99.0 | 135.0 |
3 | 4 | 2 | 40 | 99.0 | 145.0 |
4 | 5 | 1 | 50 | NaN | 56.0 |
# Five people; the last age is missing (NaN), which makes the age column float.
df = pd.DataFrame(
    data={
        'name': ['alice', 'bob', 'charlie', 'david', 'edward'],
        'age': [25, 26, 27, 22, np.nan],
    },
    columns=['name', 'age'],
)
df
name | age | |
---|---|---|
0 | alice | 25.0 |
1 | bob | 26.0 |
2 | charlie | 27.0 |
3 | david | 22.0 |
4 | edward | NaN |
import numpy as np
import pandas as pd

df = pd.DataFrame({
    'name': ['alice', 'bob', 'charlie', 'david', 'edward'],
    'age': [25, 26, 27, 22, np.nan],
})[['name', 'age']]


def times_two_times_three(value):
    """Return a two-element Series holding value * 2 and value * 3.

    A NaN input yields NaN in both positions.
    """
    return pd.Series([value * 2, value * 3])


# note that apply was called on the age column; because the function
# returns a Series, apply produces a DataFrame with one column per element,
# and those columns are assigned to the two new column names in order
df[['times_2', 'times_3']] = df['age'].apply(times_two_times_three)
df
name | age | times_2 | times_3 | |
---|---|---|---|---|
0 | alice | 25.0 | 50.0 | 75.0 |
1 | bob | 26.0 | 52.0 | 78.0 |
2 | charlie | 27.0 | 54.0 | 81.0 |
3 | david | 22.0 | 44.0 | 66.0 |
4 | edward | NaN | NaN | NaN |
# Re-display df; it still holds the times_2 / times_3 columns assigned above.
df
name | age | times_2 | times_3 | |
---|---|---|---|---|
0 | alice | 25.0 | 50.0 | 75.0 |
1 | bob | 26.0 | 52.0 | 78.0 |
2 | charlie | 27.0 | 54.0 | 81.0 |
3 | david | 22.0 | 44.0 | 66.0 |
4 | edward | NaN | NaN | NaN |