import pandas as pd
# Toy dataset: five rows identified by the `patient` and `obs` columns,
# with a 0/1 `treatment` value and a text `score` label per row.
raw_data = {
    'patient': [1, 1, 1, 2, 2],
    'obs': [1, 2, 3, 1, 2],
    'treatment': [0, 1, 0, 1, 0],
    'score': ['strong', 'weak', 'normal', 'weak', 'strong'],
}

# The column order is passed explicitly so the frame layout is fixed here.
df = pd.DataFrame(
    data=raw_data,
    columns=['patient', 'obs', 'treatment', 'score'],
)
df
| | patient | obs | treatment | score |
---|---|---|---|---|
0 | 1 | 1 | 0 | strong |
1 | 1 | 2 | 1 | weak |
2 | 1 | 3 | 0 | normal |
3 | 2 | 1 | 1 | weak |
4 | 2 | 2 | 0 | strong |
def score_to_numeric(x):
    """Convert a text score label to its ordinal value.

    Args:
        x: A score label. 'strong', 'normal' and 'weak' are recognised.

    Returns:
        3 for 'strong', 2 for 'normal', 1 for 'weak'; None for any other
        value.
    """
    levels = {'strong': 3, 'normal': 2, 'weak': 1}
    try:
        return levels.get(x)
    except TypeError:
        # Unhashable values (e.g. a list) cannot be looked up in a dict;
        # they are not valid labels, so treat them like any unknown value.
        return None
# Add a numeric companion to the text `score` column by passing each label
# through score_to_numeric.
df['score_num'] = df['score'].map(score_to_numeric)
df
| | patient | obs | treatment | score | score_num |
---|---|---|---|---|---|
0 | 1 | 1 | 0 | strong | 3 |
1 | 1 | 2 | 1 | weak | 1 |
2 | 1 | 3 | 0 | normal | 2 |
3 | 2 | 1 | 1 | weak | 1 |
4 | 2 | 2 | 0 | strong | 3 |