# import pandas
import pandas as pd
# Build a small example frame: a numeric 'score' column and a 'tags' column
# whose cells each hold a list of three strings.
raw_data = dict(
    score=[1, 2, 3],
    tags=[
        ['apple', 'pear', 'guava'],
        ['truck', 'car', 'plane'],
        ['cat', 'dog', 'mouse'],
    ],
)
df = pd.DataFrame(data=raw_data, columns=['score', 'tags'])
# Display the frame (rendered as the cell output in a notebook).
df
| | score | tags |
---|---|---|
0 | 1 | [apple, pear, guava] |
1 | 2 | [truck, car, plane] |
2 | 3 | [cat, dog, mouse] |
# Spread each list in df.tags across its own columns (one column per list
# position), then label those positional columns tag_0, tag_1, ... in the
# same chained expression.
tags = df['tags'].apply(pd.Series).add_prefix('tag_')
# Display the expanded tags frame.
tags
| | tag_0 | tag_1 | tag_2 |
---|---|---|---|
0 | apple | pear | guava |
1 | truck | car | plane |
2 | cat | dog | mouse |
# Place the expanded tag columns next to the original columns; rows are
# aligned on the index the two frames share.
pd.concat([df, tags], axis='columns')
| | score | tags | tag_0 | tag_1 | tag_2 |
---|---|---|---|---|---|
0 | 1 | [apple, pear, guava] | apple | pear | guava |
1 | 2 | [truck, car, plane] | truck | car | plane |
2 | 3 | [cat, dog, mouse] | cat | dog | mouse |