import pandas as pd
import numpy as np
# Build a nine-element Series by repeating the three fruit names three times.
values = pd.Series(3 * ["apple", "orange", "banana"])
values
0     apple
1    orange
2    banana
3     apple
4    orange
5    banana
6     apple
7    orange
8    banana
dtype: object
# Distinct values of the Series; the output lists them in order of first appearance.
values.unique()
array(['apple', 'orange', 'banana'], dtype=object)
# Number of occurrences of each distinct value.
values.value_counts()
apple     3
orange    3
banana    3
dtype: int64
# Wrap the Series in a single-column frame named "fruits"; the row index is
# carried over from the Series unchanged.
df = pd.DataFrame({"fruits": values})
df
|   | fruits |
|---|--------|
| 0 | apple  |
| 1 | orange |
| 2 | banana |
| 3 | apple  |
| 4 | orange |
| 5 | banana |
| 6 | apple  |
| 7 | orange |
| 8 | banana |
# Cast the column to the category dtype and keep only the underlying
# Categorical array (dropping the Series wrapper and its index).
f = df["fruits"].astype("category").values
f
[apple, orange, banana, apple, orange, banana, apple, orange, banana]
Categories (3, object): [apple, banana, orange]
# Check the concrete type returned by .values on a category-dtype Series.
type(f)
pandas.core.categorical.Categorical
# The distinct labels of the Categorical, held as an Index.
f.categories
Index(['apple', 'banana', 'orange'], dtype='object')
# Integer code per element: each code is the position of that element's
# label within f.categories.
f.codes
array([0, 2, 1, 0, 2, 1, 0, 2, 1], dtype=int8)