import pandas as pd
import numpy as np
from IPython.display import display
# Three parallel label arrays, one entry per observation (11 in total).
# dtype=object keeps the labels as plain Python strings.
a = np.array(["foo"] * 4 + ["bar"] * 4 + ["foo"] * 3, dtype=object)
b = np.array(
    ["one"] * 3 + ["two"] + ["one"] * 3 + ["two"] * 3 + ["one"],
    dtype=object,
)
c = np.array(
    [
        "dull", "dull", "shiny", "dull",
        "dull", "shiny", "shiny", "dull",
        "shiny", "shiny", "shiny",
    ],
    dtype=object,
)

# Count how often each value of `a` (rows) co-occurs with each
# (b, c) pair (two-level columns).
pd.crosstab(index=a, columns=[b, c], rownames=["a"], colnames=["b", "c"])
| b | one | one | two | two |
|---|---|---|---|---|
| c | dull | shiny | dull | shiny |
| a | | | | |
| bar | 1 | 2 | 1 | 0 |
| foo | 2 | 2 | 1 | 2 |
`factorize` is useful for obtaining a numeric representation of an array when all that matters is identifying distinct values.
# Mixed-type series containing repeated values.
series = pd.Series([1, 2, 3, "a", "a", 1])

# Encode every element as an integer code; `labels` holds the distinct
# values in order of first appearance, and `codes` indexes into it.
codes, labels = pd.factorize(series)
codes
array([0, 1, 2, 3, 3, 0])
# Distinct values found by factorize(); each entry of `codes` is a position in this index.
labels
Index([1, 2, 3, 'a'], dtype='object')
# A single column `v` whose cells are themselves lists.
df = pd.DataFrame({"v": [[1, 2], ["a", "b"]]})
df
| | v |
---|---|
0 | [1, 2] |
1 | [a, b] |
# Give every list element in column `v` its own row; the original row label is repeated.
df.explode(column="v")
| | v |
---|---|
0 | 1 |
0 | 2 |
1 | a |
1 | b |
# Same result via the Series API: explode the column itself, then wrap it back into a DataFrame.
df["v"].explode().to_frame()
| | v |
---|---|
0 | 1 |
0 | 2 |
1 | a |
1 | b |
# Small two-column frame with an explicit 1-based index, used to
# demonstrate stack / unstack.
df = pd.DataFrame(
    {"foo": [10, 11, 12, 13], "bar": [1, 2, 3, 4]},
    index=[1, 2, 3, 4],
)
df
| | foo | bar |
---|---|---|
1 | 10 | 1 |
2 | 11 | 2 |
3 | 12 | 3 |
4 | 13 | 4 |
# stack() moves the column labels into an inner index level, giving one row per
# (row label, column name) pair; to_frame() wraps the result in a one-column frame labelled 0.
df.stack().to_frame()
| | | 0 |
|---|---|---|
| 1 | foo | 10 |
| | bar | 1 |
| 2 | foo | 11 |
| | bar | 2 |
| 3 | foo | 12 |
| | bar | 3 |
| 4 | foo | 13 |
| | bar | 4 |
# unstack() undoes stack(): the inner index level goes back to being columns,
# so the round trip shows the same foo/bar table as before.
df.stack().unstack()
| | foo | bar |
---|---|---|
1 | 10 | 1 |
2 | 11 | 2 |
3 | 12 | 3 |
4 | 13 | 4 |