import pandas as pd
import numpy as np
data = pd.Series(np.random.randn(9),
index=[['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],
[1, 2, 3, 1, 3, 1, 2, 2, 3]])
data
a 1 0.528770 2 -1.294225 3 0.656741 b 1 -1.678594 3 1.599596 c 1 0.461214 2 0.321586 d 2 0.850888 3 1.942281 dtype: float64
data.index
MultiIndex(levels=[['a', 'b', 'c', 'd'], [1, 2, 3]], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1, 1, 2]])
data["b"]
1 -1.678594 3 1.599596 dtype: float64
data.unstack()
1 | 2 | 3 | |
---|---|---|---|
a | 0.528770 | -1.294225 | 0.656741 |
b | -1.678594 | NaN | 1.599596 |
c | 0.461214 | 0.321586 | NaN |
d | NaN | 0.850888 | 1.942281 |
data.unstack().index
Index(['a', 'b', 'c', 'd'], dtype='object')
data.unstack().columns
Int64Index([1, 2, 3], dtype='int64')
df = pd.DataFrame(data)
df
0 | ||
---|---|---|
a | 1 | 0.528770 |
2 | -1.294225 | |
3 | 0.656741 | |
b | 1 | -1.678594 |
3 | 1.599596 | |
c | 1 | 0.461214 |
2 | 0.321586 | |
d | 2 | 0.850888 |
3 | 1.942281 |
import pandas as pd
import numpy as np
df1 = pd.DataFrame({"key": ["a", "b", "c", "d", "b", "b", "a"],
"data1": range(7)})
df1
data1 | key | |
---|---|---|
0 | 0 | a |
1 | 1 | b |
2 | 2 | c |
3 | 3 | d |
4 | 4 | b |
5 | 5 | b |
6 | 6 | a |
df2 = pd.DataFrame({"key": ["a", "b", "b", "c"],
'data2': range(4)})
df2
data2 | key | |
---|---|---|
0 | 0 | a |
1 | 1 | b |
2 | 2 | b |
3 | 3 | c |
df3 = pd.merge(df1, df2)
df3
data1 | key | data2 | |
---|---|---|---|
0 | 0 | a | 0 |
1 | 6 | a | 0 |
2 | 1 | b | 1 |
3 | 1 | b | 2 |
4 | 4 | b | 1 |
5 | 4 | b | 2 |
6 | 5 | b | 1 |
7 | 5 | b | 2 |
8 | 2 | c | 3 |
df1.merge(df2)
data1 | key | data2 | |
---|---|---|---|
0 | 0 | a | 0 |
1 | 6 | a | 0 |
2 | 1 | b | 1 |
3 | 1 | b | 2 |
4 | 4 | b | 1 |
5 | 4 | b | 2 |
6 | 5 | b | 1 |
7 | 5 | b | 2 |
8 | 2 | c | 3 |
df1.merge(df2, on = "key", how = "left")
data1 | key | data2 | |
---|---|---|---|
0 | 0 | a | 0.0 |
1 | 1 | b | 1.0 |
2 | 1 | b | 2.0 |
3 | 2 | c | 3.0 |
4 | 3 | d | NaN |
5 | 4 | b | 1.0 |
6 | 4 | b | 2.0 |
7 | 5 | b | 1.0 |
8 | 5 | b | 2.0 |
9 | 6 | a | 0.0 |
s1 = pd.Series([0, 1], index=['a', 'b'])
s2 = pd.Series([2, 3, 4], index=['c', 'd', 'e'])
s3 = pd.Series([5, 6], index=['f', 'g'])
s1
a 0 b 1 dtype: int64
s2
c 2 d 3 e 4 dtype: int64
s3
f 5 g 6 dtype: int64
pd.concat([s1, s2, s3])
a 0 b 1 c 2 d 3 e 4 f 5 g 6 dtype: int64
df = pd.DataFrame({'key': ['foo', 'bar', 'baz'],
'A': [1, 2, 3],
'B': [4, 5, 6],
'C': [7, 8, 9]})
df
A | B | C | key | |
---|---|---|---|---|
0 | 1 | 4 | 7 | foo |
1 | 2 | 5 | 8 | bar |
2 | 3 | 6 | 9 | baz |
melted = pd.melt(df, ["key"])
melted
key | variable | value | |
---|---|---|---|
0 | foo | A | 1 |
1 | bar | A | 2 |
2 | baz | A | 3 |
3 | foo | B | 4 |
4 | bar | B | 5 |
5 | baz | B | 6 |
6 | foo | C | 7 |
7 | bar | C | 8 |
8 | baz | C | 9 |
pivoted = melted.pivot(index = "key", values = "value", columns = "variable")
pivoted
variable | A | B | C |
---|---|---|---|
key | |||
bar | 2 | 5 | 8 |
baz | 3 | 6 | 9 |
foo | 1 | 4 | 7 |