import pandas as pd
from pandas import Series, DataFrame
# Build a Series from a plain list; no index is passed here.
obj = pd.Series(data=[1, 3, 5, -7, 9])
obj
0    1
1    3
2    5
3   -7
4    9
dtype: int64
obj.values
array([ 1, 3, 5, -7, 9])
obj.index
RangeIndex(start=0, stop=5, step=1)
# A Series whose five values are addressed by the string labels 'a'..'e'.
obj2 = pd.Series(
    data=[2, 4, 6, -8, 10],
    index=['a', 'b', 'c', 'd', 'e'],
)
obj2
a     2
b     4
c     6
d    -8
e    10
dtype: int64
obj2['a']
2
obj2[['b', 'c', 'd']]
b    4
c    6
d   -8
dtype: int64
obj2 * 5
a    10
b    20
c    30
d   -40
e    50
dtype: int64
obj2[obj2 > 0]
a     2
b     4
c     6
e    10
dtype: int64
obj2[obj2 < 0]
d   -8
dtype: int64
obj2[obj2 > 8]
e    10
dtype: int64
import numpy as np
np.exp(obj2)
a        7.389056
b       54.598150
c      403.428793
d        0.000335
e    22026.465795
dtype: float64
# Population figures keyed by state name.
population_dict = {
    'Nordrhein-Westfalen': 17933000,
    'Bayern': 13077000,
    'Baden-Württemberg': 11070000,
    'Niedersachsen': 7982000,
}
# Building a Series from a dict: keys become the index labels, values the data.
obj3 = pd.Series(data=population_dict)
obj3
Nordrhein-Westfalen    17933000
Bayern                 13077000
Baden-Württemberg      11070000
Niedersachsen           7982000
dtype: int64
obj3['Bayern']
13077000
obj3['Bayern':'Niedersachsen']
Bayern               13077000
Baden-Württemberg    11070000
Niedersachsen         7982000
dtype: int64
# Labels to select, in this order; 'Berlin' has no entry in population_dict,
# so its value comes out as NaN in the result.
states = [
    'Berlin',
    'Bayern',
    'Niedersachsen',
    'Baden-Württemberg',
]
obj4 = pd.Series(data=population_dict, index=states)
obj4
Berlin                      NaN
Bayern               13077000.0
Niedersachsen         7982000.0
Baden-Württemberg    11070000.0
dtype: float64
pd.isnull(obj4)
Berlin                True
Bayern               False
Niedersachsen        False
Baden-Württemberg    False
dtype: bool
pd.notnull(obj4)
Berlin               False
Bayern                True
Niedersachsen         True
Baden-Württemberg     True
dtype: bool
obj4.isnull()
Berlin                True
Bayern               False
Niedersachsen        False
Baden-Württemberg    False
dtype: bool
obj3
Nordrhein-Westfalen    17933000
Bayern                 13077000
Baden-Württemberg      11070000
Niedersachsen           7982000
dtype: int64
obj4
Berlin                      NaN
Bayern               13077000.0
Niedersachsen         7982000.0
Baden-Württemberg    11070000.0
dtype: float64
obj3 + obj4
Baden-Württemberg      22140000.0
Bayern                 26154000.0
Berlin                        NaN
Niedersachsen          15964000.0
Nordrhein-Westfalen           NaN
dtype: float64
# Label the index and the Series itself; both names appear in the repr below.
# The two assignments are independent of each other.
obj4.index.name = 'state'
obj4.name = 'population'
obj4
state
Berlin                      NaN
Bayern               13077000.0
Niedersachsen         7982000.0
Baden-Württemberg    11070000.0
Name: population, dtype: float64
# Area code per state, keyed by the same state names as population_dict.
areacode_dict = {
    'Nordrhein-Westfalen': 3,
    'Bayern': 8,
    'Baden-Württemberg': 7,
    'Niedersachsen': 3,
}
areacode = pd.Series(data=areacode_dict)
areacode
Nordrhein-Westfalen    3
Bayern                 8
Baden-Württemberg      7
Niedersachsen          3
dtype: int64
# NOTE: `states` was bound to a plain list of labels earlier in this file;
# from here on it refers to the DataFrame built below.
population = obj3
# Each dict entry becomes one column.
states = pd.DataFrame(
    {
        'population': population,
        'areacode': areacode,
    }
)
states
| | population | areacode |
---|---|---|
Nordrhein-Westfalen | 17933000 | 3 |
Bayern | 13077000 | 8 |
Baden-Württemberg | 11070000 | 7 |
Niedersachsen | 7982000 | 3 |
states.index
Index(['Nordrhein-Westfalen', 'Bayern', 'Baden-Württemberg', 'Niedersachsen'], dtype='object')
states.columns
Index(['population', 'areacode'], dtype='object')
states['areacode']
Nordrhein-Westfalen    3
Bayern                 8
Baden-Württemberg      7
Niedersachsen          3
Name: areacode, dtype: int64
# One record (dict) per row; the dict keys 'x' and 'y' become the column names.
data = [dict(x=n, y=4 * n) for n in range(4)]
pd.DataFrame(data)
| | x | y |
---|---|---|
0 | 0 | 0 |
1 | 1 | 4 |
2 | 2 | 8 |
3 | 3 | 12 |
pd.DataFrame(population, columns=['population'])
| | population |
---|---|
Nordrhein-Westfalen | 17933000 |
Bayern | 13077000 |
Baden-Württemberg | 11070000 |
Niedersachsen | 7982000 |
pd.DataFrame({'population': population, 'areacode': areacode})
| | population | areacode |
---|---|---|
Nordrhein-Westfalen | 17933000 | 3 |
Bayern | 13077000 | 8 |
Baden-Württemberg | 11070000 | 7 |
Niedersachsen | 7982000 | 3 |
# Build a 3x2 DataFrame from a random array, with explicit column labels
# ('a', 'b') and row labels ('x', 'y', 'z').
# No seed is set in the visible code, so the numbers printed below are one
# sample and will differ on re-run.
pd.DataFrame(np.random.rand(3, 2),
columns=['a', 'b'],
index=['x', 'y', 'z'])
| | a | b |
---|---|---|
x | 0.811815 | 0.630333 |
y | 0.480516 | 0.370507 |
z | 0.742164 | 0.799571 |
# Structured array of three zero-filled records with fields 'A' (i8) and 'B' (f8).
A = np.zeros(
    shape=3,
    dtype=[('A', 'i8'), ('B', 'f8')],
)
A
array([(0, 0.), (0, 0.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])
pd.DataFrame(A)
| | A | B |
---|---|---|
0 | 0 | 0.0 |
1 | 0 | 0.0 |
2 | 0 | 0.0 |