import pandas as pd
# show which pandas release is installed; the outputs in this walkthrough come from 0.22.0
pd.__version__
'0.22.0'
New in pandas 0.18.1: assemble datetimes from DataFrame columns
# build a two-row example DataFrame whose columns hold the parts of a timestamp
df = pd.DataFrame(
    data=[
        [12, 25, 2017, 10],
        [1, 15, 2018, 11],
    ],
    columns=['month', 'day', 'year', 'hour'],
)
df
| | month | day | year | hour |
|---|---|---|---|---|
| 0 | 12 | 25 | 2017 | 10 |
| 1 | 1 | 15 | 2018 | 11 |
# new: pass the whole DataFrame to to_datetime; the month, day, year and hour
# columns are combined into a single datetime Series (one value per row)
pd.to_datetime(df)
0   2017-12-25 10:00:00
1   2018-01-15 11:00:00
dtype: datetime64[ns]
# new: pass only the month, day and year columns; with no hour column the
# resulting datetimes carry just the date
pd.to_datetime(df[['month', 'day', 'year']])
0   2017-12-25
1   2018-01-15
dtype: datetime64[ns]
# overwrite the default integer index with the dates assembled from the
# month, day and year columns, then display the re-indexed DataFrame
df.index = pd.to_datetime(df[['month', 'day', 'year']])
df
| | month | day | year | hour |
|---|---|---|---|---|
| 2017-12-25 | 12 | 25 | 2017 | 10 |
| 2018-01-15 | 1 | 15 | 2018 | 11 |
New in pandas 0.19.0: read a column as category directly in read_csv
# read the drinks-by-country dataset from a URL into a DataFrame (needs network access)
# and preview its first five rows
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.head()
| | country | beer_servings | spirit_servings | wine_servings | total_litres_of_pure_alcohol | continent |
|---|---|---|---|---|---|---|
| 0 | Afghanistan | 0 | 0 | 0 | 0.0 | Asia |
| 1 | Albania | 89 | 132 | 54 | 4.9 | Europe |
| 2 | Algeria | 25 | 0 | 14 | 0.7 | Africa |
| 3 | Andorra | 245 | 138 | 312 | 12.4 | Europe |
| 4 | Angola | 217 | 57 | 45 | 5.9 | Africa |
# data types are detected automatically while reading: text columns come back
# as object, numeric columns as int64 or float64
drinks.dtypes
country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object
# old way: read the file first, then convert the column to category afterwards
drinks['continent'] = drinks['continent'].astype('category')
drinks.dtypes
country                           object
beer_servings                      int64
spirit_servings                    int64
wine_servings                      int64
total_litres_of_pure_alcohol     float64
continent                       category
dtype: object
# new way: declare the category dtype up front so the column is converted while reading
drinks = pd.read_csv(
    'http://bit.ly/drinksbycountry',
    dtype={'continent': 'category'},
)
drinks.dtypes
country                           object
beer_servings                      int64
spirit_servings                    int64
wine_servings                      int64
total_litres_of_pure_alcohol     float64
continent                       category
dtype: object
New in pandas 0.19.0: convert several columns at once by passing a dict to astype
# re-read the drinks dataset so every column starts again from its automatically detected dtype
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.dtypes
country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object
# old way: cast each column to float with its own astype call and assignment
drinks['beer_servings'] = drinks['beer_servings'].astype('float')
drinks['spirit_servings'] = drinks['spirit_servings'].astype('float')
drinks.dtypes
country                          object
beer_servings                   float64
spirit_servings                 float64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object
# new way: a single astype call takes a dict mapping column name to target dtype
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks = drinks.astype({
    'beer_servings': 'float',
    'spirit_servings': 'float',
})
drinks.dtypes
country                          object
beer_servings                   float64
spirit_servings                 float64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object
New in pandas 0.20.0: agg method on Series and DataFrame
# a single aggregation after a groupby: mean beer servings per continent
drinks.groupby('continent')['beer_servings'].mean()
continent
Africa            61.471698
Asia              37.045455
Europe           193.777778
North America    145.434783
Oceania           89.687500
South America    175.083333
Name: beer_servings, dtype: float64
# several aggregations at once: pass a list of function names to agg
drinks.groupby('continent')['beer_servings'].agg(['mean', 'min', 'max'])
| continent | mean | min | max |
|---|---|---|---|
| Africa | 61.471698 | 0.0 | 376.0 |
| Asia | 37.045455 | 0.0 | 247.0 |
| Europe | 193.777778 | 0.0 | 361.0 |
| North America | 145.434783 | 1.0 | 285.0 |
| Oceania | 89.687500 | 0.0 | 306.0 |
| South America | 175.083333 | 93.0 | 333.0 |
# new: agg with a list of function names also works directly on a Series (no groupby needed)
drinks['beer_servings'].agg(['mean', 'min', 'max'])
mean    106.160622
min       0.000000
max     376.000000
Name: beer_servings, dtype: float64
# new: agg with a list also works on a whole DataFrame, returning one row per function;
# the text columns (country, continent) get NaN for mean but still have a min and a max
# NOTE(review): this relies on the 0.22.0 behaviour shown below; later pandas releases
# may raise on the non-numeric columns instead. Confirm before running on a newer version.
drinks.agg(['mean', 'min', 'max'])
| | country | beer_servings | spirit_servings | wine_servings | total_litres_of_pure_alcohol | continent |
|---|---|---|---|---|---|---|
| max | Zimbabwe | 376.000000 | 438.000000 | 370.000000 | 14.400000 | South America |
| mean | NaN | 106.160622 | 80.994819 | 49.450777 | 4.717098 | NaN |
| min | Afghanistan | 0.000000 | 0.000000 | 0.000000 | 0.000000 | Africa |
# describe() covers similar ground but is less flexible: here it reports count, mean, std,
# min, quartiles and max, and only for the numeric columns
drinks.describe()
| | beer_servings | spirit_servings | wine_servings | total_litres_of_pure_alcohol |
|---|---|---|---|---|
| count | 193.000000 | 193.000000 | 193.000000 | 193.000000 |
| mean | 106.160622 | 80.994819 | 49.450777 | 4.717098 |
| std | 101.143103 | 88.284312 | 79.697598 | 3.773298 |
| min | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 20.000000 | 4.000000 | 1.000000 | 1.300000 |
| 50% | 76.000000 | 56.000000 | 8.000000 | 4.200000 |
| 75% | 188.000000 | 128.000000 | 59.000000 | 7.200000 |
| max | 376.000000 | 438.000000 | 370.000000 | 14.400000 |