In [1]:
# import pandas and display the installed version; the features demonstrated
# below are marked as new in pandas 0.18.1 through 0.20.0
import pandas as pd
pd.__version__
Out[1]:
'0.22.0'

1. Create a datetime column from a DataFrame

New in 0.18.1

In [2]:
# build a two-row example DataFrame whose columns hold the separate parts of a date/time
df = pd.DataFrame(
    data=[
        [12, 25, 2017, 10],
        [1, 15, 2018, 11],
    ],
    columns=['month', 'day', 'year', 'hour'],
)
df
Out[2]:
month day year hour
0 12 25 2017 10
1 1 15 2018 11
In [3]:
# new: create a datetime column from the entire DataFrame
# (each row's 'year', 'month', 'day' and 'hour' values are assembled into one timestamp)
pd.to_datetime(df)
Out[3]:
0   2017-12-25 10:00:00
1   2018-01-15 11:00:00
dtype: datetime64[ns]
In [4]:
# new: create a datetime column from a subset of columns
# ('hour' is left out here, so the resulting values display without a time component)
pd.to_datetime(df[['month', 'day', 'year']])
Out[4]:
0   2017-12-25
1   2018-01-15
dtype: datetime64[ns]
In [5]:
# overwrite the index with the dates built from the 'month', 'day' and 'year' columns
# (this replaces the default 0, 1 index; the original columns remain in the DataFrame)
df.index = pd.to_datetime(df[['month', 'day', 'year']])
df
Out[5]:
month day year hour
2017-12-25 12 25 2017 10
2018-01-15 1 15 2018 11

2. Create a category column during file reading

New in 0.19.0

In [6]:
# read the drinks dataset into a DataFrame
# (the CSV is fetched from the URL, so this cell needs network access)
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
# preview the first five rows
drinks.head()
Out[6]:
country beer_servings spirit_servings wine_servings total_litres_of_pure_alcohol continent
0 Afghanistan 0 0 0 0.0 Asia
1 Albania 89 132 54 4.9 Europe
2 Algeria 25 0 14 0.7 Africa
3 Andorra 245 138 312 12.4 Europe
4 Angola 217 57 45 5.9 Africa
In [7]:
# data types are automatically detected during file reading
# (the text columns 'country' and 'continent' come back as object)
drinks.dtypes
Out[7]:
country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object
In [8]:
# old way to create a category (after file reading):
# convert the 'continent' column once the data has already been loaded
drinks['continent'] = drinks['continent'].astype('category')
drinks.dtypes
Out[8]:
country                           object
beer_servings                      int64
spirit_servings                    int64
wine_servings                      int64
total_litres_of_pure_alcohol     float64
continent                       category
dtype: object
In [9]:
# new way to create a category (during file reading):
# map the column name to its dtype through the dtype argument of read_csv
drinks = pd.read_csv(
    'http://bit.ly/drinksbycountry',
    dtype={'continent': 'category'},
)
drinks.dtypes
Out[9]:
country                           object
beer_servings                      int64
spirit_servings                    int64
wine_servings                      int64
total_litres_of_pure_alcohol     float64
continent                       category
dtype: object

3. Convert the data type of multiple columns at once

New in 0.19.0

In [10]:
# read the drinks dataset into a DataFrame
# (this rebinds 'drinks' to a freshly read copy, so 'continent' is object again)
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.dtypes
Out[10]:
country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object
In [11]:
# old way to convert data types (one at a time):
# each column is converted and assigned back in its own statement
drinks['beer_servings'] = drinks['beer_servings'].astype('float')
drinks['spirit_servings'] = drinks['spirit_servings'].astype('float')
drinks.dtypes
Out[11]:
country                          object
beer_servings                   float64
spirit_servings                 float64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object
In [12]:
# new way to convert data types (all at once):
# start from a freshly read copy, then pass astype a dict of column name -> dtype
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks = drinks.astype({
    'beer_servings': 'float',
    'spirit_servings': 'float',
})
drinks.dtypes
Out[12]:
country                          object
beer_servings                   float64
spirit_servings                 float64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object

4. Apply multiple aggregations on a Series or DataFrame

New in 0.20.0

In [13]:
# example of a single aggregation function after a groupby:
# the mean of beer_servings within each continent
drinks.groupby('continent')['beer_servings'].mean()
Out[13]:
continent
Africa            61.471698
Asia              37.045455
Europe           193.777778
North America    145.434783
Oceania           89.687500
South America    175.083333
Name: beer_servings, dtype: float64
In [14]:
# multiple aggregation functions can be applied simultaneously:
# passing a list to agg produces one output column per function
drinks.groupby('continent')['beer_servings'].agg(['mean', 'min', 'max'])
Out[14]:
mean min max
continent
Africa 61.471698 0.0 376.0
Asia 37.045455 0.0 247.0
Europe 193.777778 0.0 361.0
North America 145.434783 1.0 285.0
Oceania 89.687500 0.0 306.0
South America 175.083333 93.0 333.0
In [15]:
# new: apply the same aggregations to a Series
# (the result is a Series indexed by the function names)
drinks['beer_servings'].agg(['mean', 'min', 'max'])
Out[15]:
mean    106.160622
min       0.000000
max     376.000000
Name: beer_servings, dtype: float64
In [16]:
# new: apply the same aggregations to a DataFrame
# (in the output below, 'mean' is NaN for the text columns 'country' and 'continent',
# while 'min' and 'max' return text values such as 'Afghanistan' and 'Zimbabwe')
# NOTE(review): this output was produced with pandas 0.22.0; other versions may treat
# non-numeric columns differently — confirm before relying on it
drinks.agg(['mean', 'min', 'max'])
Out[16]:
country beer_servings spirit_servings wine_servings total_litres_of_pure_alcohol continent
max Zimbabwe 376.000000 438.000000 370.000000 14.400000 South America
mean NaN 106.160622 80.994819 49.450777 4.717098 NaN
min Afghanistan 0.000000 0.000000 0.000000 0.000000 Africa
In [17]:
# DataFrame describe method provides similar functionality but is less flexible
# (as called here it reports count, mean, std, min, quartiles and max,
# and the output covers only the numeric columns)
drinks.describe()
Out[17]:
beer_servings spirit_servings wine_servings total_litres_of_pure_alcohol
count 193.000000 193.000000 193.000000 193.000000
mean 106.160622 80.994819 49.450777 4.717098
std 101.143103 88.284312 79.697598 3.773298
min 0.000000 0.000000 0.000000 0.000000
25% 20.000000 4.000000 1.000000 1.300000
50% 76.000000 56.000000 8.000000 4.200000
75% 188.000000 128.000000 59.000000 7.200000
max 376.000000 438.000000 370.000000 14.400000

Bonus: Download the official pandas cheat sheet

New in 0.19.2

Cheat sheet (PDF): https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf