#!/usr/bin/env python
# coding: utf-8

# # 4 new time-saving tricks in pandas ([video](https://www.youtube.com/watch?v=-NbY7E9hKxk&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=32))
#
# - [My pandas video series (30 videos)](http://www.dataschool.io/easier-data-analysis-with-pandas/)
# - [GitHub repository](https://github.com/justmarkham/pandas-videos)
# - [pandas release notes](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html)
#
# NOTE: this is a notebook exported as a script. Bare expressions at the end
# of a cell (e.g. `df`, `drinks.dtypes`) are what the notebook displayed; they
# produce no output when the file is run as a plain script. The drinks cells
# download the dataset with `pd.read_csv`, so they need network access.

# In[1]:


import pandas as pd
pd.__version__


# ## 1. Create a datetime column from a DataFrame
#
# *New in 0.18.1*

# In[2]:


# create an example DataFrame
df = pd.DataFrame([[12, 25, 2017, 10], [1, 15, 2018, 11]],
                  columns=['month', 'day', 'year', 'hour'])
df


# In[3]:


# new: create a datetime column from the entire DataFrame
pd.to_datetime(df)


# In[4]:


# new: create a datetime column from a subset of columns
pd.to_datetime(df[['month', 'day', 'year']])


# In[5]:


# overwrite the index
df.index = pd.to_datetime(df[['month', 'day', 'year']])
df


# - [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#assembling-datetimes)
# - [Video: How do I work with dates and times in pandas?](https://www.youtube.com/watch?v=yCgJGsg0Xa4&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=25)

# ## 2. Create a category column during file reading
#
# *New in 0.19.0*

# In[6]:


# read the drinks dataset into a DataFrame
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.head()


# In[7]:


# data types are automatically detected
drinks.dtypes


# In[8]:


# old way to create a category (after file reading)
drinks['continent'] = drinks.continent.astype('category')
drinks.dtypes


# In[9]:


# new way to create a category (during file reading)
drinks = pd.read_csv('http://bit.ly/drinksbycountry', dtype={'continent':'category'})
drinks.dtypes


# - [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#read-csv-supports-parsing-categorical-directly)
# - [Video: How do I make my pandas DataFrame smaller and faster?](https://www.youtube.com/watch?v=wDYDYGyN_cw&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=21)

# ## 3. Convert the data type of multiple columns at once
#
# *New in 0.19.0*

# In[10]:


# read the drinks dataset into a DataFrame
# (re-read so the dtype changes made in the previous section are discarded)
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.dtypes


# In[11]:


# old way to convert data types (one at a time)
drinks['beer_servings'] = drinks.beer_servings.astype('float')
drinks['spirit_servings'] = drinks.spirit_servings.astype('float')
drinks.dtypes


# In[12]:


# new way to convert data types (all at once)
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks = drinks.astype({'beer_servings':'float', 'spirit_servings':'float'})
drinks.dtypes


# - [More information](http://pandas.pydata.org/pandas-docs/stable/basics.html#astype)
# - [Video: How do I change the data type of a pandas Series?](https://www.youtube.com/watch?v=V0AWyzVMf54&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=13)

# ## 4. Apply multiple aggregations on a Series or DataFrame
#
# *New in 0.20.0*

# In[13]:


# example of a single aggregation function after a groupby
drinks.groupby('continent').beer_servings.mean()


# In[14]:


# multiple aggregation functions can be applied simultaneously
drinks.groupby('continent').beer_servings.agg(['mean', 'min', 'max'])


# In[15]:


# new: apply the same aggregations to a Series
drinks.beer_servings.agg(['mean', 'min', 'max'])


# In[16]:


# new: apply the same aggregations to a DataFrame
# Restrict to numeric columns first: pandas >= 2.0 no longer silently drops
# columns for which an aggregation fails, so 'mean' would raise a TypeError on
# the frame's non-numeric columns (such as `continent`). The list form of
# `include` is used because it is also accepted by older pandas releases.
drinks.select_dtypes(include=['number']).agg(['mean', 'min', 'max'])


# In[17]:


# DataFrame describe method provides similar functionality but is less flexible
drinks.describe()


# - [More information](http://pandas.pydata.org/pandas-docs/stable/basics.html#basics-aggregate)
# - [Video: When should I use a "groupby" in pandas?](https://www.youtube.com/watch?v=qy0fDqoMJx8&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=14)

# ## Bonus: Download the official pandas cheat sheet
#
# *New in 0.19.2*
#
# [Cheat sheet (PDF)](https://github.com/pandas-dev/pandas/blob/master/doc/cheatsheet/Pandas_Cheat_Sheet.pdf)