How do I use the MultiIndex in pandas? (video)

In [1]:
import pandas as pd
In [2]:
stocks = pd.read_csv('data/stocks.csv')
stocks
Out[2]:
Date Close Volume Symbol
0 2016-10-03 31.50 14070500 CSCO
1 2016-10-03 112.52 21701800 AAPL
2 2016-10-03 57.42 19189500 MSFT
3 2016-10-04 113.00 29736800 AAPL
4 2016-10-04 57.24 20085900 MSFT
5 2016-10-04 31.35 18460400 CSCO
6 2016-10-05 57.64 16726400 MSFT
7 2016-10-05 31.59 11808600 CSCO
8 2016-10-05 113.05 21453100 AAPL
In [3]:
stocks.index
Out[3]:
RangeIndex(start=0, stop=9, step=1)
In [4]:
stocks.groupby('Symbol').Close.mean()
Out[4]:
Symbol
AAPL    112.856667
CSCO     31.480000
MSFT     57.433333
Name: Close, dtype: float64

Series with MultiIndex

In [5]:
ser = stocks.groupby(['Symbol', 'Date']).Close.mean()
ser
Out[5]:
Symbol  Date      
AAPL    2016-10-03    112.52
        2016-10-04    113.00
        2016-10-05    113.05
CSCO    2016-10-03     31.50
        2016-10-04     31.35
        2016-10-05     31.59
MSFT    2016-10-03     57.42
        2016-10-04     57.24
        2016-10-05     57.64
Name: Close, dtype: float64
In [6]:
ser.index
Out[6]:
MultiIndex(levels=[['AAPL', 'CSCO', 'MSFT'], ['2016-10-03', '2016-10-04', '2016-10-05']],
           codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
           names=['Symbol', 'Date'])
In [7]:
ser.unstack()
Out[7]:
Date 2016-10-03 2016-10-04 2016-10-05
Symbol
AAPL 112.52 113.00 113.05
CSCO 31.50 31.35 31.59
MSFT 57.42 57.24 57.64
In [8]:
df = stocks.pivot_table(values='Close', index='Symbol', columns='Date')
df
Out[8]:
Date 2016-10-03 2016-10-04 2016-10-05
Symbol
AAPL 112.52 113.00 113.05
CSCO 31.50 31.35 31.59
MSFT 57.42 57.24 57.64

Selection from Series with MultiIndex

In [9]:
ser
Out[9]:
Symbol  Date      
AAPL    2016-10-03    112.52
        2016-10-04    113.00
        2016-10-05    113.05
CSCO    2016-10-03     31.50
        2016-10-04     31.35
        2016-10-05     31.59
MSFT    2016-10-03     57.42
        2016-10-04     57.24
        2016-10-05     57.64
Name: Close, dtype: float64
In [10]:
ser.loc['AAPL']
Out[10]:
Date
2016-10-03    112.52
2016-10-04    113.00
2016-10-05    113.05
Name: Close, dtype: float64
In [11]:
ser.loc['AAPL', '2016-10-03']
Out[11]:
112.52
In [12]:
ser.loc[:, '2016-10-03']
Out[12]:
Symbol
AAPL    112.52
CSCO     31.50
MSFT     57.42
Name: Close, dtype: float64
In [13]:
df
Out[13]:
Date 2016-10-03 2016-10-04 2016-10-05
Symbol
AAPL 112.52 113.00 113.05
CSCO 31.50 31.35 31.59
MSFT 57.42 57.24 57.64
In [14]:
df.loc['AAPL']
Out[14]:
Date
2016-10-03    112.52
2016-10-04    113.00
2016-10-05    113.05
Name: AAPL, dtype: float64
In [15]:
df.loc['AAPL', '2016-10-03']
Out[15]:
112.52
In [16]:
df.loc[:, '2016-10-03']
Out[16]:
Symbol
AAPL    112.52
CSCO     31.50
MSFT     57.42
Name: 2016-10-03, dtype: float64

DataFrame with MultiIndex

In [17]:
stocks.set_index(['Symbol', 'Date'], inplace=True)
stocks
Out[17]:
Close Volume
Symbol Date
CSCO 2016-10-03 31.50 14070500
AAPL 2016-10-03 112.52 21701800
MSFT 2016-10-03 57.42 19189500
AAPL 2016-10-04 113.00 29736800
MSFT 2016-10-04 57.24 20085900
CSCO 2016-10-04 31.35 18460400
MSFT 2016-10-05 57.64 16726400
CSCO 2016-10-05 31.59 11808600
AAPL 2016-10-05 113.05 21453100
In [18]:
stocks.index
Out[18]:
MultiIndex(levels=[['AAPL', 'CSCO', 'MSFT'], ['2016-10-03', '2016-10-04', '2016-10-05']],
           codes=[[1, 0, 2, 0, 2, 1, 2, 1, 0], [0, 0, 0, 1, 1, 1, 2, 2, 2]],
           names=['Symbol', 'Date'])
In [19]:
stocks.sort_index(inplace=True)
stocks
Out[19]:
Close Volume
Symbol Date
AAPL 2016-10-03 112.52 21701800
2016-10-04 113.00 29736800
2016-10-05 113.05 21453100
CSCO 2016-10-03 31.50 14070500
2016-10-04 31.35 18460400
2016-10-05 31.59 11808600
MSFT 2016-10-03 57.42 19189500
2016-10-04 57.24 20085900
2016-10-05 57.64 16726400

Selection from DataFrame with MultiIndex

In [20]:
stocks.loc['AAPL']
Out[20]:
Close Volume
Date
2016-10-03 112.52 21701800
2016-10-04 113.00 29736800
2016-10-05 113.05 21453100
In [21]:
stocks.loc[('AAPL', '2016-10-03'), :]
Out[21]:
Close          112.52
Volume    21701800.00
Name: (AAPL, 2016-10-03), dtype: float64
In [22]:
stocks.loc[('AAPL', '2016-10-03'), 'Close']
Out[22]:
112.52
In [23]:
stocks.loc[['AAPL', 'MSFT'], :]
Out[23]:
Close Volume
Symbol Date
AAPL 2016-10-03 112.52 21701800
2016-10-04 113.00 29736800
2016-10-05 113.05 21453100
MSFT 2016-10-03 57.42 19189500
2016-10-04 57.24 20085900
2016-10-05 57.64 16726400
In [24]:
stocks.loc[(['AAPL', 'MSFT'], '2016-10-03'), :]
Out[24]:
Close Volume
Symbol Date
AAPL 2016-10-03 112.52 21701800
MSFT 2016-10-03 57.42 19189500
In [25]:
stocks.loc[(['AAPL', 'MSFT'], '2016-10-03'), 'Close']
Out[25]:
Symbol  Date      
AAPL    2016-10-03    112.52
MSFT    2016-10-03     57.42
Name: Close, dtype: float64
In [26]:
stocks.loc[('AAPL', ['2016-10-03', '2016-10-04']), 'Close']
Out[26]:
Symbol  Date      
AAPL    2016-10-03    112.52
        2016-10-04    113.00
Name: Close, dtype: float64
In [27]:
stocks.loc[(slice(None), ['2016-10-03', '2016-10-04']), :]
Out[27]:
Close Volume
Symbol Date
AAPL 2016-10-03 112.52 21701800
2016-10-04 113.00 29736800
CSCO 2016-10-03 31.50 14070500
2016-10-04 31.35 18460400
MSFT 2016-10-03 57.42 19189500
2016-10-04 57.24 20085900

Merging DataFrames with MultiIndexes

In [28]:
close = pd.read_csv('data/stocks.csv', usecols=[0, 1, 3], index_col=['Symbol', 'Date']).sort_index()
close
Out[28]:
Close
Symbol Date
AAPL 2016-10-03 112.52
2016-10-04 113.00
2016-10-05 113.05
CSCO 2016-10-03 31.50
2016-10-04 31.35
2016-10-05 31.59
MSFT 2016-10-03 57.42
2016-10-04 57.24
2016-10-05 57.64
In [29]:
volume = pd.read_csv('data/stocks.csv', usecols=[0, 2, 3], index_col=['Symbol', 'Date']).sort_index()
volume
Out[29]:
Volume
Symbol Date
AAPL 2016-10-03 21701800
2016-10-04 29736800
2016-10-05 21453100
CSCO 2016-10-03 14070500
2016-10-04 18460400
2016-10-05 11808600
MSFT 2016-10-03 19189500
2016-10-04 20085900
2016-10-05 16726400
In [30]:
both = pd.merge(close, volume, left_index=True, right_index=True)
both
Out[30]:
Close Volume
Symbol Date
AAPL 2016-10-03 112.52 21701800
2016-10-04 113.00 29736800
2016-10-05 113.05 21453100
CSCO 2016-10-03 31.50 14070500
2016-10-04 31.35 18460400
2016-10-05 31.59 11808600
MSFT 2016-10-03 57.42 19189500
2016-10-04 57.24 20085900
2016-10-05 57.64 16726400
In [31]:
both.reset_index()
Out[31]:
Symbol Date Close Volume
0 AAPL 2016-10-03 112.52 21701800
1 AAPL 2016-10-04 113.00 29736800
2 AAPL 2016-10-05 113.05 21453100
3 CSCO 2016-10-03 31.50 14070500
4 CSCO 2016-10-04 31.35 18460400
5 CSCO 2016-10-05 31.59 11808600
6 MSFT 2016-10-03 57.42 19189500
7 MSFT 2016-10-04 57.24 20085900
8 MSFT 2016-10-05 57.64 16726400