from datetime import datetime
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as pyplot
# Raw sample records: timestamp strings paired with a battle-death count.
# Rows 3 and 4 carry the same timestamp on purpose (a duplicated index label
# once 'date' becomes the index).
data = {
    'date': [
        '2014-05-01 18:47:05.069722',
        '2014-05-01 18:47:05.119994',
        '2014-05-02 18:47:05.178768',
        '2014-05-02 18:47:05.230071',
        '2014-05-02 18:47:05.230071',
        '2014-05-02 18:47:05.280592',
        '2014-05-03 18:47:05.332662',
        '2014-05-03 18:47:05.385109',
        '2014-05-04 18:47:05.436523',
        '2014-05-04 18:47:05.486877',
    ],
    'battle_deaths': [34, 25, 26, 15, 15, 14, 26, 25, 62, 41],
}

# Build the frame with an explicit column order, then show it.
df = pd.DataFrame(data, columns=['date', 'battle_deaths'])
print(df)
date battle_deaths 0 2014-05-01 18:47:05.069722 34 1 2014-05-01 18:47:05.119994 25 2 2014-05-02 18:47:05.178768 26 3 2014-05-02 18:47:05.230071 15 4 2014-05-02 18:47:05.230071 15 5 2014-05-02 18:47:05.280592 14 6 2014-05-03 18:47:05.332662 26 7 2014-05-03 18:47:05.385109 25 8 2014-05-04 18:47:05.436523 62 9 2014-05-04 18:47:05.486877 41 [10 rows x 2 columns]
# Parse the timestamp strings into datetime64 values.
df['date'] = pd.to_datetime(df['date'])
# Promote 'date' to the index; set_index drops the column from the frame,
# leaving 'battle_deaths' as the only data column.
df = df.set_index('date')
df
battle_deaths | |
---|---|
date | |
2014-05-01 18:47:05.069722 | 34 |
2014-05-01 18:47:05.119994 | 25 |
2014-05-02 18:47:05.178768 | 26 |
2014-05-02 18:47:05.230071 | 15 |
2014-05-02 18:47:05.230071 | 15 |
2014-05-02 18:47:05.280592 | 14 |
2014-05-03 18:47:05.332662 | 26 |
2014-05-03 18:47:05.385109 | 25 |
2014-05-04 18:47:05.436523 | 62 |
2014-05-04 18:47:05.486877 | 41 |
10 rows × 1 columns
# All observations from 2014, via partial-string indexing on the DatetimeIndex.
# Use .loc for row selection: df['2014'] is a *column* lookup in current
# pandas and raises KeyError.
df.loc['2014']
battle_deaths | |
---|---|
date | |
2014-05-01 18:47:05.069722 | 34 |
2014-05-01 18:47:05.119994 | 25 |
2014-05-02 18:47:05.178768 | 26 |
2014-05-02 18:47:05.230071 | 15 |
2014-05-02 18:47:05.230071 | 15 |
2014-05-02 18:47:05.280592 | 14 |
2014-05-03 18:47:05.332662 | 26 |
2014-05-03 18:47:05.385109 | 25 |
2014-05-04 18:47:05.436523 | 62 |
2014-05-04 18:47:05.486877 | 41 |
10 rows × 1 columns
# All observations from May 2014, via partial-string indexing on the
# DatetimeIndex. Use .loc for row selection: df['2014-05'] is a *column*
# lookup in current pandas and raises KeyError.
df.loc['2014-05']
battle_deaths | |
---|---|
date | |
2014-05-01 18:47:05.069722 | 34 |
2014-05-01 18:47:05.119994 | 25 |
2014-05-02 18:47:05.178768 | 26 |
2014-05-02 18:47:05.230071 | 15 |
2014-05-02 18:47:05.230071 | 15 |
2014-05-02 18:47:05.280592 | 14 |
2014-05-03 18:47:05.332662 | 26 |
2014-05-03 18:47:05.385109 | 25 |
2014-05-04 18:47:05.436523 | 62 |
2014-05-04 18:47:05.486877 | 41 |
10 rows × 1 columns
# Observations on or after 2014-05-03 00:00 (open-ended label slice on the index).
df.loc[datetime(2014, 5, 3):]
battle_deaths | |
---|---|
date | |
2014-05-03 18:47:05.332662 | 26 |
2014-05-03 18:47:05.385109 | 25 |
2014-05-04 18:47:05.436523 | 62 |
2014-05-04 18:47:05.486877 | 41 |
4 rows × 1 columns
# Observations from 5/3/2014 through 5/4/2014; with date-string labels the
# end day is included in full.
df.loc['5/3/2014':'5/4/2014']
battle_deaths | |
---|---|
date | |
2014-05-03 18:47:05.332662 | 26 |
2014-05-03 18:47:05.385109 | 25 |
2014-05-04 18:47:05.436523 | 62 |
2014-05-04 18:47:05.486877 | 41 |
4 rows × 1 columns
# Drop every observation later than 5/3/2014 00:00; nothing is trimmed from
# the start of the series.
df.truncate(before=None, after='5/3/2014')
battle_deaths | |
---|---|
date | |
2014-05-01 18:47:05.069722 | 34 |
2014-05-01 18:47:05.119994 | 25 |
2014-05-02 18:47:05.178768 | 26 |
2014-05-02 18:47:05.230071 | 15 |
2014-05-02 18:47:05.230071 | 15 |
2014-05-02 18:47:05.280592 | 14 |
6 rows × 1 columns
# Observations from May 2014, written month-first.
# The .ix indexer was removed in pandas 1.0; .loc is the label-based
# replacement and supports the same partial-string date lookup.
df.loc['5-2014']
battle_deaths | |
---|---|
date | |
2014-05-01 18:47:05.069722 | 34 |
2014-05-01 18:47:05.119994 | 25 |
2014-05-02 18:47:05.178768 | 26 |
2014-05-02 18:47:05.230071 | 15 |
2014-05-02 18:47:05.230071 | 15 |
2014-05-02 18:47:05.280592 | 14 |
2014-05-03 18:47:05.332662 | 26 |
2014-05-03 18:47:05.385109 | 25 |
2014-05-04 18:47:05.436523 | 62 |
2014-05-04 18:47:05.486877 | 41 |
10 rows × 1 columns
# Number of observations per distinct timestamp: group on the index itself
# (level 0) so rows sharing a timestamp collapse into a single count.
per_timestamp = df.groupby(level=0)
per_timestamp.count()
battle_deaths | |
---|---|
date | |
2014-05-01 18:47:05.069722 | 1 |
2014-05-01 18:47:05.119994 | 1 |
2014-05-02 18:47:05.178768 | 1 |
2014-05-02 18:47:05.230071 | 2 |
2014-05-02 18:47:05.280592 | 1 |
2014-05-03 18:47:05.332662 | 1 |
2014-05-03 18:47:05.385109 | 1 |
2014-05-04 18:47:05.436523 | 1 |
2014-05-04 18:47:05.486877 | 1 |
9 rows × 1 columns
# Mean battle deaths per calendar day.
# resample() no longer accepts how=; the aggregation is a method on the
# resampler object instead.
df.resample('D').mean()
battle_deaths | |
---|---|
date | |
2014-05-01 | 29.5 |
2014-05-02 | 17.5 |
2014-05-03 | 25.5 |
2014-05-04 | 51.5 |
4 rows × 1 columns
# Total battle deaths per calendar day.
# resample() no longer accepts how=; the aggregation is a method on the
# resampler object instead.
df.resample('D').sum()
battle_deaths | |
---|---|
date | |
2014-05-01 | 59 |
2014-05-02 | 70 |
2014-05-03 | 51 |
2014-05-04 | 103 |
4 rows × 1 columns
# Line plot of total battle deaths per calendar day.
# resample() no longer accepts how=; aggregate with .sum() and plot the result.
df.resample('D').sum().plot()
<matplotlib.axes.AxesSubplot at 0x10809de50>