import pandas as pd
import numpy as np
from datetime import datetime as dt
# Build a sample DataFrame that looks like what you will get back from a history() call.
df = pd.DataFrame(np.array([np.arange(10), np.arange(10)]).transpose(),
index=pd.date_range(start=pd.Timestamp('2014-06-17'), periods=10),
# In the actual history dataframe, columns are Security objects.
columns=['sid_1', 'sid_2'])
df
# Get the mean of all prices in this window.
means = df.mean()
means
# Get a rolling means window in which each cell contains
# the mean of the current day and the previous three days.
# Note that the first three windows have NaN (not a number),
# because they don't have a full 3 days yet.
rolling_means = pd.rolling_mean(df, window=4)
rolling_means
# We can customize the minumum number of required observations by specifying min_periods.
rolling_means_less_nans = pd.rolling_mean(df, window=4, min_periods=2)
rolling_means_less_nans
# Pandas has similar rolling functions for sum, stddev,
# and a variety of other statistical operations.
rolling_sums = pd.rolling_sum(df, window=2)
rolling_stddevs = pd.rolling_std(df, window=3)
# Combine both DataFrames by joining on the date index.
combined = rolling_sums.merge(rolling_stddevs, left_index=True, right_index=True, suffixes=("_sum", "_stddev"))
combined
# Note that these outputs are still DataFrames, so we can do nice things like this:
# Get the first 5 elements of the frame.
combined.iloc[:5]
# Or this:
# Get all but the last element of the frame:
combined.iloc[:-1]
# Or this:
# Get every other entry in the DataFrame, in reverse.
combined.iloc[::-2]
# Or this:
# Get the entries for June 20th and June 21st.
combined.loc[[pd.Timestamp('2014-06-20'), pd.Timestamp('2014-06-21')]]
# Or this:
# Get a specific column (this returns a Series)
combined['sid_2_sum']
# Or this:
# Get multiple specific columns (This returns a DataFrame)
combined[['sid_2_sum', 'sid_1_stddev']]