import datetime
import time

import numpy as np
import pandas as pd
import cufflinks
import plotly.offline as py
import plotly.graph_objs as go
from plotly import tools
py.init_notebook_mode(connected=True)
df = pd.read_csv('SEG_Saleshistory_Stores.csv')
df.columns = ['date', 'store', 'item', 'sales']
df.date = pd.to_datetime(df.date)
df.item = df.item.astype('category')
df.store = df.store.astype('category')
df.dtypes
date     datetime64[ns]
store          category
item           category
sales             int64
dtype: object
Below are some summary statistics on the data. Overall, the sales quantities for individual items at individual stores are quite small. It would be difficult to forecast daily quantities for individual items at individual stores, so we will work towards forecasting weekly item sales at individual stores.
df.describe(include='all')
|        | date                | store  | item     | sales         |
|--------|---------------------|--------|----------|---------------|
| count  | 425575              | 425575 | 425575.0 | 425575.000000 |
| unique | 342                 | 181    | 39.0     | NaN           |
| top    | 2016-11-05 00:00:00 | SEGWD7 | 41795.0  | NaN           |
| freq   | 4110                | 6111   | 27516.0  | NaN           |
| first  | 2016-02-04 00:00:00 | NaN    | NaN      | NaN           |
| last   | 2017-01-11 00:00:00 | NaN    | NaN      | NaN           |
| mean   | NaN                 | NaN    | NaN      | 1.670345      |
| std    | NaN                 | NaN    | NaN      | 1.112165      |
| min    | NaN                 | NaN    | NaN      | 1.000000      |
| 25%    | NaN                 | NaN    | NaN      | 1.000000      |
| 50%    | NaN                 | NaN    | NaN      | 1.000000      |
| 75%    | NaN                 | NaN    | NaN      | 2.000000      |
| max    | NaN                 | NaN    | NaN      | 112.000000    |
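Both the 25th and 50th percentiles sit at a single unit, so single-unit days dominate the data. A quick illustrative check of exactly how dominant (this figure is not part of the output above):
# Share of daily store-item records that sold exactly one unit.
(df['sales'] == 1).mean()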
Let's take a look at a few cuts of the data to see if we can spot any trends. First we plot total weekly sales, followed by sales for a handful of individual stores. It looks like the answer to the question about the big jump in sales in September is the addition of a good number of stores.
# Aggregate total sales by week. Unstacking the lone date index level leaves
# a Series with a ('sales', date) MultiIndex, so the weekly dates live in
# index level 1.
df_total = df.groupby(pd.Grouper(freq='W', key='date')).sum().fillna(0).unstack('date', 0)
df_total.index.levels[1]
# Sanity check: one entry per week.
len(df_total) == len(df_total.index.levels[1])
trace = go.Scatter(
    x=df_total.index.levels[1],
    y=df_total
)
layout = go.Layout(
    title='Total Sales'
)
fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='total-sales')
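As a quick cross-check (not in the original run), the same weekly totals can be computed with resample, which should agree with the Grouper-based aggregation above:
# Weekly totals via resample should match the Grouper result.
df.set_index('date')['sales'].resample('W').sum().head()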
df_1w = df.groupby(['store', pd.Grouper(freq='W', key='date')]).sum().fillna(0).unstack('date', 0)
rows = 10
cols = 3
spidx = np.arange(rows*cols).reshape(rows, cols)
fig = tools.make_subplots(rows=rows, cols=cols, shared_yaxes=True,
                          subplot_titles=df_1w.index[:rows*cols])
for i in range(rows):
    for j in range(cols):
        trace = go.Scatter(
            x=df_1w.columns.levels[1],  # the weekly dates
            y=df_1w.iloc[spidx[i, j]],
        )
        fig.append_trace(trace, i+1, j+1)
fig['layout'].update(height=250*rows, title='Sales by Store', showlegend=False);
py.iplot(fig, filename='sales-by-store')
# Count the number of stores with at least one sale in each week.
store_sales = df.groupby(['store', pd.Grouper(freq='W', key='date')]).sum().fillna(0).unstack('date')
stores_with_sales = store_sales['sales'].where(store_sales.sales > 0).count()
stores_with_sales.index
trace = go.Bar(
    x=stores_with_sales.index,
    y=stores_with_sales
)
layout = go.Layout(
    title='No. of Stores with Sales'
)
fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='stores-with-sales')
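If the September jump really does come from store additions, each store's first recorded sale should cluster around that time. A sketch of that check (not in the original notebook):
# Month in which each store first recorded a sale.
first_sale = df.groupby('store')['date'].min()
first_sale.dt.to_period('M').value_counts().sort_index()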
df_1w = df.groupby(['item', pd.Grouper(freq='W', key='date')]).sum().fillna(0).unstack('date', 0)
rows = 13
cols = 3
fig = tools.make_subplots(rows=rows, cols=cols, shared_yaxes=True,
                          subplot_titles=df_1w.index[:rows*cols])
spidx = np.arange(rows*cols).reshape(rows, cols)
for i in range(rows):
    for j in range(cols):
        trace = go.Scatter(
            x=df_1w.columns.levels[1],  # the weekly dates
            y=df_1w.iloc[spidx[i, j]],
        )
        fig.append_trace(trace, i+1, j+1)
fig['layout'].update(height=250*rows, title='Sales by Item', showlegend=False);
py.iplot(fig, filename='sales-by-item')
# Count the number of items with at least one sale in each week.
item_sales = df.groupby(['item', pd.Grouper(freq='W', key='date')]).sum().fillna(0).unstack('date')
items_with_sales = item_sales['sales'].where(item_sales.sales > 0).count()
items_with_sales.index
trace = go.Bar(
    x=items_with_sales.index,
    y=items_with_sales
)
layout = go.Layout(
    title='No. of Items with Sales'
)
fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='items-with-sales')
But before we do that, let's trim the date range to roughly 2016-03-01 through 2016-12-15 in order to avoid the anomalies evident at the edges of the total sales graph above. We trim to an approximate range here just to reduce the amount of data we have to process; once we aggregate into order periods we will trim again so that we end up with whole periods on either end of our data.
df = df[(df['date'] > '2016-02-15') & (df['date'] < '2016-12-30')]
We need to make sure that we add records with zero sales for any combination of store, item and period that doesn't appear in the dataset so that our trailing averages are calculated correctly.
import itertools
beg_date = df['date'].min()
end_date = df['date'].max()
dates = pd.date_range(start=beg_date, end=end_date, freq='D')
items = df['item'].value_counts().index
stores = df['store'].value_counts().index
# Build a frame with a zero-sales record for every (date, store, item)
# combination, then fold the actual sales in on top of it.
all_periods = pd.DataFrame(list(itertools.product(dates, stores, items)),
                           columns=['date', 'store', 'item'])
all_periods['sales'] = 0
all_periods.date = pd.to_datetime(all_periods.date)
all_periods.item = all_periods.item.astype('category')
all_periods.store = all_periods.store.astype('category')
df_all = pd.concat([all_periods, df]).groupby(['store', 'item', 'date']).sum().fillna(0).reset_index()
len(df_all)
2237703
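As a sanity check, the concatenation should contain every (date, store, item) combination exactly once, so the length above should equal the product of the three dimensions. An illustrative check:
# Every (date, store, item) combination should appear exactly once.
len(df_all) == len(dates) * len(stores) * len(items)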
This time we also want to predict sales for order periods of two and three times per week as well as weekly, assuming orders are placed on the same days each week. The day numbers start on Monday with 0 and end on Sunday with 6.
For the two orders per week frequency, we will predict sales for periods running Wednesday through Friday and Saturday through Tuesday.
For the three orders per week frequency, we will predict sales for periods running Sunday through Tuesday, Wednesday through Thursday, and Friday through Saturday.
In order to aggregate sales over the correct periods we need to add columns to represent the series for each order period. The ends of the two orders per week periods are recorded in the column freq2_end, and the ends of the three orders per week periods in freq3_end. We also add columns to distinguish between the intra-week periods (which have a zero-based index) so that the model can compensate for differences in sales volume between intra-week periods; these are freq2_per and freq3_per.
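To make the mapping concrete, here is a toy illustration (with hypothetical dates) of how one Monday-to-Sunday week falls into the three orders per week periods; the real assignment happens in the code below.
# Map one Monday-to-Sunday week onto the freq3 intra-week periods.
week = pd.date_range('2016-10-03', periods=7, freq='D')  # Mon .. Sun
freq3 = [0, 0, 1, 1, 2, 2, 0]                            # one entry per weekday
print({d.strftime('%a'): freq3[d.weekday()] for d in week})
# {'Mon': 0, 'Tue': 0, 'Wed': 1, 'Thu': 1, 'Fri': 2, 'Sat': 2, 'Sun': 0}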
df_f = df_all.copy()
# Assign each record to its respective intra-week group.
#
# These are the intra-week periods that each day of the week belongs to,
# indexed Monday (0) through Sunday (6).
freq = [[0, 0, 1, 1, 1, 0, 0],
        [0, 0, 1, 1, 2, 2, 0]]
# Map the day of the week of each record to its respective intra-week period.
for i, f in enumerate(freq):
    df_f['freq' + str(i + 2) + '_per'] = df_f['date'].dt.weekday.map(pd.Series(f))
# Assign each record to its respective group within each series of intra-week groups.
# Group membership is indicated in a separate column by the end date of the group.
#
# 1. Calculate the numeric day of the week for each date in the range of dates
# in the data.
# 2. Create a boolean array with an entry for each record indicating whether
# the date of the record falls on a day of the week on which an intra-week
# period ends.
# 3. Calculate the cumulative sum of the boolean array for the range of dates, which
# will then represent the sequential period each date in our range belongs to.
# 4. Index the cumulative sums by the range of dates to create a lookup table.
# 5. Map the 'date' column in our data to the sequence number using the lookup table.
# 6. Group the lookup table by the period, aggregating the date column by max, which
# represents the end date of each sequential period, to create another lookup table.
# 7. Map the sequence number series we created earlier to the period ending date using
# the new lookup table and add it to our data frame.
#
# These are the days of the week that new periods begin on for order frequencies of two
# and three times per week.
period_ends = [[2, 5],
               [2, 4, 6]]
# Execute the same process for each of our order frequencies
for i, p in enumerate(period_ends):
    # Steps 1 through 4
    periods = pd.Series(dates.weekday).isin(p).cumsum()
    date_lookup = pd.DataFrame({'date': dates, 'period': periods})
    date_lookup.set_index('date', inplace=True)
    # Step 5
    seq_col = df_f['date'].map(date_lookup.period)
    # Step 6
    period_lookup = date_lookup.reset_index().groupby('period').max()
    period_lookup.to_csv('freq' + str(i+2) + '.csv')
    # Step 7
    df_f['freq' + str(i+2) + '_end'] = seq_col.map(period_lookup.date)
df_f[df_f['date'] > '2016-10-06'].head(10)
|     | store    | item  | date       | sales | freq2_per | freq3_per | freq2_end  | freq3_end  |
|-----|----------|-------|------------|-------|-----------|-----------|------------|------------|
| 234 | SEGWD103 | 41774 | 2016-10-07 | 2.0   | 1         | 2         | 2016-10-07 | 2016-10-08 |
| 235 | SEGWD103 | 41774 | 2016-10-08 | 0.0   | 0         | 2         | 2016-10-11 | 2016-10-08 |
| 236 | SEGWD103 | 41774 | 2016-10-09 | 0.0   | 0         | 0         | 2016-10-11 | 2016-10-11 |
| 237 | SEGWD103 | 41774 | 2016-10-10 | 0.0   | 0         | 0         | 2016-10-11 | 2016-10-11 |
| 238 | SEGWD103 | 41774 | 2016-10-11 | 0.0   | 0         | 0         | 2016-10-11 | 2016-10-11 |
| 239 | SEGWD103 | 41774 | 2016-10-12 | 0.0   | 1         | 1         | 2016-10-14 | 2016-10-13 |
| 240 | SEGWD103 | 41774 | 2016-10-13 | 2.0   | 1         | 1         | 2016-10-14 | 2016-10-13 |
| 241 | SEGWD103 | 41774 | 2016-10-14 | 1.0   | 1         | 2         | 2016-10-14 | 2016-10-15 |
| 242 | SEGWD103 | 41774 | 2016-10-15 | 1.0   | 0         | 2         | 2016-10-18 | 2016-10-15 |
| 243 | SEGWD103 | 41774 | 2016-10-16 | 1.0   | 0         | 0         | 2016-10-18 | 2016-10-18 |
That looks like it's working correctly.
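A programmatic spot check helps too. Given the period definitions above, nearly every freq2 period should end on a Tuesday (weekday 1) or a Friday (weekday 4); only a partial period at the very end of the date range can differ. A sketch:
# Share of records whose freq2 period ends on a Tuesday or Friday.
df_f['freq2_end'].dt.weekday.isin([1, 4]).mean()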
Adding in the rolling average sales is more complicated because we want to calculate the average over like intra-week periods. For example, for the two orders per week frequency, one period spans Saturday through Tuesday and the other Wednesday through Friday. When we compute trailing averages for a Saturday-through-Tuesday period we only want to include prior Saturday-through-Tuesday periods and exclude the Wednesday-through-Friday periods that would be included in a strictly sequential calculation.
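Here is a toy illustration of the like-period trailing average with made-up numbers (the real work is done in calc_trailing below): each row is averaged with the three most recent rows that share its freq2_per label, shifted by one so the current period is excluded.
# Toy like-period trailing average: rolling(3).mean() within each
# intra-week period, shifted to exclude the current period.
toy = pd.DataFrame({'freq2_per': [0, 1, 0, 1, 0, 1, 0, 1],
                    'sales':     [4, 2, 6, 1, 5, 3, 7, 2]})
toy['trailing_avg'] = (toy.groupby('freq2_per')['sales']
                          .transform(lambda s: s.rolling(3).mean().shift(1)))
toy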
# This function calculates the trailing average sales for a given order frequency.
def calc_trailing(orders_per_week):
    # Check that orders per week is in the available range.
    if orders_per_week not in [2, 3]:
        raise ValueError('Orders per week must be either 2 or 3.')
    freq_per = 'freq' + str(orders_per_week) + '_per'
    freq_end = 'freq' + str(orders_per_week) + '_end'
    freq_end_avg = freq_end + '_avg'
    f = {'sales': 'sum', freq_per: 'mean'}
    g = ['store', 'item', freq_end]
    # Here we filter the data frame for each of the intra-week periods in
    # the specified order frequency and perform the trailing average
    # calculations on them separately.
    df_final = pd.DataFrame()
    for n in df_f[freq_per].value_counts().index:
        df_model = df_f[df_f[freq_per] == n].groupby(g).agg(f).fillna(0)
        # Three-period trailing average, shifted so the current period
        # is excluded.
        rolling_avg = (df_model
                       .apply(lambda x: x.rolling(window=3).mean())
                       .shift(1)
                       )
        df_model[freq_end_avg] = rolling_avg['sales']
        df_final = df_final.append(df_model.reset_index())
    return df_final.groupby(g).sum()
df_final = calc_trailing(3)
df_final.head(20)
| store    | item  | freq3_end  | sales | freq3_per | freq3_end_avg |
|----------|-------|------------|-------|-----------|---------------|
| SEGWD103 | 41774 | 2016-02-16 | 0.0   | 0         | NaN           |
|          |       | 2016-02-18 | 2.0   | 1         | NaN           |
|          |       | 2016-02-20 | 1.0   | 2         | NaN           |
|          |       | 2016-02-23 | 0.0   | 0         | NaN           |
|          |       | 2016-02-25 | 0.0   | 1         | NaN           |
|          |       | 2016-02-27 | 2.0   | 2         | NaN           |
|          |       | 2016-03-01 | 1.0   | 0         | NaN           |
|          |       | 2016-03-03 | 0.0   | 1         | NaN           |
|          |       | 2016-03-05 | 2.0   | 2         | NaN           |
|          |       | 2016-03-08 | 2.0   | 0         | 0.333333      |
|          |       | 2016-03-10 | 2.0   | 1         | 0.666667      |
|          |       | 2016-03-12 | 1.0   | 2         | 1.666667      |
|          |       | 2016-03-15 | 2.0   | 0         | 1.000000      |
|          |       | 2016-03-17 | 1.0   | 1         | 0.666667      |
|          |       | 2016-03-19 | 2.0   | 2         | 1.666667      |
|          |       | 2016-03-22 | 3.0   | 0         | 1.666667      |
|          |       | 2016-03-24 | 0.0   | 1         | 1.000000      |
|          |       | 2016-03-26 | 2.0   | 2         | 1.666667      |
|          |       | 2016-03-29 | 4.0   | 0         | 2.333333      |
|          |       | 2016-03-31 | 0.0   | 1         | 1.000000      |
# Remove leading zero-sales records (before an item's first sale at a store)
# and records that don't yet have a trailing average.
def remove_missing(df_final):
    beg_len = len(df_final)
    df_final['cum_sales'] = df_final.groupby(level=[0, 1]).cumsum()['sales']
    df_final_masked = df_final[df_final.cum_sales != 0].dropna(how='any')
    print('{} records removed'.format(beg_len - len(df_final_masked)))
    print('{} records remaining'.format(len(df_final_masked)))
    return df_final_masked.reset_index()
df_final_masked = remove_missing(df_final)
store = 'SEGWD104'
item = 41783
df_final_masked.query('(store == @store) & (item == @item)').head()
487591 records removed
479492 records remaining
|      | store    | item  | freq3_end  | sales | freq3_per | freq3_end_avg | cum_sales |
|------|----------|-------|------------|-------|-----------|---------------|-----------|
| 4501 | SEGWD104 | 41783 | 2016-08-30 | 1.0   | 0         | 0.000000      | 1.0       |
| 4502 | SEGWD104 | 41783 | 2016-09-01 | 2.0   | 1         | 0.000000      | 3.0       |
| 4503 | SEGWD104 | 41783 | 2016-09-03 | 1.0   | 2         | 0.000000      | 4.0       |
| 4504 | SEGWD104 | 41783 | 2016-09-06 | 5.0   | 0         | 0.333333      | 9.0       |
| 4505 | SEGWD104 | 41783 | 2016-09-08 | 0.0   | 1         | 0.666667      | 9.0       |
# We have some extra logic here to deal with needing to create dummy variables
# for the three orders per week period variable, but not the two, which is
# already binary.
def encode_cat_vars(df_final_masked):
    # Make sure sales is the first column.
    cols = df_final_masked.columns.tolist()
    cols.remove('sales')
    cols = ['sales'] + cols
    df_final_masked = df_final_masked[cols]
    # Initial list of columns to drop: the cumulative sales helper plus one
    # dummy level each for item and store, to avoid perfect collinearity
    # with the constant term.
    drop_cols = ['cum_sales', 41793, 'SEGWD103']
    # Initial dummy variables.
    stores = pd.get_dummies(df_final_masked['store'])
    items = pd.get_dummies(df_final_masked['item'])
    concat_tables = [df_final_masked, stores, items]
    # Create dummy variables for freq3_per if necessary (freq2_per is
    # already binary, so it can be used as-is).
    try:
        freq = cols[cols.index('freq2_end')][:5]
    except ValueError:
        freq = 'freq3'
        freq3_d = pd.get_dummies(df_final_masked['freq3_per'], prefix='freq3_per')
        concat_tables.append(freq3_d)
        drop_cols.append(freq + '_per_2')
    # Add dummy variables.
    df_final_masked = pd.concat(concat_tables, axis=1)
    # Drop columns and return, sorted by period end date.
    return df_final_masked.drop(drop_cols, axis=1).sort_values(freq + '_end')
data = encode_cat_vars(df_final_masked)
data.head()
|        | sales | store     | item  | freq3_end  | freq3_per | freq3_end_avg | SEGWD104 | SEGWD116 | SEGWD12 | SEGWD123 | ... | 42045 | 42046 | 42047 | 42048 | 42049 | 42050 | 42051 | 42052 | freq3_per_0 | freq3_per_1 |
|--------|-------|-----------|-------|------------|-----------|---------------|----------|----------|---------|----------|-----|-------|-------|-------|-------|-------|-------|-------|-------|-------------|-------------|
| 410400 | 2.0   | SEGWD671  | 41797 | 2016-02-16 | 0         | 3.000000      | 0        | 0        | 0       | 0        | ... | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 1           | 0           |
| 49344  | 2.0   | SEGWD151  | 41791 | 2016-02-16 | 0         | 1.000000      | 0        | 0        | 0       | 0        | ... | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 1           | 0           |
| 86661  | 2.0   | SEGWD1690 | 41793 | 2016-02-16 | 0         | 2.333333      | 0        | 0        | 0       | 0        | ... | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 1           | 0           |
| 393506 | 1.0   | SEGWD649  | 41795 | 2016-02-16 | 0         | 0.333333      | 0        | 0        | 0       | 0        | ... | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 1           | 0           |
| 366425 | 3.0   | SEGWD54   | 41797 | 2016-02-16 | 0         | 2.000000      | 0        | 0        | 0       | 0        | ... | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 0     | 1           | 0           |
5 rows × 226 columns
This time we'll train the model with data through the end of October and then use the remaining data to test the model. This is where we also do the final trimming of the data to ensure it includes only whole intra-week periods.
# Values are beginning of training, end of training, and end of testing,
# and represent the end of the respective intra-week order period.
date_range = dict(
    freq2_end=['2016-03-01', '2016-10-28', '2016-12-23'],
    freq3_end=['2016-03-03', '2016-10-29', '2016-12-24']
)
# Pull the period-average and period-end column names by position
# (see the column order established by encode_cat_vars above).
freq_avg = data.columns.tolist()[5]
freq = data.columns.tolist()[3]
beg_train, end_train, end_test = date_range[freq]
# Add constant
data['const'] = 1
# Normalize average sales variable.
mu = data[freq_avg].mean()
std = data[freq_avg].std()
data[freq_avg] = (data[freq_avg] - mu) / std
data_train = data[data[freq].ge(beg_train) & data[freq].le(end_train)]
data_test = data[data[freq].gt(end_train) & data[freq].le(end_test)]
# Column 0 is the target; everything from column 5 on (the normalized
# trailing average, the dummies, and the constant) is a feature.
X_train = data_train.iloc[:, 5:].as_matrix()
y_train = data_train.iloc[:, 0].as_matrix()
X_test = data_test.iloc[:, 5:].as_matrix()
y_test = data_test.iloc[:, 0].as_matrix()
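Before training, a quick shape check confirms the feature matrices and target vectors line up (illustrative only):
# Confirm the train/test splits line up.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)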
This is where we actually train the model. I ran it for 100 iterations - more iterations likely won't increase the predictive power of the model, but there are some other diagnostics we can run to see what other improvements we can make.
from sklearn import linear_model
clf = linear_model.SGDRegressor(n_iter=100)
clf.fit(X_train, y_train)
SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
       loss='squared_loss', n_iter=100, penalty='l2', power_t=0.25,
       random_state=None, shuffle=True, verbose=0, warm_start=False)
predict = clf.predict(X_test)
# Track where the model predicts negative sales, which we would floor at
# zero in practice.
predict_neg = predict < 0
error = predict - y_test
# Total error attributable to the negative predictions.
error_neg = predict_neg @ error
# Recover the model parameters by predicting on an identity matrix: each
# prediction is that feature's coefficient plus the intercept.
np.savetxt('modelparams.csv', clf.predict(np.eye(X_test.shape[1])), delimiter=",")
print('R-squared: {:.{p}f}'.format(clf.score(X_test, y_test), p=4))
print('Total error in sales quantity: {:.{p}f}'.format(sum(error), p=0))
print('Total error as a % of actual: {:.{p}f}%'.format(sum(error) / sum(y_test)*100, p=2))
print('Total error in sales quantity with zero min prediction: {:.{p}f}'.format(sum(error)-error_neg, p=0))
print('Total error as a % of actual with zero min prediction: {:.{p}f}%'.format((sum(error)-error_neg) / sum(y_test)*100, p=2))
R-squared: 0.5402
Total error in sales quantity: 21932
Total error as a % of actual: 12.81%
Total error in sales quantity with zero min prediction: 23599
Total error as a % of actual with zero min prediction: 13.79%
This is still quite a good fit overall. As you would expect, it is not quite as accurate as the model that was based on seven-day periods.
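For reference, aggregate residual metrics are cheap to compute from the arrays we already have; a quick sketch (these were not part of the original diagnostics):
# Supplementary residual metrics from the test-set errors above.
mae = np.mean(np.abs(error))
rmse = np.sqrt(np.mean(error ** 2))
print('MAE: {:.4f}, RMSE: {:.4f}'.format(mae, rmse))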
# Reassemble a prediction frame on the test set (copy to avoid mutating
# a view of data_test).
data_predict = data_test.iloc[:, :5].copy()
# Un-normalize the average sales column for readability.
data_predict[freq_avg] = data[freq_avg] * std + mu
data_predict['predict'] = predict
data_predict['variance'] = predict - data_predict['sales']
data_predict.head()
|        | sales | store     | item  | freq3_end  | freq3_per | freq3_end_avg | predict  | variance  |
|--------|-------|-----------|-------|------------|-----------|---------------|----------|-----------|
| 73276  | 1.0   | SEGWD168  | 41780 | 2016-11-01 | 0         | 0.333333      | 0.645468 | -0.354532 |
| 285476 | 0.0   | SEGWD2366 | 42043 | 2016-11-01 | 0         | 1.000000      | 1.174602 | 1.174602  |
| 407040 | 0.0   | SEGWD67   | 42040 | 2016-11-01 | 0         | 0.000000      | 0.247199 | 0.247199  |
| 222097 | 6.0   | SEGWD2261 | 41795 | 2016-11-01 | 0         | 4.000000      | 4.417526 | -1.582474 |
| 386161 | 0.0   | SEGWD622  | 42045 | 2016-11-01 | 0         | 0.000000      | 0.149843 | 0.149843  |
# cufflinks' iplot(asFigure=True) returns a figure object that we can hand
# back to plotly for display.
py.iplot(data_predict.groupby(freq).sum()[['sales', 'predict']]
         .iplot(asFigure=True, kind='bar', xTitle='Dates', yTitle='Sales',
                title='Actual vs. Predicted'))
# Pivot actual, predicted, and variance by store-item and period end.
pivot = (pd.pivot_table(data_predict, index=['store', 'item'], columns=[freq],
                        values=['sales', 'predict', 'variance'],
                        aggfunc=np.sum, margins=True, margins_name='Total')
         .swaplevel(axis=1)
         .sortlevel(0, axis=1, sort_remaining=False)
         )
pivot
freq3_end | 2016-11-01 00:00:00 | 2016-11-03 00:00:00 | 2016-11-05 00:00:00 | 2016-11-08 00:00:00 | ... | 2016-12-20 00:00:00 | 2016-12-22 00:00:00 | 2016-12-24 00:00:00 | Total | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sales | predict | variance | sales | predict | variance | sales | predict | variance | sales | ... | variance | sales | predict | variance | sales | predict | variance | sales | predict | variance | ||
store | item | |||||||||||||||||||||
SEGWD103 | 41774 | 3.0 | 1.496760 | -1.503240 | 3.0 | 1.632384 | -1.367616 | 0.0 | 1.656427 | 1.656427 | 3.0 | ... | 0.698157 | 1.0 | 1.028195 | 0.028195 | 0.0 | 1.253634 | 1.253634 | 30.0 | 37.881773 | 7.881773 |
41775 | 1.0 | 1.370742 | 0.370742 | 1.0 | 1.506365 | 0.506365 | 2.0 | 0.926219 | -1.073781 | 2.0 | ... | 0.370742 | 0.0 | 0.700780 | 0.700780 | 0.0 | 2.134597 | 2.134597 | 28.0 | 32.843361 | 4.843361 | |
41776 | 0.0 | 0.820901 | 0.820901 | 0.0 | 0.755128 | 0.755128 | 0.0 | 0.376379 | 0.376379 | 0.0 | ... | 0.820901 | 1.0 | 0.352336 | -0.647664 | 0.0 | 0.376379 | 0.376379 | 6.0 | 14.612282 | 8.612282 | |
41777 | 3.0 | 1.491449 | -1.508551 | 0.0 | 0.821487 | 0.821487 | 4.0 | 1.248323 | -2.751677 | 1.0 | ... | 0.894242 | 1.0 | 0.821487 | -0.178513 | 0.0 | 0.845530 | 0.845530 | 28.0 | 29.899845 | 1.899845 | |
41778 | 1.0 | 1.472789 | 0.472789 | 1.0 | 0.601431 | -0.398569 | 1.0 | 0.826870 | -0.173130 | 1.0 | ... | -1.527211 | 1.0 | 1.004224 | 0.004224 | 0.0 | 0.826870 | 0.826870 | 27.0 | 27.236655 | 0.236655 | |
41779 | 1.0 | 1.152855 | 0.152855 | 1.0 | 1.087082 | 0.087082 | 0.0 | 1.715314 | 1.715314 | 2.0 | ... | -1.041560 | 2.0 | 1.489874 | -0.510126 | 0.0 | 1.111125 | 1.111125 | 35.0 | 34.260152 | -0.739848 | |
41780 | 3.0 | 1.547540 | -1.452460 | 2.0 | 0.877578 | -1.122422 | 1.0 | 1.304414 | 0.304414 | 1.0 | ... | 1.353125 | 1.0 | 0.676182 | -0.323818 | 2.0 | 1.304414 | -0.695586 | 35.0 | 32.857197 | -2.142803 | |
41781 | 0.0 | 0.253061 | 0.253061 | 0.0 | -0.014109 | -0.014109 | 0.0 | 0.009934 | 0.009934 | 0.0 | ... | 0.253061 | 0.0 | -0.014109 | -0.014109 | 0.0 | 0.009934 | 0.009934 | 0.0 | 1.991090 | 1.991090 | |
41782 | 1.0 | 2.069870 | 1.069870 | 1.0 | 1.399908 | 0.399908 | 1.0 | 1.625347 | 0.625347 | 4.0 | ... | -0.735715 | 0.0 | 1.198512 | 1.198512 | 3.0 | 1.625347 | -1.374653 | 35.0 | 38.747044 | 3.747044 | |
41783 | 3.0 | 1.997093 | -1.002907 | 2.0 | 1.125735 | -0.874265 | 3.0 | 1.351174 | -1.648826 | 3.0 | ... | -0.002907 | 0.0 | 1.528527 | 1.528527 | 0.0 | 1.753967 | 1.753967 | 45.0 | 42.035298 | -2.964702 | |
41786 | 0.0 | 0.316015 | 0.316015 | 0.0 | 0.048845 | 0.048845 | 0.0 | 0.072888 | 0.072888 | 0.0 | ... | 0.316015 | 0.0 | 0.048845 | 0.048845 | 0.0 | 0.072888 | 0.072888 | 0.0 | 3.501988 | 3.501988 | |
41788 | 3.0 | 1.492568 | -1.507432 | 1.0 | 1.829588 | 0.829588 | 0.0 | 0.846649 | 0.846649 | 4.0 | ... | -1.104639 | 0.0 | 1.628191 | 1.628191 | 1.0 | 0.645253 | -0.354747 | 33.0 | 36.975574 | 3.975574 | |
41789 | 2.0 | 1.118977 | -0.881023 | 0.0 | 0.851807 | 0.851807 | 1.0 | 1.278643 | 0.278643 | 3.0 | ... | -0.276835 | 0.0 | 1.053203 | 1.053203 | 1.0 | 0.875850 | -0.124150 | 29.0 | 30.023336 | 1.023336 | |
41790 | 2.0 | 0.999797 | -1.000203 | 0.0 | 1.336817 | 1.336817 | 1.0 | 0.756671 | -0.243329 | 2.0 | ... | 1.201194 | 1.0 | 0.934024 | -0.065976 | 0.0 | 0.555275 | 0.555275 | 21.0 | 24.947678 | 3.947678 | |
41791 | 3.0 | 3.222639 | 0.222639 | 5.0 | 2.552677 | -2.447323 | 1.0 | 2.576720 | 1.576720 | 2.0 | ... | 1.410073 | 1.0 | 1.545696 | 0.545696 | 0.0 | 1.368343 | 1.368343 | 28.0 | 54.933915 | 26.933915 | |
41792 | 10.0 | 4.952470 | -5.047530 | 3.0 | 3.074130 | 0.074130 | 2.0 | 2.493984 | 0.493984 | 5.0 | ... | 1.938507 | 0.0 | 1.462959 | 1.462959 | 0.0 | 1.889795 | 1.889795 | 37.0 | 69.664135 | 32.664135 | |
41793 | 10.0 | 4.662246 | -5.337754 | 3.0 | 3.992284 | 0.992284 | 5.0 | 3.814931 | -1.185069 | 10.0 | ... | -1.136358 | 5.0 | 3.186699 | -1.813301 | 3.0 | 3.412138 | 0.412138 | 113.0 | 104.991987 | -8.008013 | |
41794 | 6.0 | 3.695985 | -2.304015 | 4.0 | 2.824627 | -1.175373 | 5.0 | 2.244481 | -2.755519 | 6.0 | ... | -1.699826 | 3.0 | 4.435797 | 1.435797 | 3.0 | 2.043085 | -0.956915 | 94.0 | 86.031049 | -7.968951 | |
41795 | 7.0 | 4.138919 | -2.861081 | 3.0 | 3.267561 | 0.267561 | 9.0 | 3.895793 | -5.104207 | 6.0 | ... | -2.249910 | 2.0 | 4.274543 | 2.274543 | 2.0 | 3.895793 | 1.895793 | 115.0 | 108.543855 | -6.456145 | |
41796 | 0.0 | 2.333422 | 2.333422 | 4.0 | 2.267649 | -1.732351 | 5.0 | 2.090296 | -2.909704 | 6.0 | ... | -1.256804 | 1.0 | 2.871838 | 1.871838 | 3.0 | 1.687503 | -1.312497 | 71.0 | 57.558860 | -13.441140 | |
41797 | 0.0 | 2.085702 | 2.085702 | 0.0 | 2.019929 | 2.019929 | 5.0 | 2.043972 | -2.956028 | 1.0 | ... | -1.907316 | 0.0 | 2.019929 | 2.019929 | 3.0 | 1.439783 | -1.560217 | 46.0 | 45.168899 | -0.831101 | |
41798 | 3.0 | 3.315367 | 0.315367 | 2.0 | 2.041216 | 0.041216 | 5.0 | 1.863863 | -3.136137 | 3.0 | ... | 0.711178 | 4.0 | 2.444008 | -1.555992 | 3.0 | 2.065259 | -0.934741 | 70.0 | 59.576130 | -10.423870 | |
41799 | 2.0 | 1.360804 | -0.639196 | 3.0 | 0.690842 | -2.309158 | 1.0 | 0.714885 | -0.285115 | 1.0 | ... | -0.437799 | 1.0 | 0.892239 | -0.107761 | 1.0 | 1.319074 | 0.319074 | 33.0 | 28.576936 | -4.423064 | |
41800 | 0.0 | 0.170117 | 0.170117 | 0.0 | -0.097052 | -0.097052 | 0.0 | -0.073009 | -0.073009 | 0.0 | ... | 0.170117 | 0.0 | -0.097052 | -0.097052 | 0.0 | -0.073009 | -0.073009 | 0.0 | 0.000452 | 0.000452 | |
41803 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
41804 | 0.0 | 0.110489 | 0.110489 | 0.0 | -0.156681 | -0.156681 | 0.0 | -0.132638 | -0.132638 | 0.0 | ... | 0.110489 | 0.0 | -0.156681 | -0.156681 | 0.0 | -0.132638 | -0.132638 | 0.0 | -1.430642 | -1.430642 | |
42040 | 0.0 | 0.776137 | 0.776137 | 0.0 | 0.508968 | 0.508968 | 0.0 | 0.130218 | 0.130218 | 0.0 | ... | 0.373345 | 0.0 | 0.106175 | 0.106175 | 0.0 | 0.130218 | 0.130218 | 0.0 | 6.489080 | 6.489080 | |
42041 | 0.0 | 0.817053 | 0.817053 | 0.0 | 0.751280 | 0.751280 | 0.0 | 0.372530 | 0.372530 | 0.0 | ... | 0.414260 | 0.0 | 0.147091 | 0.147091 | 0.0 | 0.171134 | 0.171134 | 0.0 | 8.478029 | 8.478029 | |
42042 | 0.0 | 0.828125 | 0.828125 | 0.0 | 0.359559 | 0.359559 | 0.0 | 0.584999 | 0.584999 | 0.0 | ... | 0.425332 | 0.0 | 0.158163 | 0.158163 | 0.0 | 0.182206 | 0.182206 | 0.0 | 8.139574 | 8.139574 | |
42043 | 0.0 | 0.740928 | 0.740928 | 0.0 | 0.272362 | 0.272362 | 0.0 | 0.296405 | 0.296405 | 0.0 | ... | 0.338135 | 0.0 | 0.070966 | 0.070966 | 0.0 | 0.095009 | 0.095009 | 0.0 | 5.845440 | 5.845440 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
SEGWD97 | 41786 | 0.0 | 0.108289 | 0.108289 | 0.0 | -0.158880 | -0.158880 | 0.0 | -0.134837 | -0.134837 | 0.0 | ... | 0.108289 | 0.0 | -0.158880 | -0.158880 | 0.0 | -0.134837 | -0.134837 | 0.0 | -1.483428 | -1.483428 |
41788 | 2.0 | 1.083446 | -0.916554 | 2.0 | 1.017673 | -0.982327 | 1.0 | 1.243112 | 0.243112 | 1.0 | ... | -0.312365 | 1.0 | 0.816277 | -0.183723 | 1.0 | 0.638924 | -0.361076 | 27.0 | 28.163628 | 1.163628 | |
41789 | 2.0 | 0.508458 | -1.491542 | 0.0 | 1.852459 | 1.852459 | 3.0 | 1.473710 | -1.526290 | 0.0 | ... | -0.484560 | 1.0 | 0.845478 | -0.154522 | 1.0 | 0.265332 | -0.734668 | 25.0 | 23.426750 | -1.573250 | |
41790 | 1.0 | 0.792072 | -0.207928 | 0.0 | 0.524902 | 0.524902 | 4.0 | 0.548945 | -3.451055 | 0.0 | ... | 0.590675 | 1.0 | 0.323506 | -0.676494 | 1.0 | 0.548945 | -0.451055 | 16.0 | 15.732939 | -0.267061 | |
41791 | 4.0 | 2.612121 | -1.387879 | 5.0 | 2.546348 | -2.453652 | 0.0 | 2.167598 | 2.167598 | 7.0 | ... | -1.186483 | 2.0 | 2.344952 | 0.344952 | 3.0 | 2.973184 | -0.026816 | 70.0 | 66.060203 | -3.939797 | |
41792 | 3.0 | 2.730781 | -0.269219 | 4.0 | 2.463612 | -1.536388 | 4.0 | 3.494636 | -0.505364 | 3.0 | ... | -1.665030 | 3.0 | 3.067800 | 0.067800 | 3.0 | 2.286258 | -0.713742 | 68.0 | 72.533175 | 4.533175 | |
41793 | 4.0 | 2.037765 | -1.962235 | 2.0 | 2.576180 | 0.576180 | 4.0 | 2.801620 | -1.198380 | 2.0 | ... | -0.760839 | 3.0 | 3.180369 | 0.180369 | 2.0 | 2.398827 | 0.398827 | 61.0 | 66.373388 | 5.373388 | |
41794 | 4.0 | 2.682674 | -1.317326 | 3.0 | 2.415505 | -0.584495 | 5.0 | 2.036755 | -2.963245 | 5.0 | ... | -2.122911 | 2.0 | 3.422486 | 1.422486 | 3.0 | 3.043737 | 0.043737 | 75.0 | 69.767440 | -5.232560 | |
41795 | 3.0 | 3.931194 | 0.931194 | 3.0 | 2.657043 | -0.342957 | 3.0 | 2.681086 | -0.318914 | 4.0 | ... | -0.471599 | 3.0 | 4.066817 | 1.066817 | 2.0 | 3.083878 | 1.083878 | 77.0 | 83.016016 | 6.016016 | |
41796 | 2.0 | 1.722904 | -0.277096 | 1.0 | 1.052942 | 0.052942 | 4.0 | 1.076985 | -2.923015 | 2.0 | ... | -0.478492 | 2.0 | 0.650149 | -1.349851 | 0.0 | 1.681174 | 1.681174 | 34.0 | 33.843588 | -0.156412 | |
41797 | 2.0 | 1.676580 | -0.323420 | 2.0 | 1.208014 | -0.791986 | 4.0 | 0.829265 | -3.170735 | 3.0 | ... | -0.524816 | 0.0 | 0.603825 | 0.603825 | 2.0 | 1.030661 | -0.969339 | 35.0 | 33.738801 | -1.261199 | |
41798 | 0.0 | 1.295074 | 1.295074 | 1.0 | 1.430698 | 0.430698 | 1.0 | 1.051948 | 0.051948 | 3.0 | ... | -0.906322 | 1.0 | 0.423716 | -0.576284 | 2.0 | 1.454741 | -0.545259 | 28.0 | 30.020364 | 2.020364 | |
41799 | 0.0 | 0.347493 | 0.347493 | 0.0 | 0.080324 | 0.080324 | 0.0 | 0.104367 | 0.104367 | 0.0 | ... | 0.750286 | 0.0 | 0.080324 | 0.080324 | 0.0 | 0.305763 | 0.305763 | 4.0 | 5.868645 | 1.868645 | |
41800 | 0.0 | -0.037608 | -0.037608 | 0.0 | -0.304778 | -0.304778 | 0.0 | -0.280735 | -0.280735 | 0.0 | ... | -0.037608 | 0.0 | -0.304778 | -0.304778 | 0.0 | -0.280735 | -0.280735 | 0.0 | -4.984964 | -4.984964 | |
41803 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
41804 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
42040 | 0.0 | 0.165619 | 0.165619 | 0.0 | -0.101550 | -0.101550 | 0.0 | 0.123889 | 0.123889 | 0.0 | ... | 0.165619 | 0.0 | -0.101550 | -0.101550 | 0.0 | -0.077507 | -0.077507 | 0.0 | 0.093890 | 0.093890 | |
42041 | 0.0 | 0.206535 | 0.206535 | 0.0 | -0.060635 | -0.060635 | 0.0 | -0.036592 | -0.036592 | 0.0 | ... | 0.206535 | 0.0 | -0.060635 | -0.060635 | 0.0 | -0.036592 | -0.036592 | 0.0 | 0.874461 | 0.874461 | |
42042 | 0.0 | 0.217607 | 0.217607 | 0.0 | -0.049563 | -0.049563 | 0.0 | 0.175877 | 0.175877 | 0.0 | ... | 0.217607 | 0.0 | -0.049563 | -0.049563 | 0.0 | -0.025520 | -0.025520 | 0.0 | 1.341591 | 1.341591 | |
42043 | 0.0 | 0.331806 | 0.331806 | 0.0 | -0.136760 | -0.136760 | 0.0 | -0.112717 | -0.112717 | 0.0 | ... | 0.130409 | 0.0 | -0.136760 | -0.136760 | 0.0 | -0.112717 | -0.112717 | 0.0 | -0.549751 | -0.549751 | |
42044 | 0.0 | 0.529452 | 0.529452 | 0.0 | -0.140510 | -0.140510 | 0.0 | -0.116467 | -0.116467 | 0.0 | ... | 0.126660 | 0.0 | -0.140510 | -0.140510 | 0.0 | -0.116467 | -0.116467 | 0.0 | -0.236952 | -0.236952 | |
42045 | 0.0 | 0.153535 | 0.153535 | 0.0 | -0.113635 | -0.113635 | 0.0 | 0.111804 | 0.111804 | 0.0 | ... | 0.153535 | 0.0 | -0.113635 | -0.113635 | 0.0 | -0.089592 | -0.089592 | 0.0 | -0.196142 | -0.196142 | |
42046 | 0.0 | 0.073952 | 0.073952 | 0.0 | -0.193217 | -0.193217 | 0.0 | -0.169174 | -0.169174 | 0.0 | ... | 0.073952 | 0.0 | -0.193217 | -0.193217 | 0.0 | -0.169174 | -0.169174 | 0.0 | -2.307511 | -2.307511 | |
42047 | 1.0 | 1.247721 | 0.247721 | 0.0 | 0.577759 | 0.577759 | 5.0 | 0.601802 | -4.398198 | 1.0 | ... | -0.752279 | 0.0 | 0.577759 | 0.577759 | 1.0 | 1.407388 | 0.407388 | 35.0 | 27.071326 | -7.928674 | |
42048 | 1.0 | 1.257838 | 0.257838 | 1.0 | 0.386480 | -0.613520 | 2.0 | 1.216108 | -0.783892 | 2.0 | ... | 0.063423 | 2.0 | 0.587876 | -1.412124 | 1.0 | 1.014712 | 0.014712 | 38.0 | 28.321106 | -9.678894 | |
42049 | 2.0 | 1.131052 | -0.868948 | 1.0 | 0.259693 | -0.740307 | 4.0 | 1.290718 | -2.709282 | 1.0 | ... | 0.131052 | 2.0 | 1.468071 | -0.531929 | 2.0 | 0.485133 | -1.514867 | 42.0 | 28.500577 | -13.499423 | |
42050 | 2.0 | 1.472176 | -0.527824 | 0.0 | 0.399422 | 0.399422 | 2.0 | 0.826257 | -1.173743 | 4.0 | ... | 0.069384 | 2.0 | 0.802214 | -1.197786 | 2.0 | 1.027653 | -0.972347 | 31.0 | 24.603784 | -6.396216 | |
42051 | 0.0 | 1.289408 | 1.289408 | 1.0 | 1.022239 | 0.022239 | 3.0 | 1.046282 | -1.953718 | 1.0 | ... | 0.094993 | 1.0 | 0.619446 | -0.380554 | 1.0 | 0.643489 | -0.356511 | 29.0 | 25.655051 | -3.344949 | |
42052 | 2.0 | 1.079798 | -0.920202 | 1.0 | 0.409836 | -0.590164 | 4.0 | 0.635276 | -3.364724 | 1.0 | ... | 1.281195 | 0.0 | 0.208440 | 0.208440 | 4.0 | 0.232483 | -3.767517 | 29.0 | 19.416036 | -9.583964 | |
Total | 8733.0 | 9304.347387 | 571.347387 | 6118.0 | 6650.350036 | 532.350036 | 10153.0 | 7259.384284 | -2893.615716 | 9235.0 | ... | 1261.308616 | 5625.0 | 6810.836373 | 1185.836373 | 6752.0 | 7032.609569 | 280.609569 | 171148.0 | 193080.171850 | 21932.171850 |
7060 rows × 75 columns
timestring = datetime.datetime.fromtimestamp(time.time()).strftime('%m%d%H%M')
pivot.to_csv('pivot_' + freq[:5] + '_' + timestring + '.csv')