Analysis yoochoose data

Load Data

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from __future__ import division
import pandas as pd
import sys
print "my python version: ", sys.version_info[:]
print "my pandas version: ", pd.__version__
print "my numpy version: ", np.version.version

plt.rcParams['figure.figsize'] = (15, 10)  # set default fig size

click_file = './data/yoochoose-data/yoochoose-clicks.dat'
buy_file = './data/yoochoose-data/yoochoose-buys.dat'

click = pd.read_csv(click_file,
                    header=None, index_col=False, parse_dates=[1],
                    names=['SesID', 'TimeStamp','ItemID','Category'], 
                    dtype={'SesID':str,
                           'TimeStamp':datetime,
                           'ItemID':str,
                           'Category': str})

buy = pd.read_csv(buy_file, 
                  header=None,index_col=False, parse_dates=[1],
                  names=['SesID', 'TimeStamp', 'ItemID', 'Price', 'Quantity'],
                  dtype={'SesID':str,
                         'TimeStamp':datetime,
                         'ItemID':str,
                         'Price':float,
                         'Quantity':int})
my python version:  (2, 7, 9, 'final', 0)
my pandas version:  0.15.2
my numpy version:  1.9.1

Let's see some examples of click records and buy records:

In [9]:
click.tail()
Out[9]:
SesID TimeStamp ItemID Category
33003939 11299809 2014-09-25 09:33:22.412000 214819412 S
33003940 11299809 2014-09-25 09:43:52.821000 214830939 S
33003941 11299811 2014-09-24 19:02:09.741000 214854855 S
33003942 11299811 2014-09-24 19:02:11.894000 214854838 S
33003943 11299811 2014-09-24 19:02:25.146000 214848658 S
In [4]:
buy.tail()
Out[4]:
SesID TimeStamp ItemID Price Quantity
1150748 11368701 2014-09-26 07:52:51.357000 214849809 554 2
1150749 11368691 2014-09-25 09:37:44.206000 214700002 6806 5
1150750 11523941 2014-09-25 06:14:47.965000 214578011 14556 1
1150751 11423202 2014-09-26 18:49:34.024000 214849164 1046 1
1150752 11423202 2014-09-26 18:49:34.026000 214560500 5549 1

Statics

First we join buy and click data together.

In [6]:
total = pd.concat([click[['TimeStamp','SesID','ItemID']],buy[['TimeStamp','SesID','ItemID']]],
                  ignore_index=True)

In [7]:
item2category = click[['ItemID','Category']].copy()
item2category.drop_duplicates(inplace=True)
print 'length of item2category :',len(item2category)
print 'total num of items: ', len(total['ItemID'].drop_duplicates())
# len(item2category) >= len(total['ItemID'].drop_duplicates())
# this means some item belongs to multiple category,
# about 2categories per item
length of item2category : 100110
total num of items:  52739
In [8]:
items_group = item2category.groupby('ItemID')
category_group = item2category.groupby('Category')
In [9]:
items_group_count = items_group.count()
category_group_count = category_group.count()
In [65]:
buy_sessions = buy['SesID'].drop_duplicates()
total['IsBuy'] = total['SesID'].isin(buy_sessions)
total_sesID_group = total.groupby('SesID')
click_sesID_group = total[total['IsBuy'] == False].groupby('SesID')
buy_sesID_group = total[total['IsBuy'] == True].groupby('SesID')
In [49]:
n_sessions = len(total_sesID_group)
n_click_sessions = len(click_sesID_group)
n_buy_sessions = len(buy_sesID_group)
total_records = len(total)
n_clicks = (total['IsBuy'] == False).sum()
n_buys = (total['IsBuy'] == True).sum()

print "number of records: in click sessions:", n_clicks
print "number of records in buy sessions:", n_buys
print "total records: ", total_records

print 'n_click_sessions: ', n_click_sessions
print "total sessions: ", n_sessions
print 'n_buy_sessions: ', n_buy_sessions

print "avg clicks per_session: ", n_clicks / n_click_sessions
print "avg clicks(including buys) per_session: ", n_buys / n_buy_sessions
number of records: in click sessions: 29698257
number of records in buy sessions: 4456440
total records:  34154697
n_click_sessions:  9249729
total sessions:  9249729
n_buy_sessions:  509696
avg clicks per_session:  3.21071644369
avg clicks(including buys) per_session:  8.74332935711

From the above data, we know that

  1. all sessions in buy data also appear in click data: len(set(click_sessions + buy_sessions) = len(set(click_sessions))

  2. avg clicks in sessions(which finally lead to buy sth) are much more than avg click in click sessions.

In [23]:
total_sesID_group_count = total_sesID_group['TimeStamp'].count()
click_sesID_group_count = click_sesID_group['TimeStamp'].count()
buy_sesID_group_count = buy_sesID_group['TimeStamp'].count()
max_count = total_sesID_group_count.max(axis=0)

total_sesID_group_count.plot(kind='hist', bins=np.arange(max_count))
Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe2b112a250>

The fig above is not so intuitive. We can only know that most sessions have less than 10 actions (click/buy). So we cut out bins greater than 25. And this time we only show the hist of clicks and buy sessions' length.

In [47]:
click_bins, _ = np.histogram(click_sesID_group_count.values , bins=np.arange(1, 25))
buy_bins, _ = np.histogram(buy_sesID_group_count.values , bins=np.arange(1, 25))
# scale to [0, 1]
click_bins = click_bins / click_bins.sum()
buy_bins = buy_bins / buy_bins.sum() 

plt.bar(np.arange(1, 24)-0.4, click_bins, 0.4, color='b', label='click')
plt.bar(np.arange(1, 24), buy_bins, 0.4, color='g', label='buy')
plt.legend()
plt.xticks(np.arange(1, 24),np.arange(1, 24))
Out[47]:
([<matplotlib.axis.XTick at 0x7fe2bb0d6c90>,
  <matplotlib.axis.XTick at 0x7fe2bb0d6dd0>,
  <matplotlib.axis.XTick at 0x7fe2baec8850>,
  <matplotlib.axis.XTick at 0x7fe2baec8e50>,
  <matplotlib.axis.XTick at 0x7fe2baed05d0>,
  <matplotlib.axis.XTick at 0x7fe2baed0d10>,
  <matplotlib.axis.XTick at 0x7fe2baede490>,
  <matplotlib.axis.XTick at 0x7fe2baedebd0>,
  <matplotlib.axis.XTick at 0x7fe2bae66350>,
  <matplotlib.axis.XTick at 0x7fe2bae66a90>,
  <matplotlib.axis.XTick at 0x7fe2bae70210>,
  <matplotlib.axis.XTick at 0x7fe2bae70950>,
  <matplotlib.axis.XTick at 0x7fe2bae7b0d0>,
  <matplotlib.axis.XTick at 0x7fe2bae7b810>,
  <matplotlib.axis.XTick at 0x7fe2bae7bf50>,
  <matplotlib.axis.XTick at 0x7fe2bae856d0>,
  <matplotlib.axis.XTick at 0x7fe2bae85e10>,
  <matplotlib.axis.XTick at 0x7fe2bae8f590>,
  <matplotlib.axis.XTick at 0x7fe2bae8fcd0>,
  <matplotlib.axis.XTick at 0x7fe2bae98450>,
  <matplotlib.axis.XTick at 0x7fe2bae98b90>,
  <matplotlib.axis.XTick at 0x7fe2bae23310>,
  <matplotlib.axis.XTick at 0x7fe2bae23a50>],
 <a list of 23 Text xticklabel objects>)

Sounds interesting. It's clear that people who finally buy sth would like to click more times. Maybe they are comparing with some related items?

Next, we draw click(buy) session each day/month, and also buy/click ratio.

In [51]:
sessions_with_one_record = total_sesID_group.first()
buy_sessions_with_one_record = buy_sesID_group.first()

sessions_with_month = sessions_with_one_record['TimeStamp'].apply(lambda x: x.strftime('%Y-%m'))
sessions_with_month_count = sessions_with_month.value_counts().sort_index()
buy_sessions_with_month = buy_sessions_with_one_record['TimeStamp'].apply(lambda x: x.strftime('%Y-%m'))
buy_sessions_with_month_count = buy_sessions_with_month.value_counts().sort_index()

buy_sessons_monthly_ratio = buy_sessions_with_month_count /sessions_with_month_count 
In [86]:
ax = sessions_with_month_count.plot(kind='bar', alpha=0.5, color='b', label='click sessions')
buy_sessions_with_month_count.plot(kind='bar', ax=ax, alpha=0.5, color='y', label='buy sessions')
plt.legend(loc=2)
ax1 = ax.twinx()
buy_sessons_monthly_ratio.plot(ax=ax1, color='r',marker='o',markersize=5, label='buy/click ratio')
ax1.set_ylim([0, 0.07])
plt.legend()
print 'buy_sessons_monthly_ration:\n', buy_sessons_monthly_ratio.values
print 'mean: ', buy_sessons_monthly_ratio.values.mean(), ' std: ', buy_sessons_monthly_ratio.values.std()
buy_sessons_monthly_ration:
[ 0.05987775  0.05767783  0.0547208   0.0524773   0.05327894  0.05268662]
mean:  0.0551198724794  std:  0.00275779664857
In [56]:
sessions_with_week = sessions_with_one_record['TimeStamp'].apply(lambda x: x.timetuple().tm_yday // 7)
sessions_with_week_count = sessions_with_week.value_counts().sort_index()
buy_sessions_with_week = buy_sessions_with_one_record['TimeStamp'].apply(lambda x: x.timetuple().tm_yday // 7)
buy_sessions_with_week_count = buy_sessions_with_week.value_counts().sort_index()
In [77]:
sessions_with_week_count.index
Out[77]:
Int64Index([13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], dtype='int64')
In [97]:
buy_sessions_week_ratio = buy_sessions_with_week_count /sessions_with_week_count 
fig, ax1 = plt.subplots()

ax1.bar(sessions_with_week_count.index -0.4, sessions_with_week_count.values, 0.8, alpha=0.5, color='b', label='click sessions')
ax1.bar(buy_sessions_with_week_count.index -0.4, buy_sessions_with_week_count.values, 0.8, alpha=0.5, color='y', label='buy sessions')
plt.legend(loc=2)
ax1 = ax1.twinx()
ax1.plot(sessions_with_week_count.index, buy_sessions_week_ratio.values, color='r',marker='o',markersize=5, label='buy/click ratio')
ax1.set_ylim([0, 0.07])
mean_ration = buy_sessons_week_ratio.values.mean()
plt.axhline(mean_ration,color='r', linestyle='dotted', label='avg ratio:{:.3f}'.format(mean_ration))
plt.legend(loc=1)
print 'buy_sessons_week_ration:\n', buy_sessons_week_ratio.values
print 'mean: ', mean_ration, ' std: ', buy_sessons_week_ratio.values.std()
buy_sessons_week_ration:
[ 0.06583851  0.0634527   0.06071765  0.05451357  0.04906384  0.0693577
  0.06308127  0.04445721  0.05527256  0.05167474  0.0519934   0.05987153
  0.05694732  0.05692821  0.05605115  0.05776482  0.04654129  0.05246112
  0.05878865  0.04847408  0.05017729  0.05448669  0.04794499  0.04795683
  0.06423523  0.04810544  0.02657807]
mean:  0.0541754020361  std:  0.00834434971228
In [87]:
sessions_with_day = sessions_with_one_record['TimeStamp'].apply(lambda x: x.date())
sessions_with_day_count = sessions_with_day.value_counts().sort_index()
buy_sessions_with_day = buy_sessions_with_one_record['TimeStamp'].apply(lambda x: x.date())
buy_sessions_with_day_count = buy_sessions_with_day.value_counts().sort_index()
In [88]:
plt.title("[0,max] scaled sessions_per_day statistics")
plt.plot(sessions_with_day_count.values / max(sessions_with_day_count.values), label="all_sessions_per_day")
plt.plot(buy_sessions_with_day_count.values / max(buy_sessions_with_day_count.values), label="buy_sessions_per_day")
plt.plot(buy_sessions_with_day_count.values / sessions_with_day_count.values, label="ratio")
plt.vlines(np.arange(0, len(sessions_with_day_count), 7), [0], [1], linestyles='dotted', label='Tuesday')
plt.legend(loc=2)
plt.xlim([0,len(sessions_with_day_count)])
Out[88]:
(0, 183)
In [89]:
sessions_with_weekday = sessions_with_one_record['TimeStamp'].dt.weekday
sessions_with_weekday_count = sessions_with_weekday.value_counts().sort_index()
buy_sessions_with_weekday = buy_sessions_with_one_record['TimeStamp'].dt.weekday
buy_sessions_with_weekday_count = buy_sessions_with_weekday.value_counts().sort_index()
In [109]:
buy_sessions_weekday_ratio = buy_sessions_with_weekday_count / sessions_with_weekday_count 

fig, ax1 = plt.subplots()
ax1.bar(np.arange(7)-0.4, sessions_with_weekday_count.values, 0.8, alpha=0.5, color='b', label='click sessions')
ax1.bar(np.arange(7)-0.4, buy_sessions_with_weekday_count.values, 0.8, alpha=0.5, color='y', label='buy sessions')
plt.xticks(np.arange(7), ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])
plt.legend(loc=2)
ax1.set_ylim([0, 2200000])
ax2 = ax1.twinx()
ax2.plot(np.arange(7), buy_sessions_weekday_ratio.values, color='r',marker='o',markersize=5, label='buy/click ratio')
ax2.set_ylim([0, 0.07])
plt.legend(loc=1)
print 'buy_sessons_weekday_ration:\n', buy_sessons_weekday_ratio.values
buy_sessons_weekday_ration:
[ 0.05262548  0.03241294  0.04989819  0.05217814  0.05651848  0.06887879
  0.0624327 ]
In [111]:
sessions_with_hour = sessions_with_one_record['TimeStamp'].dt.hour
sessions_with_hour_count = sessions_with_hour.value_counts().sort_index()
buy_sessions_with_hour = buy_sessions_with_one_record['TimeStamp'].dt.hour
buy_sessions_with_hour_count = buy_sessions_with_hour.value_counts().sort_index()
In [120]:
buy_sessons_hour_ratio = buy_sessions_with_hour_count /sessions_with_hour_count 
print 'buy_sessons_hour_ratio:\n', buy_sessons_hour_ratio.values

fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.plot(sessions_with_hour_count.values, color='b', label='total sessions')
ax1.set_ylabel('total sessions')
ax1.legend(loc=1)
ax2 = ax1.twinx()
ax2.plot(buy_sessions_with_hour_count, color='r', label='buy sessions')
ax2.set_ylabel('buy sessions')
ax2.legend(loc=2)
plt.grid()
ax2.set_xlim([0,23])
ax2.set_ylim([0,50000])
ax2.set_xticks(np.arange(0,24))
ax1.vlines(np.arange(0,24), [0], sessions_with_hour_count.values, linestyles='dotted')
buy_sessons_hour_ratio:
[ 0.02316928  0.02389155  0.02407361  0.02942573  0.03565868  0.04101091
  0.05099876  0.05687048  0.05938213  0.05720923  0.0574788   0.05945185
  0.05788164  0.05830583  0.05943467  0.06231237  0.06260238  0.06418852
  0.06259432  0.05286058  0.04320709  0.03632203  0.02990141  0.02466328]
Out[120]:
<matplotlib.collections.LineCollection at 0x7fe1002befd0>

In the next part, we will explore the timestamp of a session.

In [121]:
# it takes me about half an hour to run this cell, I really don't know how to speed up
# if you know, please let me know: [email protected]
total.sort(['SesID', 'TimeStamp'], inplace=True)
total['TimeDiff'] = total.groupby('SesID', sort=False)['TimeStamp'].transform(lambda x: x.diff())
print "max diff: ",total['TimeDiff'].max()  # less than an hour
max diff:  1970-01-01 00:59:59.995000
In [123]:
total['TimeDiff'] = total['TimeDiff'].map(lambda x : x.microsecond / 10**6 + x.second + 60 * x.minute if x else None)
In [124]:
total['TimeDiffRank'] = total.groupby('SesID', sort=False).cumcount()
In [127]:
click_records = total[total['IsBuy']==False]
buy_records = total[total['IsBuy']==True]

click_time_diff_statistic = [click_records[click_records['TimeDiffRank']==i]['TimeDiff'] for i in range(1,100)]
avg_click_timediff = [x.mean() for x in click_time_diff_statistic]
plt.plot(np.arange(1,100), avg_click_timediff, color='b', label='avg time diff between two sequencial clicks in click sessions')

buy_time_diff_statistic = [buy_records[buy_records['TimeDiffRank']==i]['TimeDiff'] for i in range(1,100)]
avg_buy_timediff = [x.mean() for x in buy_time_diff_statistic]
plt.plot(np.arange(1,100), avg_buy_timediff, color='r', label='avg time diff between two sequencial clicks in buy sessions')


plt.legend()
Out[127]:
<matplotlib.legend.Legend at 0x7fe266623250>

From the view of items

Question 1: How many items(categories) will people view in a session?

In [35]:
items_group_count['Category'].value_counts()
Out[35]:
2    25407
1    17511
3     7740
4     1859
5      203
6       19
dtype: int64
In [41]:
click['Category'].value_counts()
Out[41]:
0             16337653
S             10769610
1              1671754
2              1292249
3               789713
4               480569
5               471923
6               414696
7               389910
9               105282
11               70264
10               69820
8                44840
12               19357
2089046251        7100
...
2089134695    1
2089156240    1
2089286957    1
2089179505    1
2089580928    1
2089084275    1
2088962151    1
2089028904    1
2089251022    1
2089759631    1
2089508713    1
2089160270    1
2088970021    1
193464283     1
193465576     1
Length: 339, dtype: int64
In [46]:
category_counts = category_group.count()
In [53]:
category_counts.columns = ['Count']
category_counts.sort('Count',inplace=True, ascending=False)
In [56]:
category_counts[category_counts['Count']>100].plot(kind='bar')
Out[56]:
<matplotlib.axes._subplots.AxesSubplot at 0x7ff101099210>
In [64]:
items_group_count.columns=['Count']
items_group_count['Count'].value_counts().sort_index().plot(kind='bar')
Out[64]:
<matplotlib.axes._subplots.AxesSubplot at 0x7ff10dc2c210>
In [76]:
n_views_in_buyses = buy_sesID_group['ItemID'].apply(lambda x: len(x.unique()))
n_views_in_buyses_stat = n_views_in_buyses.value_counts().sort_index()

n_views_in_clickses = click_sesID_group['ItemID'].apply(lambda x: len(x.unique()))
n_views_in_clickses_stat = n_views_in_clickses.value_counts().sort_index()
In [77]:
n_views_in_buyses_stat = n_views_in_buyses_stat / n_views_in_buyses_stat.sum()  # scale to [0,1]

n_views_in_clickses_stat = n_views_in_clickses_stat / n_views_in_clickses_stat.sum()  # scale to [0,1]

g = pd.DataFrame({'click':n_views_in_buyses_stat[n_views_in_buyses_stat.index < 30 ],
                  'buy': n_views_in_clickses_stat[n_views_in_clickses_stat.index < 30 ]})
g.plot(kind='bar')
Out[77]:
<matplotlib.axes._subplots.AxesSubplot at 0x7ff10bbfdcd0>

Different items' click rate

In [78]:
click_item_group = click.groupby('ItemID')
buy_item_group = buy.groupby('ItemID')
item_click_count = click_item_group.count()
item_buy_count = buy_item_group.count()
In [112]:
item_buy_count_sorted = item_buy_count['TimeStamp'].copy()
item_buy_count_sorted.sort(ascending=False)
#plt.plot(np.log(item_buy_count_sorted))
Out[112]:
0.64997962203878679
In [183]:
click_groupby_category = click.groupby(['Category', click.TimeStamp.dt.dayofyear])
category_clickcount_daily = click_groupby_category.count()
category_clickcount_daily.index.names = ['Category','Day']
category_clickcount_daily = category_clickcount_daily['SesID']
category_clickcount_daily = category_clickcount_daily.unstack()
In [184]:
category_clickcount_daily.fillna(0, inplace=True)
#category_clickcount_daily.reset_index(inplace=True)
In [220]:
selected_category_clickcount = category_clickcount_daily[category_clickcount_daily.index.isin([str(i) for i in range(10)] + ['S'])]
selected_category_clickcount.T.plot(kind='area')
Out[220]:
<matplotlib.axes._subplots.AxesSubplot at 0x7feff3fe6f50>
In [269]:
for i, label in enumerate(['Price','Quantity']):
    plt.subplot(2,1,i+1)
    haveprice_items = buy[~(buy[label] == 0)]
    haveprice_items_group = haveprice_items.groupby(haveprice_items.TimeStamp.dt.dayofyear)
    haveprice_items_day_count = haveprice_items_group.count()['SesID']
    plt.bar(haveprice_items_day_count.index, haveprice_items_day_count.values, label='items have '+ label)
    noprice_items = buy[(buy[label] == 0)]
    noprice_items_group = noprice_items.groupby(noprice_items.TimeStamp.dt.dayofyear)
    noprice_items_day_count = noprice_items_group.count()['SesID']
    plt.bar(noprice_items_day_count.index, noprice_items_day_count.values, color='r',label='items lost '+label)
    plt.legend()
    plt.xlim([91, 273])
    plt.xlabel('day of year')

The above data shows that 610030 records have a Quantity and Price of 0!!! It has been discussed on twitter that:

if it's in buys, it has been bought at least once however if we didn't have the price or the quantity we wrote 0

In [265]:
plt.hist(buy['Price'].values, bins=np.arange(1, 10000, 100))
Out[265]:
(array([  1.17500000e+03,   3.05500000e+03,   9.01400000e+03,
          2.14030000e+04,   2.61540000e+04,   3.85760000e+04,
          2.98770000e+04,   3.85280000e+04,   3.07400000e+04,
          1.86680000e+04,   5.86060000e+04,   9.26500000e+03,
          3.72900000e+03,   2.74740000e+04,   5.57500000e+03,
          1.49660000e+04,   1.03840000e+04,   3.65800000e+03,
          6.83200000e+03,   3.30100000e+03,   2.40530000e+04,
          7.60000000e+02,   7.70000000e+01,   8.24000000e+03,
          2.40200000e+03,   9.12000000e+02,   1.19120000e+04,
          2.02000000e+03,   1.15600000e+03,   2.79200000e+03,
          7.19000000e+02,   1.11070000e+04,   8.30000000e+01,
          3.44900000e+03,   1.19600000e+03,   1.17000000e+02,
          5.98700000e+03,   8.88000000e+02,   7.98000000e+02,
          8.57000000e+02,   3.24000000e+02,   1.78210000e+04,
          1.60000000e+01,   1.98200000e+03,   1.00000000e+00,
          3.99000000e+02,   3.80000000e+01,   2.18000000e+03,
          7.30000000e+01,   1.32000000e+02,   2.95000000e+02,
          8.80000000e+01,   1.02380000e+04,   3.00000000e+00,
          2.10000000e+01,   1.73000000e+03,   7.40000000e+01,
          1.28100000e+03,   5.60000000e+01,   9.70000000e+01,
          1.36000000e+02,   4.03000000e+02,   7.66600000e+03,
          5.00000000e+00,   3.40000000e+01,   1.23600000e+03,
          0.00000000e+00,   8.00000000e+01,   1.58200000e+03,
          1.90000000e+01,   6.00000000e+00,   1.25000000e+02,
          4.80000000e+01,   4.84700000e+03,   3.00000000e+00,
          2.60000000e+01,   7.37000000e+02,   2.00000000e+00,
          9.59000000e+02,   4.00000000e+00,   3.80000000e+01,
          1.03000000e+02,   1.80000000e+01,   5.07100000e+03,
          0.00000000e+00,   1.30000000e+01,   2.30000000e+01,
          7.10000000e+01,   8.24000000e+02,   0.00000000e+00,
          1.90000000e+01,   1.80000000e+01,   7.20000000e+01,
          1.70000000e+01,   4.24300000e+03,   0.00000000e+00,
          1.00000000e+00,   6.60000000e+01,   3.00000000e+00]),
 array([   1,  101,  201,  301,  401,  501,  601,  701,  801,  901, 1001,
        1101, 1201, 1301, 1401, 1501, 1601, 1701, 1801, 1901, 2001, 2101,
        2201, 2301, 2401, 2501, 2601, 2701, 2801, 2901, 3001, 3101, 3201,
        3301, 3401, 3501, 3601, 3701, 3801, 3901, 4001, 4101, 4201, 4301,
        4401, 4501, 4601, 4701, 4801, 4901, 5001, 5101, 5201, 5301, 5401,
        5501, 5601, 5701, 5801, 5901, 6001, 6101, 6201, 6301, 6401, 6501,
        6601, 6701, 6801, 6901, 7001, 7101, 7201, 7301, 7401, 7501, 7601,
        7701, 7801, 7901, 8001, 8101, 8201, 8301, 8401, 8501, 8601, 8701,
        8801, 8901, 9001, 9101, 9201, 9301, 9401, 9501, 9601, 9701, 9801,
        9901]),
 <a list of 99 Patch objects>)