#!/usr/bin/env python # coding: utf-8 # **Title:** Two Years of Bayesian Bandits for E-Commerce # # **Abstract:** At Monetate, we've deployed Bayesian bandits (both noncontextual and contextual) to help our clients optimize their e-commerce sites since early 2016. This talk is an overview of the lessons we've learned from both the processes of deploying real-time Bayesian machine learning systems at scale and building a data product on top of these systems that is accessible to non-technical users (marketers). This talk will cover # # * the place of multi-armed bandits in the A/B testing industry, # * Thompson sampling and the basic theory of Bayesian bandits, # * Bayesian approaches for accommodating nonstationarity in bandit feedback, # * user experience challenges in driving adoption of these technologies by nontechnical marketers. # # We will focus primarily on noncontextual bandits and give a brief overview of these problems in the contextual setting as time permits. # # **Bio:** Austin Rochford is a Principal Data Scientist at Monetate. He is a founding member of Monetate Labs, where he does research and development for machine learning-driven marketing products. He is a recovering mathematician, a passionate Bayesian, and a PyMC3 developer. # A `Dockerfile` that will produce a container with the dependenceis of this notebook is available [here](https://github.com/AustinRochford/blog). # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') # In[2]: from io import StringIO from math import floor from tqdm import tqdm, trange # In[3]: from matplotlib import pyplot as plt from matplotlib.dates import DateFormatter, MonthLocator from matplotlib.ticker import StrMethodFormatter import numpy as np import pandas as pd import scipy as sp import seaborn as sns # In[4]: SEED = 7097089 # from random.org, for reproducibiliy np.random.seed(SEED) # In[5]: sns.set() C = sns.color_palette() pct_formatter = StrMethodFormatter('{x:.1%}') #configure matplotlib FIGURE_WIDTH = 8 FIGURE_HEIGHT = 6 plt.rc('figure', figsize=(FIGURE_WIDTH, FIGURE_HEIGHT)) LABELSIZE = 14 plt.rc('axes', labelsize=LABELSIZE) plt.rc('axes', titlesize=LABELSIZE) plt.rc('figure', titlesize=LABELSIZE) plt.rc('legend', fontsize=LABELSIZE) plt.rc('xtick', labelsize=LABELSIZE) plt.rc('ytick', labelsize=LABELSIZE) # # Two Years of Bayesian Bandits for E-Commerce # #
# # # ### Data Philly • April 2, 2018 • [@AustinRochford](https://twitter.com/AustinRochford) # ### About Me # #
# # # # # #
#
# # #### Principal Data Scientist and Director of [Monetate Labs](http://www.monetate.com/) # # #### [@AustinRochford](https://twitter.com/AustinRochford) • [austinrochford.com](http://austinrochford.com) • [github.com/AustinRochford](http://github.com/AustinRochford) # # #### [arochford@monetate.com](mailto:arochford@monetate.com) • [austin.rochford@gmail.com](mailto:arochford@monetate.com) # ### About Monetate # #
# # * Founded 2008, web optimization and personalization SaaS # * Observed 5B impressions and $4.1B in revenue during Cyber Week 2017 # #### Nontechnical marketer-focused # #
# #
#
# #
#
# # monetate.com/about/careers #
# ### About this talk # #
# #
# ### Outline # # * Web optimization # * A/B testing # * Multi-armed bandits # * Bayesian bandits # * Thompson sampling # * Nonstationarity # * ~~Kalman filters~~ # * Decayed posteriors # * Bandit bias # * Inverse propensity weighting # * Contextual bandits? # ## Web Optimization # #
# ### A/B testing # #
# #### A/B testing machinery # #
# # # # # # #
# #
Ronald Fisher
#
# #
Abraham Wald
#
#
# Much of the A/B testing industry uses classical Fisher/Neyman-Pearson style fixed-horizon frequentist significance tests. Sophistocated approaches use sequential hypothesis testing. We've found, through many years of interaction with marketers, that they take a fundamentally Bayesian view of the world. Most interpret p-values as the "probability that the experiment is better/worse than the control." # ### Multi-armed bandits # #
# [Multi-armed bandit problems](https://en.wikipedia.org/wiki/Multi-armed_bandit) are extensively studied and come in many variantions. Here we focus on the simplest multi-armed bandit objective, [regret minimization](https://en.wikipedia.org/wiki/Multi-armed_bandit#The_multi-armed_bandit_model). #
# #
# #### Multi-armed bandit systems # #
# #
# ## Bayesian Bandits # #
# #
# ### Beta-binomial model # # $$ # \begin{align*} # x_A, x_B # & = \textrm{number of rewards from users shown variant } A, B \\ # x_A # & \sim \textrm{Binomial}(n_A, r_A) \\ # x_B # & \sim \textrm{Binomial}(n_B, r_B) \\ # r_A, r_B # & \sim \textrm{Beta}(1, 1) # \end{align*} # $$ # $$ # \begin{align*} # r_A\ |\ n_A, x_A # & \sim \textrm{Beta}(x_A + 1, n_A - x_A + 1) \\ # r_B\ |\ n_B, x_B # & \sim \textrm{Beta}(x_B + 1, n_B - x_B + 1) # \end{align*} # $$ # ### Thompson sampling # #
# #
# # _Thompson sampling_ randomizes user/variant assignment according to the probabilty that each variant maximizes the posterior expected reward. # The probability that a user is assigned variant A is # # $$ # \begin{align*} # P(r_A > r_B\ |\ \mathcal{D}) # & = \int_0^1 P(r_A > r\ |\ \mathcal{D})\ \pi_B(r\ |\ \mathcal{D})\ dr \\ # & = \int_0^1 \left(\int_r^1 \pi_A(s\ |\ \mathcal{D})\ ds\right)\ \pi_B(r\ |\ \mathcal{D})\ dr \\ # & \propto \int_0^1 \left(\int_r^1 s^{\alpha_A - 1} (1 - s)^{\beta_A - 1}\ ds\right) r^{\alpha_B - 1} (1 - r)^{\beta_B - 1}\ dr # \end{align*} # $$ # In[6]: fig, ax = plt.subplots( figsize=(0.75 * FIGURE_WIDTH, 0.75 * FIGURE_WIDTH) ) ax.set_aspect('equal'); a_post = sp.stats.beta(5 + 1, 15 + 1) b_post = sp.stats.beta(7 + 1, 13 + 1) p = np.linspace(0, 1, 100) px, py = np.meshgrid(p, p) z = a_post.pdf(px) * b_post.pdf(py) ax.contourf(px, py, z, 1000, cmap='BuGn'); ax.plot([0, 1], [0, 1], c='k', ls='--'); ax.xaxis.set_major_formatter(pct_formatter); ax.set_xlabel(r"$r_A \sim \operatorname{Beta}(6, 16)$"); ax.yaxis.set_major_formatter(pct_formatter); ax.set_ylabel(r"$r_B \sim \operatorname{Beta}(8, 14)$"); ax.set_title("Posterior reward rates"); # In[7]: fig # In[8]: N_SAMPLES = 1000 a_samples = a_post.rvs(size=N_SAMPLES) b_samples = b_post.rvs(size=N_SAMPLES) ax.scatter( a_samples[b_samples > a_samples], b_samples[b_samples > a_samples], c='k', alpha=0.25 ); ax.scatter( a_samples[b_samples < a_samples], b_samples[b_samples < a_samples], c=C[2], alpha=0.25 ); # In[9]: ts_fig = fig # In[10]: fig # In[11]: (a_samples > b_samples).mean() # #### Thompson sampling, redeemed # # 1. Sample $\hat{r}_A \sim r_A\ |\ n_A, x_A$ and $\hat{r}_B \sim r_B\ |\ n_B, x_B$. # 2. Assign the user variant A if $\hat{r}_A > \hat{r}_B$, otherwise assign them variant B. # #### Simulating a bandit # In[12]: class BetaBinomial: def __init__(self, a0=1., b0=1.): self.a = a0 self.b = b0 def sample(self): return sp.stats.beta.rvs(self.a, self.b) def update(self, n, x): self.a += x self.b += n - x # In[13]: class Bandit: def __init__(self, a_post, b_post): self.a_post = a_post self.b_post = b_post def assign(self): return 1 * (self.a_post.sample() < self.b_post.sample()) def update(self, arm, reward): arm_post = self.a_post if arm == 0 else self.b_post arm_post.update(1, reward) # In[14]: def generate_rewards(a_rate, b_rate, n): yield from zip( np.random.binomial(1, a_rate, size=n), np.random.binomial(1, b_rate, size=n) ) # In[15]: A_RATE, B_RATE = 0.05, 0.1 N = 1000 rewards_gen = generate_rewards(A_RATE, B_RATE, N) # In[16]: bandit = Bandit(BetaBinomial(), BetaBinomial()) arms = np.empty(N, dtype=np.int64) rewards = np.empty(N) for t, arm_rewards in tqdm(enumerate(rewards_gen), total=N): arms[t] = bandit.assign() rewards[t] = arm_rewards[arms[t]] bandit.update(arms[t], rewards[t]) # In[17]: fig, (regret_ax, assign_ax) = plt.subplots( ncols=2, sharex=True, figsize=(2 * FIGURE_WIDTH, 6) ) plot_t = np.arange(N) + 1 regret_ax.plot( plot_t, rewards.cumsum(), c=C[0], label="Bandit" ); regret_ax.plot( plot_t, A_RATE * plot_t, c=C[1], ls='--', label="Variant A" ); regret_ax.plot( plot_t, B_RATE * plot_t, c=C[2], ls='--', label="Variant B" ); regret_ax.plot( plot_t, 0.5 * (A_RATE + B_RATE) * plot_t, c='k', ls='--', label="A/B testing" ); regret_ax.set_xlim(1, N); regret_ax.set_xlabel("Sessions"); regret_ax.set_ylabel("Rewards"); regret_ax.legend(loc=2); assign_ax.plot(plot_t, (arms == 1).cumsum() / plot_t); assign_ax.hlines( 0.5, 1, N, colors='k', linestyles='--' ); assign_ax.set_xlabel("Sessions"); assign_ax.set_ylim(0, 1); assign_ax.yaxis.set_major_formatter(pct_formatter); assign_ax.set_yticks(np.linspace(0, 1, 5)); assign_ax.set_ylabel("Traffic shown variant B"); fig.suptitle("Cumulative bandit performance"); # In[18]: fig # #### Simulating many bandits # In[19]: def simulate_bandit(rewards, n, progressbar=False, prior=BetaBinomial): bandit = Bandit(prior(), prior()) arms = np.empty(n, dtype=np.int64) obs_rewards = np.empty(n) if progressbar: rewards = tqdm(rewards, total=n) for t, arm_rewards in enumerate(rewards): arms[t] = bandit.assign() obs_rewards[t] = arm_rewards[arms[t]] bandit.update(arms[t], obs_rewards[t]) return arms, obs_rewards # In[20]: N_BANDIT = 100 arms = np.empty((N_BANDIT, N), dtype=np.int64) rewards = np.empty((N_BANDIT, N)) for i in trange(N_BANDIT): arms[i], rewards[i] = simulate_bandit( generate_rewards(A_RATE, B_RATE, N), N ) # In[21]: fig, (regret_ax, assign_ax) = plt.subplots( ncols=2, sharex=True, figsize=(2 * FIGURE_WIDTH, 6) ) plot_t = np.arange(N) + 1 cum_rewards = rewards.cumsum(axis=1) ALPHA = 0.1 low_rewards, high_rewards = np.percentile( cum_rewards, [100 * ALPHA / 2, 100 * (1 - ALPHA / 2)], axis=0 ) regret_ax.fill_between( plot_t, low_rewards, high_rewards, color=C[0], alpha=0.5, label=f"{1 - ALPHA:.0%} interval" ); regret_ax.plot( plot_t, cum_rewards.mean(axis=0), c=C[0], label="Bandit (average)" ); regret_ax.plot( plot_t, A_RATE * plot_t, c=C[1], ls='--', label="Variant A" ); regret_ax.plot( plot_t, B_RATE * plot_t, c=C[2], ls='--', label="Variant B" ); regret_ax.plot( plot_t, 0.5 * (A_RATE + B_RATE) * plot_t, c='k', ls='--', label="A/B testing" ); regret_ax.set_xlim(1, N); regret_ax.set_xlabel("Sessions"); regret_ax.set_ylabel("Rewards"); regret_ax.legend(loc=2); cum_winner = (arms == 1 * (A_RATE < B_RATE)).cumsum(axis=1) low_winner, high_winner = np.percentile( cum_winner / plot_t, [100 * ALPHA / 2, 100 * (1 - ALPHA / 2)], axis=0 ) assign_ax.fill_between( plot_t, low_winner, high_winner, color=C[0], alpha=0.5, label=f"{1 - ALPHA:.0%} interval" ); assign_ax.plot(plot_t, cum_winner.mean(axis=0) / plot_t); assign_ax.hlines( 0.5, 1, N, colors='k', linestyles='--' ); assign_ax.set_xlabel("Sessions"); assign_ax.set_ylim(0, 1); assign_ax.yaxis.set_major_formatter(pct_formatter); assign_ax.set_yticks(np.linspace(0, 1, 5)); assign_ax.set_ylabel("Traffic shown better variant"); fig.suptitle(f"Cumulative bandit performance ({N_BANDIT} simulations)"); # In[22]: fig # ### A/A testing # #
# ##### A/A bandits # In[23]: def simulate_bandits(rewards_factory, n, n_bandit, progressbar=True, prior=BetaBinomial): arms = np.empty((n_bandit, n), dtype=np.int64) rewards = np.empty((n_bandit, n)) if progressbar: bandits_range = trange(n_bandit) else: bandits_range = range(n_bandit) for i in bandits_range: arms[i], rewards[i] = simulate_bandit( rewards_factory(), n, prior=prior ) return arms, rewards # In[24]: N = 2000 arms, rewards = simulate_bandits( lambda: generate_rewards(A_RATE, A_RATE, N), N, N_BANDIT ) # In[25]: fig, ax = plt.subplots() plot_t = np.arange(N) + 1 cum_winner = (arms == 0).cumsum(axis=1) low_winner, high_winner = np.percentile( cum_winner / plot_t, [100 * ALPHA / 2, 100 * (1 - ALPHA / 2)], axis=0 ) ax.fill_between( plot_t, low_winner, high_winner, color=C[0], alpha=0.5, label=f"{1 - ALPHA:.0%} interval" ); ax.plot( plot_t, cum_winner.mean(axis=0) / plot_t, label="Bandit (average)" ); ax.hlines( 0.5, 1, N, colors='k', linestyles='--' ); ax.set_xlim(1, N); ax.set_xlabel("Sessions"); ax.set_ylim(0, 1); ax.yaxis.set_major_formatter(pct_formatter); ax.set_yticks(np.linspace(0, 1, 5)); ax.set_ylabel("Traffic shown variant A"); ax.legend(loc=2) ax.set_title(f"Cumulative bandit performance ({N_BANDIT} simulations)"); # In[26]: fig # In[27]: fig, ax = plt.subplots() ax.fill_between( plot_t, low_winner, high_winner, color=C[0], alpha=0.5, label=f"{1 - ALPHA:.0%} interval" ); ax.plot( plot_t, (cum_winner[0] / plot_t[np.newaxis]).T, c=C[0], alpha=0.75, label="Bandit (samples)" ); ax.plot( plot_t, (cum_winner[1::25] / plot_t[np.newaxis]).T, c=C[0], alpha=0.75 ); ax.hlines( 0.5, 1, N, colors='k', linestyles='--' ); ax.set_xlim(1, N); ax.set_xlabel("Sessions"); ax.set_ylim(0, 1); ax.yaxis.set_major_formatter(pct_formatter); ax.set_yticks(np.linspace(0, 1, 5)); ax.set_ylabel("Traffic shown variant A"); ax.legend(loc=2) ax.set_title(f"Cumulative bandit performance ({N_BANDIT} simulations)"); # In[28]: fig # ### Why Bayesian? # # * Thompson sampling is [intuitive](https://en.wikipedia.org/wiki/Probability_matching) and simple to implement # * Principled randomization # * Robustness against [delayed feedback](https://papers.nips.cc/paper/4321-an-empirical-evaluation-of-thompson-sampling.pdf) # * Marketers are natural Bayesians! # #
# #
# ## Nonstationarity # In[29]: ts_buff = StringIO('2017-01-01,0.15823722496066375\n2017-01-02,0.3197075629190358\n2017-01-03,0.3234798483436177\n2017-01-04,0.2960957562324266\n2017-01-05,0.3394484805208908\n2017-01-06,0.26307306468996705\n2017-01-07,0.2152411941056972\n2017-01-08,0.2572751218902651\n2017-01-09,0.2775975046173342\n2017-01-10,0.27105952194939803\n2017-01-11,0.2733014741759613\n2017-01-12,0.3130533531082352\n2017-01-13,0.26986421792858306\n2017-01-14,0.20587503592866294\n2017-01-15,0.2398935182953528\n2017-01-16,0.30413590746716296\n2017-01-17,0.28188696514148326\n2017-01-18,0.2640367562528867\n2017-01-19,0.2872358110925165\n2017-01-20,0.2000199403392954\n2017-01-21,0.13685670124785604\n2017-01-22,0.20050726715197675\n2017-01-23,0.2694649170263376\n2017-01-24,0.2455975972915983\n2017-01-25,0.23635537081216731\n2017-01-26,0.2745161585375562\n2017-01-27,0.22948453673903202\n2017-01-28,0.1704352187678122\n2017-01-29,0.2174009034130299\n2017-01-30,0.2949041489338379\n2017-01-31,0.2772886270047901\n2017-02-01,0.2475453672914969\n2017-02-02,0.2328534036252254\n2017-02-03,0.1446477812474505\n2017-02-04,0.08934009518730321\n2017-02-05,0.10534179655795631\n2017-02-06,0.15453583608016266\n2017-02-07,0.18260393160683228\n2017-02-08,0.1761459408837018\n2017-02-09,0.20390493919056643\n2017-02-10,0.1485319565714929\n2017-02-11,0.09551479254379179\n2017-02-12,0.12306302278194745\n2017-02-13,0.1864631083043351\n2017-02-14,0.13029811209859957\n2017-02-15,0.15818475712589875\n2017-02-16,0.2136431486688817\n2017-02-17,0.16922915399121008\n2017-02-18,0.0789085489849185\n2017-02-19,0.10201159885029742\n2017-02-20,0.18461690666263175\n2017-02-21,0.17547974395696633\n2017-02-22,0.1755886325571937\n2017-02-23,0.1977380561924471\n2017-02-24,0.15764752456316364\n2017-02-25,0.10005764324588169\n2017-02-26,0.12140281018967443\n2017-02-27,0.17459904563808554\n2017-02-28,0.18254881018433064\n2017-03-01,0.18681270476318268\n2017-03-02,0.19545595243833785\n2017-03-03,0.15501659297562018\n2017-03-04,0.09654493360375771\n2017-03-05,0.13640480440661001\n2017-03-06,0.22322600431067094\n2017-03-07,0.2013130610942642\n2017-03-08,0.18090051128596485\n2017-03-09,0.2050881282108183\n2017-03-10,0.17291279128478337\n2017-03-11,0.11399393832718048\n2017-03-12,0.13907984045239682\n2017-03-13,0.1986815987891734\n2017-03-14,0.20810738033755904\n2017-03-15,0.17932058344817878\n2017-03-16,0.21712877341548695\n2017-03-17,0.16264437667608378\n2017-03-18,0.11708511183188654\n2017-03-19,0.1531345953503799\n2017-03-20,0.22914171150406415\n2017-03-21,0.20695738861492274\n2017-03-22,0.1818959177974885\n2017-03-23,0.21758551991707922\n2017-03-24,0.17048845522799058\n2017-03-25,0.08421363818920491\n2017-03-26,0.13970240873662101\n2017-03-27,0.21717004127994285\n2017-03-28,0.19992199550090606\n2017-03-29,0.18824322646109376\n2017-03-30,0.1890201420485646\n2017-03-31,0.159308725388111\n2017-04-01,0.1116037511994674\n2017-04-02,0.135421860607146\n2017-04-03,0.22730303141104877\n2017-04-04,0.18764633392579713\n2017-04-05,0.16406997551122832\n2017-04-06,0.18625234023562615\n2017-04-07,0.11761998361656631\n2017-04-08,0.038888895599110754\n2017-04-09,0.0642234663461572\n2017-04-10,0.14332411678260282\n2017-04-11,0.14716470006534413\n2017-04-12,0.13999261973198324\n2017-04-13,0.15597064011966252\n2017-04-14,0.10836231142644762\n2017-04-15,0.015978660908857092\n2017-04-16,0.0\n2017-04-17,0.1473787073411523\n2017-04-18,0.15775170990788537\n2017-04-19,0.14682688919616824\n2017-04-20,0.16450957434586042\n2017-04-21,0.1245516306003832\n2017-04-22,0.04343128878607365\n2017-04-23,0.07079376788266502\n2017-04-24,0.16143452367423006\n2017-04-25,0.17721171322175894\n2017-04-26,0.17560332794307312\n2017-04-27,0.18957472358491578\n2017-04-28,0.12886519302142294\n2017-04-29,0.05777250305537752\n2017-04-30,0.08816687999682009\n2017-05-01,0.21218109502841737\n2017-05-02,0.1933335763653688\n2017-05-03,0.1634790672737929\n2017-05-04,0.18182745523388338\n2017-05-05,0.14376528946927183\n2017-05-06,0.04665054822573621\n2017-05-07,0.0692251681194236\n2017-05-08,0.17049087090786116\n2017-05-09,0.16965083653346863\n2017-05-10,0.154179834409537\n2017-05-11,0.17656218814625155\n2017-05-12,0.13678267530030652\n2017-05-13,0.0650914640451123\n2017-05-14,0.04029496768863468\n2017-05-15,0.15666650232723306\n2017-05-16,0.17178499819610935\n2017-05-17,0.13944925646654627\n2017-05-18,0.15347650555509376\n2017-05-19,0.101542956955403\n2017-05-20,0.033216275342998175\n2017-05-21,0.06220604424332323\n2017-05-22,0.1614936163280341\n2017-05-23,0.13516173580653568\n2017-05-24,0.12698870387831182\n2017-05-25,0.13295270636851175\n2017-05-26,0.0827298983317205\n2017-05-27,0.013116574378545307\n2017-05-28,0.028024942699890462\n2017-05-29,0.13219249923347914\n2017-05-30,0.18256500621982666\n2017-05-31,0.14539860017207692\n2017-06-01,0.16920214229811165\n2017-06-02,0.11814671163198537\n2017-06-03,0.031082333287618422\n2017-06-04,0.07279223055863586\n2017-06-05,0.18813929732484316\n2017-06-06,0.17651502748695982\n2017-06-07,0.1564529342659468\n2017-06-08,0.17250133878076465\n2017-06-09,0.11408979689659071\n2017-06-10,0.027919952128545207\n2017-06-11,0.07080796915220727\n2017-06-12,0.1867155102496016\n2017-06-13,0.18620766845862533\n2017-06-14,0.16981307139750484\n2017-06-15,0.2095896012448218\n2017-06-16,0.15054509633273808\n2017-06-17,0.03521881905390191\n2017-06-18,0.04511909869080765\n2017-06-19,0.1895967209122222\n2017-06-20,0.18832052821695264\n2017-06-21,0.1901976395806365\n2017-06-22,0.1954672988134876\n2017-06-23,0.14914442292171282\n2017-06-24,0.06161283012964771\n2017-06-25,0.0833714443434129\n2017-06-26,0.17146323695758883\n2017-06-27,0.19217359251235866\n2017-06-28,0.19434600243961705\n2017-06-29,0.20411869025790347\n2017-06-30,0.14119833679708885\n2017-07-01,0.07498488095511296\n2017-07-02,0.07598228223259038\n2017-07-03,0.14243351783637362\n2017-07-04,0.1182149911895397\n2017-07-05,0.13754753078902948\n2017-07-06,0.19169440946893804\n2017-07-07,0.15673653874287508\n2017-07-08,0.11544133318297556\n2017-07-09,0.09468046795818429\n2017-07-10,0.21419407008421681\n2017-07-11,0.217208399348191\n2017-07-12,0.20782705166894014\n2017-07-13,0.21187682916835351\n2017-07-14,0.1719288775532501\n2017-07-15,0.11190091642476012\n2017-07-16,0.10520390149867681\n2017-07-17,0.18614482418077982\n2017-07-18,0.20788649203424073\n2017-07-19,0.19242562844552355\n2017-07-20,0.18683477529290943\n2017-07-21,0.15087765492825594\n2017-07-22,0.058368041345898236\n2017-07-23,0.09285520220869271\n2017-07-24,0.20125609131064945\n2017-07-25,0.19710426963730873\n2017-07-26,0.1796954713432472\n2017-07-27,0.21389130487376956\n2017-07-28,0.155830457484748\n2017-07-29,0.0670891397950647\n2017-07-30,0.08493298007309334\n2017-07-31,0.19728712928327036\n2017-08-01,0.21011965997036547\n2017-08-02,0.21457938932418097\n2017-08-03,0.21866961116082165\n2017-08-04,0.15839023631973953\n2017-08-05,0.08093782987984445\n2017-08-06,0.10826592213949007\n2017-08-07,0.21413009116885634\n2017-08-08,0.23471490457277586\n2017-08-09,0.21006314770187773\n2017-08-10,0.21676625672945274\n2017-08-11,0.16996593635173998\n2017-08-12,0.08764871666457857\n2017-08-13,0.09776693820738473\n2017-08-14,0.21911998905184601\n2017-08-15,0.21389835060672546\n2017-08-16,0.1991393701246502\n2017-08-17,0.2160969853010638\n2017-08-18,0.16868747438144047\n2017-08-19,0.0962738650413094\n2017-08-20,0.1299700737525365\n2017-08-21,0.21394831125859448\n2017-08-22,0.22868869832590824\n2017-08-23,0.23477924950023793\n2017-08-24,0.21641503151675306\n2017-08-25,0.1750140850607\n2017-08-26,0.08548254724365253\n2017-08-27,0.08059048439542175\n2017-08-28,0.20767598017400288\n2017-08-29,0.23196113932393392\n2017-08-30,0.27552832840333336\n2017-08-31,0.23695310517529766\n2017-09-01,0.18265597852768045\n2017-09-02,0.12981973428180246\n2017-09-03,0.17533870119361306\n2017-09-04,0.222843704670545\n2017-09-05,0.2336786694113182\n2017-09-06,0.2061993592518947\n2017-09-07,0.22405114199252815\n2017-09-08,0.186171323456936\n2017-09-09,0.10908838793300558\n2017-09-10,0.14356359850068268\n2017-09-11,0.17832069328841046\n2017-09-12,0.19800398228486787\n2017-09-13,0.21623165945384917\n2017-09-14,0.230930540748841\n2017-09-15,0.19730316061332062\n2017-09-16,0.13386162422042625\n2017-09-17,0.1576701441110428\n2017-09-18,0.24830449469082977\n2017-09-19,0.24703979477555058\n2017-09-20,0.2443643927176622\n2017-09-21,0.2649961471736125\n2017-09-22,0.20603501981827427\n2017-09-23,0.1382242505636861\n2017-09-24,0.18852536688961563\n2017-09-25,0.26673874908995665\n2017-09-26,0.25554304282184204\n2017-09-27,0.25827032709513287\n2017-09-28,0.2394911977932691\n2017-09-29,0.20834267121707553\n2017-09-30,0.14000327068413992\n2017-10-01,0.19595720601148525\n2017-10-02,0.24028290026964477\n2017-10-03,0.2378475472485937\n2017-10-04,0.24691921208867698\n2017-10-05,0.28347027859120916\n2017-10-06,0.23090133298313295\n2017-10-07,0.15845864398152942\n2017-10-08,0.2088258620930087\n2017-10-09,0.31347920818845204\n2017-10-10,0.26973541827002845\n2017-10-11,0.24437905150233144\n2017-10-12,0.24370304545127297\n2017-10-13,0.20837733256309746\n2017-10-14,0.13146268940348482\n2017-10-15,0.18278646184190248\n2017-10-16,0.2711214511969895\n2017-10-17,0.2575897824940145\n2017-10-18,0.2605767340527887\n2017-10-19,0.26899138886988677\n2017-10-20,0.2311238683409085\n2017-10-21,0.1560863548455848\n2017-10-22,0.21803287820826536\n2017-10-23,0.3237459208408791\n2017-10-24,0.31955826658279135\n2017-10-25,0.2921121720215884\n2017-10-26,0.32965286204443894\n2017-10-27,0.2626305926603374\n2017-10-28,0.17101414010892008\n2017-10-29,0.21521996540380414\n2017-10-30,0.3080116279116583\n2017-10-31,0.2386871424084643\n2017-11-01,0.25746974882529333\n2017-11-02,0.323577317366275\n2017-11-03,0.2805689738642105\n2017-11-04,0.23079645221541817\n2017-11-05,0.281942562379717\n2017-11-06,0.35282292872562165\n2017-11-07,0.35786509313672044\n2017-11-08,0.3526442416176183\n2017-11-09,0.365737409522266\n2017-11-10,0.3596095434141777\n2017-11-11,0.35340913370755145\n2017-11-12,0.39713484262815574\n2017-11-13,0.4588052464613859\n2017-11-14,0.4553150648641156\n2017-11-15,0.37865836873310876\n2017-11-16,0.40631341960311773\n2017-11-17,0.3726632003124572\n2017-11-18,0.32727923696432343\n2017-11-19,0.428612688592768\n2017-11-20,0.526142670565574\n2017-11-21,0.5049966874074744\n2017-11-22,0.5502104002265469\n2017-11-23,0.557065001662976\n2017-11-24,1.0\n2017-11-25,0.6387880739056028\n2017-11-26,0.6012410884746047\n2017-11-27,0.9004113463605092\n2017-11-28,0.6831565366778348\n2017-11-29,0.5035838257934657\n2017-11-30,0.4914872905408821\n2017-12-01,0.44894100071588305\n2017-12-02,0.3697053833169708\n2017-12-03,0.4294390890163758\n2017-12-04,0.5255606930234177\n2017-12-05,0.5558994544260215\n2017-12-06,0.48911489160130694\n2017-12-07,0.4822040524054663\n2017-12-08,0.4174271940622882\n2017-12-09,0.3547946908187804\n2017-12-10,0.41820760506532934\n2017-12-11,0.5361836086531263\n2017-12-12,0.48812779356448976\n2017-12-13,0.46118597217627244\n2017-12-14,0.45174044427500526\n2017-12-15,0.38211485891642627\n2017-12-16,0.3059378033439671\n2017-12-17,0.3453442949565619\n2017-12-18,0.4394182625617824\n2017-12-19,0.3851278888374398\n2017-12-20,0.32231722920347605\n2017-12-21,0.3153870096668554\n2017-12-22,0.22108029496622458\n2017-12-23,0.13014572296009652\n2017-12-24,0.07020666617108628\n2017-12-25,0.09263477142050139\n2017-12-26,0.4566239241394534\n2017-12-27,0.44405289079834964\n2017-12-28,0.42877237967269805\n2017-12-29,0.3420741781353814\n2017-12-30,0.2420617556846529\n2017-12-31,0.1577637883072383\n') ts_df = pd.read_csv( ts_buff, names=['date', 'sessions'], parse_dates=['date'] ) # `ts_df` contains Monetate's daily session traffic for 2017, normalized to have mean zero and standard deviation one. # In[30]: ts_df.head() # This traffic data exhibits multiple types of periodicity and nonstationarity. # In[31]: fig, ax = plt.subplots() ts_df.plot( 'date', 'sessions', legend=False, ax=ax ); ax.xaxis.set_major_locator(MonthLocator(interval=3)); ax.xaxis.set_major_formatter(DateFormatter('%b')); ax.set_xlabel("2017"); ax.set_yticklabels([]); ax.set_ylabel("Traffic"); # In[32]: fig # This bandit ran during the 2016. Thanksgiving was November 24, 2016; one of the the variants was a holiday promotiion and the other was not. #
# # #
# Our approach to nonstationarity, described below, is inspired by the discussion in [_Web-Scale Bayesian Click-Through Rate Prediction for Sponsored Search # Advertising in Microsoft’s Bing Search Engine_](http://quinonero.net/Publications/AdPredictorICML2010-final.pdf). # ### ~~Kalman filters~~ # #
# #
# ### Decaying evidence # # #### Bayes theorem # # $$\pi_{t + 1}(\theta\ |\ \mathcal{D}_{t + 1}, \mathcal{D}_{1, t}) \propto \color{blue}{\underbrace{f(\mathcal{D}_{t + 1}\ |\ \theta)}_{\textrm{likelihood}}} \cdot \color{red}{\underbrace{\pi_{t}(\theta\ |\ \mathcal{D}_{1,t})}_{\textrm{posterior after time } t}}$$ # #### Decayed posterior # # For $0 < \varepsilon \ll 1$ # # $$\color{purple}{\underbrace{\tilde{\pi}_{t + 1}(\theta\ |\ \mathcal{D}_{t + 1}, \mathcal{D}_{1, t})}_{\textrm{decayed posterior after time } t + 1}} \propto \left(\color{blue}{f(\mathcal{D}_{t + 1}\ |\ \theta)} \cdot \tilde{\color{red}{\pi}}_{\color{red}{t}}\color{red}{(\theta\ |\ \mathcal{D}_{1,t})}\right)^{1 - \varepsilon} \cdot \color{green}{\underbrace{\pi_0(\theta)}_{\textrm{no-data prior}}}^{\varepsilon}$$ # If we stop collecting data at time $T$, # # $$\color{purple}{\tilde{\pi}_{t}(\theta\ |\ \mathcal{D}_{1, T})} \overset{t \to \infty}{\longrightarrow} \color{green}{\pi_0(\theta)}.$$ # **Proof** # # $$ # \begin{align*} # \tilde{\pi}_{T + 1}(\theta\ |\ \mathcal{D}_{T + 1} = \emptyset, \mathcal{D}_{1, T}) # & \propto \tilde{\pi}_T (\theta\ |\ \mathcal{D}_{1, T})^{1 - \varepsilon} \cdot \pi_0(\theta)^{\varepsilon}, \\ # \tilde{\pi}_{T + 2}(\theta\ |\ \mathcal{D}_{T + 1, T + 2} = \emptyset, \mathcal{D}_{1, T}) # & \propto \left(\tilde{\pi}_{T}(\theta\ |\ \mathcal{D}_{1, T})^{1 - \varepsilon} \cdot \pi_0(\theta)^{\varepsilon}\right)^{1 - \varepsilon} \cdot \pi_0(\theta)^{\varepsilon} \\ # & = \tilde{\pi}_T \theta\ |\ \mathcal{D}_{1, T})^{(1 - \varepsilon)^2} \cdot \pi_0(\theta)^{\varepsilon \cdot (1 + (1 - \varepsilon))}, \\ # \tilde{\pi}_{T + 3}(\theta\ |\ \mathcal{D}_{T + 1, T + 3} = \emptyset, \mathcal{D}_{1, T}) # & \propto \tilde{\pi}_T (\theta\ |\ \mathcal{D}_{1, T})^{(1 - \varepsilon)^3} \cdot \pi_0(\theta)^{\varepsilon \cdot (1 + (1 - \varepsilon) + (1 - \varepsilon)^2)}, \\ # & \vdots \\ # \tilde{\pi}_{T + n}(\theta\ |\ \mathcal{D}_{T + 1, T + n} = \emptyset, \mathcal{D}_{1, T}) # & \propto \tilde{\pi}_T (\theta\ |\ \mathcal{D}_{1, T})^{(1 - \varepsilon)^n} \cdot \pi_0(\theta)^{\varepsilon \cdot \sum_{i = 0}^{n - 1} (1 - \varepsilon)^i} # \end{align*} # $$ # # Since $0 < \varepsilon < 1$, $(1 - \varepsilon)^n \overset{n \to \infty}{\longrightarrow} 0$ and # # $$ # \varepsilon \cdot \sum_{i = 0}^{n - 1} (1 - \varepsilon)^i # \overset{n \to \infty}{\longrightarrow} \frac{\varepsilon}{1 - (1 - \varepsilon)} # = 1, # $$ # # the claim is proved. ∎ # #### Decayed beta-Binomial # # If # # # $$ # \begin{align*} # \color{green}{\pi_0(r)} # & = \textrm{Beta}(\alpha_0, \beta_0) \\ # \color{purple}{\tilde{\pi}_t(r\ |\ \mathcal{D}_{1, t})} # & = \textrm{Beta}(\alpha_t, \beta_t) # \end{align*} # $$ # # and no data is observed at time $t + 1$, then # # $$\color{purple}{\tilde{\pi}_{t + 1}(r\ |\ \mathcal{D}_{t + 1} = \emptyset, \mathcal{D}_{1, t})} = \textrm{Beta}((1 - \varepsilon) \alpha_t + \varepsilon \alpha_0, (1 - \varepsilon) \beta_t + \varepsilon \beta_0).$$ # **Proof** # # Since $\pi_0(r) \propto r^{\alpha_0 - 1} (1 - x)^{\beta_0 - 1}$ and $\tilde{\pi}_t(r\ |\ \mathcal{D}_{1, t}) \propto r^{\alpha_t - 1} (1 - x)^{\beta_t - 1}$, # # $$ # \begin{align*} # \tilde{\pi}_{t + 1}(r\ |\ \mathcal{D}_{t + 1} = \emptyset, \mathcal{D}_{1, t}) # & \propto \left(r^{\alpha_t - 1} (1 - x)^{\beta_t - 1}\right)^{1 - \varepsilon} \cdot \left(r^{\alpha_0 - 1} (1 - x)^{\beta_0 - 1}\right)^\varepsilon \\ # & = r^{(1 - \varepsilon) \alpha_t - (1 - \varepsilon) + \varepsilon \alpha_0 - \varepsilon} (1 - r)^{(1 - \varepsilon) \beta_t - (1 - \varepsilon) + \varepsilon \beta_0 - \varepsilon} \\ # & = r^{(1 - \varepsilon) \alpha_t - \varepsilon \alpha_0 - 1} (1 - r)^{(1 - \varepsilon) \beta_t - \varepsilon \beta_0 - 1} # \end{align*}. # $$ # # ∎ # In[33]: fig, ax = plt.subplots() T_MAX = 100 plot_t = np.arange(T_MAX + 1) EPS = 0.1 alpha0 = 1 alpha_t = 100 ax.plot( plot_t, (1 - EPS)**plot_t * alpha_t + (1 - (1 - EPS)**plot_t * alpha0) ); ax.hlines( alpha0, 0, T_MAX, linestyles='--' ); ax.hlines( alpha_t, 0, T_MAX, linestyles='--' ); ax.set_xlim(0, T_MAX); ax.set_xticks(np.linspace(0, T_MAX, 5)); ax.set_xticklabels([ "$T" + (f"+ {tick}$" if tick > 0 else "$") for tick in np.linspace(0, T_MAX, 5, dtype=np.int64) ]); ax.set_xlabel("Time without data"); ax.set_yscale('log'); ax.set_ylabel(r"$\tilde{\alpha}_t$"); ax.set_title( "Decaying beta-Binomial distribution\n" \ r"$\alpha_T = " + f"{alpha_t:.1f}$, " \ + r"$\alpha_0 = " + f"{alpha0:.1f}$, " \ + r"$\varepsilon = " + f"{EPS:.2f}$" ); # In[34]: fig # In[35]: class DecayedBetaBinomial(BetaBinomial): def __init__(self, eps, a0=1., b0=1.): super(DecayedBetaBinomial, self).__init__(a0=a0, b0=b0) self.eps = eps self.a0 = a0 self.b0 = b0 def update(self, n, x): super(DecayedBetaBinomial, self).update(n, x) self.a = (1 - self.eps) * self.a + self.eps * self.a0 self.b = (1 - self.eps) * self.b + self.eps * self.b0 # In[36]: A_RATES = [0.05, 0.1] B_RATES = [0.1, 0.05] # In[37]: fig, ax = plt.subplots() ax.plot( [1, N], A_RATES, c=C[2], label="Variant A" ); ax.plot( [1, N], B_RATES, c=C[3], label="Variant B" ); ax.set_xlim(1, N); ax.set_xlabel("Sessions"); ax.yaxis.set_major_formatter(pct_formatter); ax.set_ylabel("Reward rate"); ax.legend(loc="upper center"); # In[38]: fig # In[39]: def generate_changing_rewards(a_rates, b_rates, n): w = np.arange(n) / n yield from zip( np.random.binomial( 1, (1 - w) * a_rates[0] + w * a_rates[1], size=n ), np.random.binomial( 1, (1 - w) * b_rates[0] + w * b_rates[1], size=n ) ) # In[40]: arms, rewards = simulate_bandits( lambda: generate_changing_rewards(A_RATES, B_RATES, N), N, N_BANDIT, ) decay_arms, decay_rewards = simulate_bandits( lambda: generate_changing_rewards(A_RATES, B_RATES, N), N, N_BANDIT, prior=lambda: DecayedBetaBinomial(0.01) ) # In[41]: fig, (traffic_ax, rate_ax) = plt.subplots( ncols=2, sharex=True, figsize=(2 * FIGURE_WIDTH, FIGURE_HEIGHT) ) plot_t = np.arange(N) + 1 cum_rewards = rewards.cumsum(axis=1) cum_winner = (arms == 1 * (A_RATE < B_RATE)).cumsum(axis=1) low_winner, high_winner = np.percentile( cum_winner / plot_t, [100 * ALPHA / 2, 100 * (1 - ALPHA / 2)], axis=0 ) cum_winner_decay = (decay_arms == 1 * (A_RATE < B_RATE)).cumsum(axis=1) traffic_ax.plot( plot_t, cum_winner.mean(axis=0) / plot_t, label="Bandit" ); traffic_ax.plot( plot_t, cum_winner_decay.mean(axis=0) / plot_t, c=C[1], label="Decayed bandit" ); traffic_ax.vlines( N // 2, 0., 1, colors='k', linestyles='--', label="Changepoint" ); traffic_ax.set_xlim(1, N); traffic_ax.set_xlabel("Sessions"); traffic_ax.set_ylim(0, 1); traffic_ax.yaxis.set_major_formatter(pct_formatter); traffic_ax.set_yticks(np.linspace(0, 1, 5)); traffic_ax.set_ylabel("Traffic shown variant A"); traffic_ax.legend(loc=2); rate_ax.plot( [1, N], A_RATES, c=C[2], label="Variant A" ); rate_ax.plot( [1, N], B_RATES, c=C[3], label="Variant B" ); rate_ax.set_xlim(1, N); rate_ax.set_xlabel("Sessions"); rate_ax.yaxis.set_major_formatter(pct_formatter); rate_ax.set_ylabel("Reward rate"); rate_ax.legend(loc="upper center"); fig.suptitle(f"Cumulative bandit performance ({N_BANDIT} simulations)"); # In[42]: fig # ## Bandit Bias # #
# In[43]: N = 1000 arms, rewards = simulate_bandits( lambda: generate_rewards(A_RATE, B_RATE, N), N, N_BANDIT ) # In[44]: fig, (regret_ax, assign_ax) = plt.subplots( ncols=2, sharex=True, figsize=(2 * FIGURE_WIDTH, 6) ) plot_t = np.arange(N) + 1 cum_rewards = rewards.cumsum(axis=1) low_rewards, high_rewards = np.percentile( cum_rewards, [100 * ALPHA / 2, 100 * (1 - ALPHA / 2)], axis=0 ) regret_ax.fill_between( plot_t, low_rewards, high_rewards, color=C[0], alpha=0.5, label=f"{1 - ALPHA:.0%} interval" ); regret_ax.plot( plot_t, cum_rewards.mean(axis=0), c=C[0], label="Bandit (average)" ); regret_ax.plot( plot_t, A_RATE * plot_t, c=C[1], ls='--', label="Variant A" ); regret_ax.plot( plot_t, B_RATE * plot_t, c=C[2], ls='--', label="Variant B" ); regret_ax.plot( plot_t, 0.5 * (A_RATE + B_RATE) * plot_t, c='k', ls='--', label="A/B testing" ); regret_ax.set_xlim(1, N); regret_ax.set_xlabel("Sessions"); regret_ax.set_ylabel("Rewards"); regret_ax.legend(loc=2); cum_winner = (arms == 1 * (A_RATE < B_RATE)).cumsum(axis=1) low_winner, high_winner = np.percentile( cum_winner / plot_t, [100 * ALPHA / 2, 100 * (1 - ALPHA / 2)], axis=0 ) assign_ax.fill_between( plot_t, low_winner, high_winner, color=C[0], alpha=0.5, label=f"{1 - ALPHA:.0%} interval" ); assign_ax.plot(plot_t, cum_winner.mean(axis=0) / plot_t); assign_ax.hlines( 0.5, 1, N, colors='k', linestyles='--' ); assign_ax.set_xlabel("Sessions"); assign_ax.set_ylim(0, 1); assign_ax.yaxis.set_major_formatter(pct_formatter); assign_ax.set_yticks(np.linspace(0, 1, 5)); assign_ax.set_ylabel("Traffic shown better variant"); fig.suptitle(f"Cumulative bandit performance ({N_BANDIT} simulations)"); # In[45]: fig # In[46]: fig, ax = plt.subplots() plot_t = np.arange(N) + 1 a_obs_rate = (rewards * (arms == 0)).cumsum(axis=1) \ / np.maximum(1, (arms == 0).cumsum(axis=1)) b_obs_rate = (rewards * (arms == 1)).cumsum(axis=1) \ / np.maximum(1, (arms == 1).cumsum(axis=1)) ax.plot( plot_t, a_obs_rate.mean(axis=0), c=C[0], label="Variant A (observed average)" ); ax.plot( plot_t, b_obs_rate.mean(axis=0), c=C[1], label="Variant B (observed average)" ); ax.hlines( A_RATE, 1, N, color=C[0], linestyles='--', label="Variant A (actual)" ); ax.hlines( B_RATE, 1, N, color=C[1], linestyles='--', label="Variant B (actual)" ); ax.set_xlim(1, N); ax.set_xlabel("Sessions"); ax.set_ylim( 0.5 * min(A_RATE, B_RATE), 1.5 * max(A_RATE, B_RATE) ); ax.yaxis.set_major_formatter(pct_formatter); ax.set_ylabel("Reward rate"); ax.legend(); ax.set_title(f"Cumulative bandit performance ({N_BANDIT} simulations)"); # In[47]: fig # In[48]: fig, ax = plt.subplots() a_obs_rate = (rewards * (arms == 0)).cumsum(axis=1) \ / np.maximum(1, (arms == 0).cumsum(axis=1)) b_obs_rate = (rewards * (arms == 1)).cumsum(axis=1) \ / np.maximum(1, (arms == 1).cumsum(axis=1)) low, high = np.percentile( a_obs_rate, [100 * ALPHA / 2, 100 * (1 - ALPHA / 2)], axis=0 ) ax.fill_between( plot_t, low, high, color=C[0], alpha=0.5 ); low, high = np.percentile( a_obs_rate, [100 * 2 * ALPHA / 2, 100 * (1 - 2 * ALPHA / 2)], axis=0 ) ax.fill_between( plot_t, low, high, color=C[0], alpha=0.5 ); low, high = np.percentile( a_obs_rate, [100 * 4 * ALPHA / 2, 100 * (1 - 4 * ALPHA / 2)], axis=0 ) ax.fill_between( plot_t, low, high, color=C[0], alpha=0.5 ); ax.plot( plot_t, a_obs_rate.mean(axis=0), c=C[0], label="Observed average" ); ax.hlines( A_RATE, 1, N, linestyles='--', label="Actual" ); ax.set_xlim(1, N); ax.set_xlabel("Sessions"); ax.set_ylim(0., 2. * A_RATE); ax.yaxis.set_major_formatter(pct_formatter); ax.set_ylabel("Variant A reward rate"); ax.legend(loc=1); ax.set_title(f"Cumulative bandit performance ({N_BANDIT} simulations)"); # In[49]: fig #
# ### Inverse propensity weighting # # $$ # \hat{r}^{\textrm{IPS}}_A = \frac{1}{n} \sum_{t = 1}^n \frac{Y_i \cdot \mathbb{I}(A_t = 1)}{P(A_t = 1\ |\ \mathcal{D}_{1, t - 1})} # $$ # # where # # $$ # A_t = \begin{cases} # 1 & \textrm{session } t \textrm{ saw variant A} \\ # 0 & \textrm{session } t \textrm{ saw variant B} # \end{cases}. # $$ # For a more complete overview of inverse propensity weighting and related approaches to causal inference/data missing not at random, [_Improving efficiency and robustness of the doubly robust estimator for a population mean with incomplete data_](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2798744/pdf/asp033.pdf) gives a good overview. # # For simplicity in this talk, we only implement inverse propensity weighting. The general results are similar (with slightly less variance) for doubly robust estimation and related methods. # **Calculating** $P(A_t = 1\ |\ \mathcal{D}_{1, t - 1})$ # In[50]: ts_fig # In[51]: a_post_alpha = (rewards * (arms == 0)).cumsum(axis=1) + 1 a_post_beta = ((1 - rewards) * (arms == 0)).cumsum(axis=1) + 1 b_post_alpha = (rewards * (arms == 1)).cumsum(axis=1) + 1 b_post_beta = ((1 - rewards) * (arms == 1)).cumsum(axis=1) + 1 # In[52]: get_ipython().run_cell_magic('time', '', 'N_MC = 500\n\na_samples = sp.stats.beta.rvs(\n a_post_alpha[..., np.newaxis],\n a_post_beta[..., np.newaxis],\n size=(N_BANDIT, N, N_MC)\n)\nb_samples = sp.stats.beta.rvs(\n b_post_alpha[..., np.newaxis],\n b_post_beta[..., np.newaxis],\n size=(N_BANDIT, N, N_MC)\n)\n\na_prob = np.empty((N_BANDIT, N))\na_prob[:, 0] = 0.5\na_prob[:, 1:] = (a_samples > b_samples).mean(axis=2)[:, :-1]\n') # In[53]: a_prob_ = (a_prob + 1e-12) # In[54]: a_ips_est = 1. / plot_t * (rewards * (arms == 0) / a_prob_).cumsum(axis=1) b_ips_est = 1. / plot_t * (rewards * (arms == 1) / (1 - a_prob_)).cumsum(axis=1) # In[55]: fig, ax = plt.subplots() plot_t = np.arange(N) + 1 a_obs_rate = (rewards * (arms == 0)).cumsum(axis=1) \ / np.maximum(1, (arms == 0).cumsum(axis=1)) b_obs_rate = (rewards * (arms == 1)).cumsum(axis=1) \ / np.maximum(1, (arms == 1).cumsum(axis=1)) ax.plot( plot_t, a_obs_rate.mean(axis=0), c=C[0], label="Variant A (observed average)" ); ax.plot( plot_t, b_obs_rate.mean(axis=0), c=C[1], label="Variant B (observed average)" ); ax.plot( plot_t, a_ips_est.mean(axis=0), c='k', label="IPS estimate" ); ax.plot( plot_t, b_ips_est.mean(axis=0), c='k' ); ax.hlines( A_RATE, 1, N, color=C[0], linestyles='--', label="Variant A (actual)" ); ax.hlines( B_RATE, 1, N, color=C[1], linestyles='--', label="Variant B (actual)" ); ax.set_xlim(1, N); ax.set_xlabel("Sessions"); ax.set_ylim( 0.5 * min(A_RATE, B_RATE), 1.75 * max(A_RATE, B_RATE) ); ax.yaxis.set_major_formatter(pct_formatter); ax.set_ylabel("Reward rate"); ax.legend(); ax.set_title(f"Cumulative bandit performance ({N_BANDIT} simulations)"); # In[56]: fig # ### Bias-variance tradeoff # #
# In[57]: fig, (naive_ax, ips_ax) = plt.subplots( ncols=2, sharex=True, sharey=True, figsize=(2 * FIGURE_WIDTH, FIGURE_HEIGHT) ) low, high = np.percentile( a_obs_rate, [100 * ALPHA / 2, 100 * (1 - ALPHA / 2)], axis=0 ) naive_ax.fill_between( plot_t, low, high, color=C[0], alpha=0.5 ); naive_ax.plot( plot_t, a_obs_rate.mean(axis=0), c=C[0] ); naive_ax.hlines( A_RATE, 1, N, color=C[0], linestyles='--' ); naive_ax.set_xlim(1, N); naive_ax.set_xlabel("Sessions"); naive_ax.set_ylim(0., 3 * A_RATE); naive_ax.yaxis.set_major_formatter(pct_formatter); naive_ax.set_ylabel("Reward rate"); naive_ax.set_title("Naive estimate"); low, high = np.percentile( a_ips_est, [100 * ALPHA / 2, 100 * (1 - ALPHA / 2)], axis=0 ) ips_ax.fill_between( plot_t, low, high, color='k', alpha=0.5 ); ips_ax.plot( plot_t, a_ips_est.mean(axis=0), c='k' ); ips_ax.hlines( A_RATE, 1, N, color=C[0], linestyles='--' ); ips_ax.set_xlabel("Sessions"); ips_ax.set_title("IPS estimate"); fig.suptitle(f"Cumulative bandit performance ({N_BANDIT} simulations)"); # In[58]: fig # **Intuition:** the variant with the highest reward rate should receive the most traffic. # In[59]: obs_opp_sign = np.sign( (a_obs_rate - b_obs_rate) \ * ((arms == 0).cumsum(axis=1) - (arms == 1).cumsum(axis=1)) ) == -1 ips_opp_sign = np.sign( (a_ips_est - b_ips_est) \ * ((arms == 0).cumsum(axis=1) - (arms == 1).cumsum(axis=1)) ) == -1 # In[60]: fig, ax = plt.subplots() ax.plot( plot_t, obs_opp_sign.mean(axis=0), label="Naive estimate" ); ax.plot( plot_t, ips_opp_sign.mean(axis=0), label="IPS estimate" ); ax.set_xlim(1, N); ax.set_xlabel("Sessions"); ax.yaxis.set_major_formatter(pct_formatter); ax.set_ylabel("Probability intuition is wrong"); ax.legend(loc=1); # In[61]: fig # ## Contextual Bandits # # ### Challenges # # * Streaming feature engineering # * Untrusted feature data # * Streaming feature scaling # * Temporally shifting covariates # * Non-conjugate likelihoods # * Interpretability # ## Thank You! # #
# # # # # # # # # #
# # # #
#
monetate.com/about/careers
#
#
# # #### [@AustinRochford](https://twitter.com/AustinRochford) • [austinrochford.com](http://austinrochford.com) • [github.com/AustinRochford](http://github.com/AustinRochford) # # #### [arochford@monetate.com](mailto:arochford@monetate.com) • [austin.rochford@gmail.com](mailto:arochford@monetate.com) # In[64]: get_ipython().run_cell_magic('bash', '', 'jupyter nbconvert \\\n --to=slides \\\n --reveal-prefix=https://cdnjs.cloudflare.com/ajax/libs/reveal.js/3.2.0/ \\\n --output=data-philly-april-2018-bayes-bandits \\\n ./Data\\ Philly\\ April\\ 2018\\ -\\ Two\\ Years\\ of\\ Bayesian\\ Bandits\\ for\\ E-Commerce.ipynb\n')