img

Winners Take All - The Movies

post @ endlesspint.com

In [4]:
import pymc3 as pm
import numpy as np
import pandas as pd
from theano import shared
import scipy.stats as stats
import matplotlib.pyplot as plt
import arviz as az

plt.style.use('ggplot')
In [5]:
df_movies = pd.read_excel("rundown2.xlsx", sheet_name="list")
df_movies_2018 = df_movies[df_movies.year == 2018]
df_movies_2018.head()
Out[5]:
year rank title studio gross theaters_max opening theaters_open wide close opening_of_gross year_total take_of_year prev_year_Q1_theaters_open prev_year_Q3_theaters_open prev_year_IQR_theaters_open prev_year_IQR top_23
0 2018 1 Black Panther BV 700059566 4084.0 202003951 4020.0 2019-02-16 2019-08-09 00:00:00 0.288553 11443053554 0.061178 2696.25 3771.0 0.0 above 1
1 2018 2 Avengers: Infinity War BV 678815482 4474.0 257698183 4474.0 2019-04-27 2019-09-13 00:00:00 0.379629 11443053554 0.059321 2696.25 3771.0 0.0 above 1
2 2018 3 Incredibles 2 BV 608581744 4410.0 182687905 4410.0 2019-06-15 2019-12-13 00:00:00 0.300186 11443053554 0.053184 2696.25 3771.0 0.0 above 1
3 2018 4 Jurassic World: Fallen Kingdom Uni. 417719760 4485.0 148024610 4475.0 2019-06-22 2019-10-04 00:00:00 0.354363 11443053554 0.036504 2696.25 3771.0 0.0 above 1
4 2018 5 Aquaman WB 335061807 4184.0 67873522 4125.0 2019-12-21 2019-04-04 00:00:00 0.202570 11443053554 0.029281 2696.25 3771.0 0.0 above 1
In [48]:
x = df_movies_2018.opening / 1e6
y = df_movies_2018.gross / 1e6

_, ax = plt.subplots(1, 2, figsize=(12, 6))
ax[0].plot(x, y, 'C0.')
ax[0].set_xlabel('opening 2018')
ax[0].set_ylabel('total 2018', rotation=90)
# ax[0].plot(x, y_real, 'k')
az.plot_kde(y, ax=ax[1])
ax[1].set_xlabel('total 2018')
plt.tight_layout()
In [7]:
with pm.Model() as model_g:
    α = pm.Normal('α', mu=0, sd=10)
    β = pm.Normal('β', mu=0, sd=1)
    ϵ = pm.HalfCauchy('ϵ', 5)

    μ = pm.Deterministic('μ', α + β * x)
    y_pred = pm.Normal('y_pred', mu=μ, sd=ϵ, observed=y)

    trace_g = pm.sample(2000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [ϵ, β, α]
Sampling 2 chains: 100%|████████████████████████████████████████████████████████| 6000/6000 [05:54<00:00, 16.91draws/s]
In [8]:
varnames=['α', 'β', 'ϵ']
az.plot_trace(trace_g, varnames)
Out[8]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000001822BB0D358>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001822C50ADD8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001822BAF80B8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001822BAB5748>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001822B9DB780>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001822BA98B70>]],
      dtype=object)
In [9]:
pm.autocorrplot(trace_g, varnames)
Out[9]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000001822AEBA048>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001822B15C4E0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001822B001780>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001822B09DFD0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001822B1CA240>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001822B026E10>]],
      dtype=object)
In [10]:
pm.summary(trace_g, varnames)
Out[10]:
mean sd mc_error hpd_2.5 hpd_97.5 n_eff Rhat
α 12.729567 4.172058 0.088770 5.037160 20.911477 2720.990831 0.999754
β 2.904945 0.083367 0.001595 2.742058 3.067755 2796.117603 0.999836
ϵ 35.541795 2.569916 0.050670 30.855135 40.846955 2666.898836 0.999751
In [54]:
plt.figure(figsize=(8,8))
plt.plot(x, y, 'b.');
alpha_m = trace_g['α'].mean()
beta_m = trace_g['β'].mean()
plt.plot(x, alpha_m + beta_m * x, c='k', label='y = {:.2f} + {:.2f} * x'.format(alpha_m, beta_m))
plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', fontsize=16, rotation=0)
plt.legend(loc=2, fontsize=14)
Out[54]:
<matplotlib.legend.Legend at 0x182327ca2e8>
In [12]:
ppc = pm.sample_posterior_predictive(trace_g, samples=1000, model=model_g)
100%|█████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:01<00:00, 724.01it/s]
In [13]:
idx = np.argsort(x)
x_ord = x[idx]
In [53]:
plt.figure(figsize=(8,8))
plt.plot(x, y, 'b.')
plt.plot(x, alpha_m + beta_m * x, c='k', label='y = {:.2f} + {:.2f} * x'.format(alpha_m, beta_m))

sig0 = pm.hpd(ppc['y_pred'], alpha=0.5)[idx]
sig1 = pm.hpd(ppc['y_pred'], alpha=0.05)[idx]
plt.fill_between(x_ord, sig0[:,0], sig0[:,1], color='gray', alpha=1)
plt.fill_between(x_ord, sig1[:,0], sig1[:,1], color='gray', alpha=0.5)

plt.xlabel('opening 2018', fontsize=16)
plt.ylabel('total 2018', fontsize=16, rotation=90)
Out[53]:
Text(0, 0.5, 'total 2018')

standardize

In [52]:
x_st = ( x - x.mean() ) / x.std()
y_st = ( y - y.mean() ) / y.std()

_, ax = plt.subplots(1, 2, figsize=(12, 6))
ax[0].plot(x_st, y_st, 'C0.')
ax[0].set_xlabel('opening 2018 (st)')
ax[0].set_ylabel('total 2018 (st)', rotation=90)
# ax[0].plot(x, y_real, 'k')
az.plot_kde(y_st, ax=ax[1])
ax[1].set_xlabel('total 2018')
plt.tight_layout()
In [26]:
with pm.Model() as model_g_st:
    α = pm.Normal('α', mu=0, sd=10)
    β = pm.Normal('β', mu=0, sd=1)
    ϵ = pm.HalfCauchy('ϵ', 5)

    μ = pm.Deterministic('μ', α + β * x_st)
    y_pred = pm.Normal('y_pred', mu=μ, sd=ϵ, observed=y_st)

    trace_g_st = pm.sample(2000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [ϵ, β, α]
Sampling 2 chains: 100%|████████████████████████████████████████████████████████| 6000/6000 [03:02<00:00, 32.82draws/s]
In [27]:
varnames=['α', 'β', 'ϵ']
az.plot_trace(trace_g_st, varnames)
Out[27]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000001822E00B630>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001822E02AC50>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001822E053C88>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001822E07DCC0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001822E0A8CF8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001823172B908>]],
      dtype=object)
In [28]:
pm.autocorrplot(trace_g_st, varnames)
Out[28]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000001822DE39B38>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001822DED3470>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001822DEFC400>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001822DC66390>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001822DC8E320>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001822DCB82B0>]],
      dtype=object)
In [29]:
pm.summary(trace_g_st, varnames)
Out[29]:
mean sd mc_error hpd_2.5 hpd_97.5 n_eff Rhat
α -0.000343 0.029351 0.000388 -0.057351 0.057744 6008.910937 0.999751
β 0.958174 0.029923 0.000324 0.898988 1.014944 6398.802390 0.999852
ϵ 0.288903 0.021100 0.000288 0.247658 0.328762 5667.138043 0.999754
In [50]:
plt.figure(figsize=(8,8))
plt.plot(x_st, y_st, 'b.');
alpha_m_st = trace_g_st['α'].mean()
beta_m_st = trace_g_st['β'].mean()
plt.plot(x_st, alpha_m_st + beta_m_st * x_st, c='k', label='y_st = {:.2f} + {:.2f} * x_st'.format(alpha_m_st, beta_m_st))
plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', fontsize=16, rotation=0)
plt.legend(loc=2, fontsize=14)
Out[50]:
<matplotlib.legend.Legend at 0x182326cff28>
In [40]:
np.mean((y_hat, y_hat_dst), axis=0)
Out[40]:
array([599.45059209, 760.9561736 , 543.4367262 , 442.91795765,
       210.49087119, 377.62044197, 209.61897841, 191.24452255,
       233.51195557, 161.73722425, 138.09445298, 258.47479038,
       246.39763982, 176.74840289, 116.21593739, 159.25042738,
        90.54277561,  81.88083301, 141.48199626, 193.93144759,
       234.69898504, 145.32712614, 134.3226809 , 134.77700807,
        76.46077653, 115.02352577, 169.700999  , 116.8287768 ,
        86.19526785,  64.44196249, 118.09581346, 117.34606287,
       109.7209224 , 125.48627101,  84.96053855,  14.59623046,
        63.2805185 ,  80.49618955,  15.19238987,  92.71034988,
        64.05004325,  62.98014501,  53.05362483,  90.82661409,
        85.88813477,  99.44887498,  55.72754109,  73.27760682,
        95.20109927,  82.20038619,  83.74802509,  72.68644387,
        57.0124148 ,  60.09871753,  65.53417808,  85.09725254,
        56.32719194,  68.7863471 ,  45.11698989,  60.15244907,
        36.19423372,  62.53962698,  64.7924224 ,  59.49130085,
        59.52841909,  57.76264388,  60.0824087 ,  53.03315471,
        63.45970967,  44.41941071,  57.86899051,  49.51311384,
        67.80123211,  45.57119816,  47.7585147 ,  32.48708506,
        53.39934927,  50.07272629,  53.42652967,  43.68788575,
        52.59261639,  14.89196128,  51.39500822,  48.7641839 ,
        45.77309525,  18.36565946,  53.52015461,  47.31424135,
        40.33320371,  57.69164366,  35.1594209 ,  46.64387158,
        14.43304364,  13.73328956,  43.85804095,  15.1518614 ,
        20.48161039,  14.79440703,  40.65786065,  43.83217998])
In [42]:
y_hat = alpha_m + beta_m * x
y_hat_dst = ((alpha_m_st + beta_m_st * x_st) * y.std()) + y.mean()

# mean absolute percentage diff bt standardized, non-standardized models
# not interested in identifying superior accuracy given ease of use of non-standardized model
np.mean(np.abs(y_hat - y_hat_dst) / np.mean((y_hat, y_hat_dst), axis=0))
Out[42]:
0.03168624102777339
In [51]:
plt.figure(figsize=(8,8))
plt.plot(x, y, 'b.');

alpha_m = trace_g['α'].mean()
beta_m = trace_g['β'].mean()
plt.plot(x, alpha_m + beta_m * x, c='k', label='y = {:.2f} + {:.2f} * x'.format(alpha_m, beta_m))
plt.plot(x, y_hat_dst, label='y de-standardized')
plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', fontsize=16, rotation=0)
plt.legend(loc=2, fontsize=14)
Out[51]:
<matplotlib.legend.Legend at 0x18232720908>
In [ ]:
 

need more robust, Student-t approach, or polynomial approach to capture/weigh clustered mass

In [13]:
with pm.Model() as model_t:
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=1)
    epsilon = pm.HalfCauchy('epsilon', 5)
    nu = pm.Deterministic('nu', pm.Exponential('nu_', 1/29) + 1)
    
    y_pred = pm.StudentT('y_pred', mu=alpha + beta * x, sd=epsilon, nu=nu, observed=y)

#     start = pm.find_MAP()
#     step = pm.NUTS(scaling=start) 
    trace_t = pm.sample(2000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [nu_, epsilon, beta, alpha]
Sampling 2 chains: 100%|████████████████████████████████████████████████████████| 6000/6000 [09:09<00:00, 10.92draws/s]
In [15]:
varnames_t = ['alpha', 'beta', 'epsilon', 'nu']
pm.traceplot(trace_t, varnames_t)
Out[15]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000001BC22B1C198>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001BC22916400>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001BC227A0EF0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001BC226FE898>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001BC228A2DA0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001BC22733128>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001BC221C9240>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001BC22615A58>]],
      dtype=object)
In [17]:
pm.summary(trace_t, varnames_t)
Out[17]:
mean sd mc_error hpd_2.5 hpd_97.5 n_eff Rhat
alpha 9.999708 2.658146 0.060879 5.083106 15.354958 2100.121507 1.000022
beta 2.620033 0.073872 0.001502 2.476216 2.774543 2385.146731 0.999750
epsilon 15.716498 2.644425 0.061735 10.747377 20.946899 2135.220818 0.999810
nu 1.748127 0.482303 0.010698 1.002571 2.657903 2263.597095 0.999767
In [18]:
df_movies_2017 = df_movies[df_movies.year == 2017]
df_movies_2017.head()
Out[18]:
year rank title studio gross theaters_max opening theaters_open wide close opening_of_gross year_total take_of_year prev_year_Q1_theaters_open prev_year_Q3_theaters_open prev_year_IQR_theaters_open prev_year_IQR top_23
100 2017 1 Star Wars: The Last Jedi BV 620181382 4232.0 220009584 4232.0 2019-12-15 2019-04-19 00:00:00 0.354750 10917015639 0.056809 2870.25 3775.75 0.0 above 1
101 2017 2 Beauty and the Beast (2017) BV 504014165 4210.0 174750616 4210.0 2019-03-17 2019-07-13 00:00:00 0.346718 10917015639 0.046168 2870.25 3775.75 0.0 above 1
102 2017 3 Wonder Woman WB 412563408 4165.0 103251471 4165.0 2019-06-02 2019-11-09 00:00:00 0.250268 10917015639 0.037791 2870.25 3775.75 0.0 above 1
103 2017 4 Jumanji: Welcome to the Jungle Sony 404515480 3849.0 36169328 3765.0 2019-12-20 2019-05-31 00:00:00 0.089414 10917015639 0.037054 2870.25 3775.75 1.0 inside 1
104 2017 5 Guardians of the Galaxy Vol. 2 BV 389813101 4347.0 146510104 4347.0 2019-05-05 2019-09-21 00:00:00 0.375847 10917015639 0.035707 2870.25 3775.75 0.0 above 1
In [24]:
x_17 = df_movies_2017.opening / 1e6
y_17 = df_movies_2017.gross / 1e6

_, ax = plt.subplots(1, 2, figsize=(8, 4))
ax[0].plot(x_17, y_17, 'C0.')
ax[0].set_xlabel('opening 2017')
ax[0].set_ylabel('total 2017', rotation=90)
# ax[0].plot(x, y_real, 'k')
az.plot_kde(y, ax=ax[1])
ax[1].set_xlabel('total 2017')
plt.tight_layout()
In [39]:
alpha_mt = trace_t['alpha'].mean()
beta_mt = trace_t['beta'].mean()

plt.figure(figsize=(8,8))
plt.plot(x_17, y_17, '.')
plt.plot(x_17, alpha_m + beta_m * x_17, label="Gaussian")
plt.plot(x_17, alpha_mt + beta_mt * x_17, label='robust')

plt.legend(loc=2, fontsize=12)
plt.tight_layout()
In [54]:
def rmse(y_s, y_hat):
    return np.sqrt(np.mean(np.square(y_s - y_hat)))
    
rmse(y_17, alpha_m + beta_m * x_17), rmse(y_17, alpha_mt + beta_mt * x_17)
Out[54]:
(45.115778410027026, 45.68096376701758)
In [71]:
df_movies.pivot_table('year_total', 'year').sort_index(ascending=False).head(12)
Out[71]:
year_total
year
2018 11443053554
2017 10917015639
2016 11020669954
2015 11056139869
2014 9962444806
2013 10639099916
2012 10684309637
2011 9720900574
2010 9965289688
2009 10779498528
2008 9329189029
2007 9327619301
In [80]:
not_training = df_movies.year != 2018
ten_bill_plus = df_movies.year >= 2009

df_movies_test = df_movies[not_training & ten_bill_plus]
df_movies_test.describe()
Out[80]:
year rank gross theaters_max opening theaters_open opening_of_gross year_total take_of_year prev_year_Q1_theaters_open prev_year_Q3_theaters_open prev_year_IQR_theaters_open top_23
count 900.000000 900.00000 9.000000e+02 900.000000 9.000000e+02 900.000000 900.000000 9.000000e+02 900.000000 900.000000 900.000000 900.000000 900.000000
mean 2013.000000 50.50000 9.880337e+07 3141.438889 2.965413e+07 2928.901111 0.306564 1.052726e+10 0.009384 2692.250000 3535.527778 0.487778 0.230000
std 2.583425 28.88212 9.599552e+07 678.147361 3.156643e+07 1046.991487 0.121435 4.787958e+08 0.008982 144.901054 124.706744 0.500129 0.421066
min 2009.000000 1.00000 2.078370e+07 776.000000 3.161000e+04 1.000000 0.000607 9.720901e+09 0.001904 2368.000000 3333.500000 0.000000 0.000000
25% 2011.000000 25.75000 4.023636e+07 2807.750000 1.237883e+07 2718.000000 0.256189 9.965290e+09 0.003863 2686.500000 3470.000000 0.000000 0.000000
50% 2013.000000 50.50000 6.420930e+07 3171.000000 1.985234e+07 3115.500000 0.320339 1.068431e+10 0.006075 2727.000000 3506.500000 0.000000 0.000000
75% 2015.000000 75.25000 1.175849e+08 3602.250000 3.426934e+07 3558.750000 0.380311 1.091702e+10 0.011282 2769.750000 3559.500000 1.000000 0.000000
max 2017.000000 100.00000 9.366622e+08 4535.000000 2.479667e+08 4529.000000 0.633332 1.105614e+10 0.084719 2870.250000 3775.750000 1.000000 1.000000
In [81]:
x_test = df_movies_test.opening / 1e6
y_test = df_movies_test.gross / 1e6

_, ax = plt.subplots(1, 2, figsize=(12, 6))
ax[0].plot(x_test, y_test, 'C0.')
ax[0].set_xlabel('opening 2009 - 17')
ax[0].set_ylabel('total 2009 - 17', rotation=90)
# ax[0].plot(x, y_real, 'k')
az.plot_kde(y, ax=ax[1])
ax[1].set_xlabel('total 2009 - 17')
plt.tight_layout()
In [82]:
plt.plot(x_test, y_test, '.')
plt.plot(x_test, alpha_m + beta_m * x_test, label="Gaussian")
plt.plot(x_test, alpha_mt + beta_mt * x_test, label='robust')

plt.legend(loc=2, fontsize=12)
plt.tight_layout()
In [83]:
rmse(y_test, alpha_m + beta_m * x_test), rmse(y_test, alpha_mt + beta_mt * x_test)
Out[83]:
(44.557219486395766, 45.5443973651019)
In [85]:
df_movies_2019 = pd.read_excel("../../20190520_wta_movies/rundown2.xlsx", sheet_name="2019_YTD (Oct 16)")
df_movies_2019 = df_movies_2019[df_movies_2019.year == 2019]
print(df_movies_2019.shape)
df_movies_2019.head()
(100, 10)
Out[85]:
year rank title studio gross theaters_max opening theaters_open wide close
0 2019.0 1 Avengers: Endgame BV 858373000.0 4662 357115007 4662 2019-04-26 00:00:00 2019-09-12 00:00:00
1 2019.0 2 The Lion King (2019) BV 542461804.0 4802 191770759 4725 2019-07-19 00:00:00 -
2 2019.0 3 Toy Story 4 BV 433586786.0 4575 120908065 4575 2019-06-21 00:00:00 -
3 2019.0 4 Captain Marvel BV 426829839.0 4310 153433423 4310 2019-03-08 00:00:00 2019-07-04 00:00:00
4 2019.0 5 Spider-Man: Far from Home Sony 390470129.0 4634 92579212 4634 2019-07-02 00:00:00 -
In [90]:
df_movies_2019_closed = df_movies_2019[df_movies_2019.close != "-"]
print(df_movies_2019_closed.shape)
df_movies_2019_closed.head(10)
(70, 10)
Out[90]:
year rank title studio gross theaters_max opening theaters_open wide close
0 2019.0 1 Avengers: Endgame BV 858373000.0 4662 357115007 4662 2019-04-26 00:00:00 2019-09-12 00:00:00
3 2019.0 4 Captain Marvel BV 426829839.0 4310 153433423 4310 2019-03-08 00:00:00 2019-07-04 00:00:00
8 2019.0 9 Us Uni. 175005930.0 3743 71117625 3741 2019-03-22 00:00:00 2019-06-06 00:00:00
10 2019.0 11 John Wick: Chapter 3 - Parabellum LG/S 171015687.0 3850 56818067 3850 2019-05-17 00:00:00 2019-09-12 00:00:00
11 2019.0 12 How to Train Your Dragon: The Hidden World Uni. 160799505.0 4286 55022245 4259 2019-02-22 00:00:00 2019-06-13 00:00:00
12 2019.0 13 The Secret Life of Pets 2 Uni. 158257265.0 4564 46652680 4561 2019-06-07 00:00:00 2019-09-19 00:00:00
13 2019.0 14 Pokemon Detective Pikachu WB 144105346.0 4248 54365242 4202 2019-05-10 00:00:00 2019-08-15 00:00:00
14 2019.0 15 Shazam! WB (NL) 140371656.0 4306 53505326 4217 2019-04-05 00:00:00 2019-07-25 00:00:00
16 2019.0 17 Dumbo (2019) BV 114766307.0 4259 45990748 4259 2019-03-29 00:00:00 2019-08-08 00:00:00
17 2019.0 18 Glass Uni. 111035005.0 3844 40328920 3841 2019-01-18 00:00:00 2019-04-04 00:00:00
In [91]:
x_19 = df_movies_2019_closed.opening / 1e6
y_19 = df_movies_2019_closed.gross / 1e6

_, ax = plt.subplots(1, 2, figsize=(12, 6))
ax[0].plot(x_19, y_19, 'C0.')
ax[0].set_xlabel('opening 2019')
ax[0].set_ylabel('total 2019', rotation=90)
# ax[0].plot(x, y_real, 'k')
az.plot_kde(y, ax=ax[1])
ax[1].set_xlabel('total 2019')
plt.tight_layout()
In [92]:
plt.figure(figsize=(8,8))
plt.plot(x_19, y_19, '.')
plt.plot(x_19, alpha_m + beta_m * x_19, label="Gaussian")

plt.legend(loc=2, fontsize=12)
plt.tight_layout()
In [99]:
jw3_opening = df_movies_2019_closed.loc[10, 'opening']

alpha_m + beta_m * jw3_opening
Out[99]:
164981834.87753886
In [100]:
jw3_total = df_movies_2019_closed.loc[10, 'gross']
jw3_total
Out[100]:
171015687.0
In [ ]: