#!/usr/bin/env python
# coding: utf-8

# Gaussian Naive Bayes
# ====================
#
# End-to-end example of fitting pymc-learn's `GaussianNB` classifier on the
# iris dataset, first with the default inference settings and then with NUTS
# (MCMC), including convergence diagnostics, prediction, and saving/loading.

# Let's set some settings for this Jupyter Notebook.

# In[2]:

try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # `get_ipython` is only defined inside an IPython/Jupyter session. When
    # this export is run with plain `python` there is no inline backend to
    # enable, so skipping the magic is the intended behaviour.
    pass

# Silence warnings before the heavy imports below so that import-time
# warnings are suppressed as well.
from warnings import filterwarnings
filterwarnings("ignore")

import os
# NOTE: keep these assignments *above* `import pymc3`. Theano reads
# THEANO_FLAGS when it is first imported, so setting them later has no effect.
os.environ['MKL_THREADING_LAYER'] = 'GNU'
os.environ['THEANO_FLAGS'] = 'device=cpu'

import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns
import matplotlib.pyplot as plt

# Fixed seed for NumPy's global random state.
np.random.seed(12345)

# Plot styling shared by every figure produced below.
rc = {
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    'axes.labelsize': 20,
    'font.size': 20,
    'legend.fontsize': 12.0,
    'axes.titlesize': 10,
    "figure.figsize": [12, 6],
}
sns.set(rc=rc)

# Display the value of every top-level expression in a notebook cell, not
# just the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


# Now, let's import the `GaussianNB` (GaussianNaiveBayes) model from the
# `pymc-learn` package.

# In[3]:

import pmlearn
from pmlearn.naive_bayes import GaussianNB
print('Running on pymc-learn v{}'.format(pmlearn.__version__))


# ## Step 1: Prepare the data
# Use the popular iris dataset.

# In[4]:

# Load the data and split in train and test set
from sklearn.datasets import load_iris

# A single call returns both the feature matrix and the target vector.
X, y = load_iris(return_X_y=True)
X.shape


# In[5]:

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)


# ## Step 2: Instantiate a model

# In[6]:

model = GaussianNB()


# ## Step 3: Perform Inference

# In[7]:

model.fit(X_train, y_train, minibatch_size=20, inference_args={'n': 60000})


# ## Step 4: Diagnose convergence

# In[8]:

model.plot_elbo()


# In[9]:

pm.traceplot(model.trace)


# In[10]:

pm.forestplot(model.trace)


# ## Step 5: Criticize the model

# In[11]:

pm.summary(model.trace)


# In[12]:

pm.plot_posterior(model.trace)


# ## Step 6: Use the model for prediction

# In[13]:

y_probs = model.predict_proba(X_test)


# In[14]:

y_predicted = model.predict(X_test)


# In[15]:

model.score(X_test, y_test)


# In[16]:

# The save prefix points inside `pickle_jar/`; make sure that directory
# exists first, otherwise saving fails on a fresh checkout.
if not os.path.isdir('pickle_jar'):
    os.makedirs('pickle_jar')

model.save('pickle_jar/gaussian_nb')


# #### Use already trained model for prediction

# In[17]:

model_new = GaussianNB()


# In[18]:

model_new.load('pickle_jar/gaussian_nb')


# In[19]:

model_new.score(X_test, y_test)


# ## MCMC

# In[20]:

model2 = GaussianNB()
model2.fit(X_train, y_train, inference_type='nuts')


# ### Diagnose convergence

# In[21]:

pm.traceplot(model2.trace)


# In[22]:

pm.gelman_rubin(model2.trace)


# In[23]:

pm.energyplot(model2.trace)


# ### Criticize the model

# In[24]:

pm.summary(model2.trace)


# In[25]:

pm.plot_posterior(model2.trace)


# ### Use the model for prediction

# In[26]:

y_predict2 = model2.predict(X_test)


# In[27]:

model2.score(X_test, y_test)


# In[28]:

# Same directory guard as above so this cell also works when run on its own.
if not os.path.isdir('pickle_jar'):
    os.makedirs('pickle_jar')

model2.save('pickle_jar/gaussian_nb2')
model2_new = GaussianNB()
model2_new.load('pickle_jar/gaussian_nb2')
model2_new.score(X_test, y_test)