""" At the moment of writing (05.20.2019) the only build that supports tensorflow probability is the tensorflow nightly build so we will use that to install tensorflow 2.0 and tensorflow probability. """ # Install tensorflow 2.0 and tensorflow probability from the nightly build !pip install --upgrade tf-nightly-2.0-preview tfp-nightly # Imports import os import random import sys import tensorflow as tf import tensorflow_probability as tfp # By convention, we generally refer to the tf probability distributions library as tfd. tfd = tfp.distributions import seaborn as sns from matplotlib import pyplot as plt from collections import defaultdict # Import helpers file """ For some plots we need to convert tensors into numpy ndarrays. For that we use the evaluate function in the helpers.py. If you are running this in Google Colab, make sure you upload the helpers.py found in the notebooks folder to Google Colab but if you are running this in binder, you should be fine. """ from helpers import evaluate # turning of tensorflow INFO, WARNING, and ERROR messages os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # plt axis colors setup plt.rc_context({'axes.edgecolor':'orange', 'xtick.color':'red', 'ytick.color':'red', 'text.color':'orange'}) color_b = 'dodgerblue' color_o = '#FF9A13' color_sb = '#0504aa' color_do = 'darkorange' # Check the versions of tensorflow 2.0 and tensorflow probability print("Tensorflow version: {}".format(tf.__version__)) print("Tensorflow probability version: {}".format(tfp.__version__)) """ The Rademacher and Rayleigh are two types of distributions we will use to generate our samples. Rademacher: is a discrete probability distribution where a random variate X has a 50% chance of being +1 and a 50% chance of being -1. Rayleigh: is a continuous probability distribution for non-negative valued random variables. Do not worry about what probability distributions mean, we will be looking at it in the next section, for now, you can think of Rademacher as the sampler, the happy guy who tosses coins over and over again where heads represent +1 and tails -1. And Rayleigh is the guy who works at a gas/petrol station who helps you to fill the tank and notes down how much you filled your tank (eg. 1.2l, 4.5l) which are continuous values. """ # Discrete random variable rademacher = tfp.math.random_rademacher([1, 100], dtype=tf.int32) # Continuous random variable rayleigh = tfp.math.random_rayleigh([1, 100], dtype=tf.float32) # Plot discrete random variable 1 and -1 plt.title("Rademacher Discrete Random Variables") plt.hist(rademacher, color=color_b) plt.show() # Plot continuous random variable plt.title("Rayleigh Continuous Random Variables") plt.hist(rayleigh, color=color_o) plt.show() """ In a fair 6 sided dice, when you roll, each number has a chance of 1/6 = 16.7% of landing and we can show this by running long enough rolls. So in this example, we do 10000 rolls and we verify that P(X=4) = 16.7%. In short, the probability from a PMF says what chance x has. Play around with the different x values, number of rolls and sides and see what kind of probability you get and see if it makes sense. """ def single_dice(x, sides, rolls): """Calculates and prints the probability of rolls. Arguments: x (int) : is the number you want to calculate the probability for. sides (int) : Number of sides for the dice. rolls (int) : Number of rolls. Returns: a printout. """ result = roll(sides, rolls) for i in range(1, sides +1): plt.bar(i, result[i] / rolls) print("P(X = {}) = {}%".format(x, tf.divide(tf.multiply(result[x], 100), rolls))) def roll(sides, rolls): """Returns a dictionary of rolls and the sides of each roll. Arguments: sides (int) : Number of sides for the dice. rolls (int) : Number of rolls. Returns: a dictionary. """ d = defaultdict(int) # creating a default dictionary for _ in range(rolls): d[random.randint(1, sides)] += 1 # The random process return d single_dice(x=6, sides=6, rolls=10000) """ In this example, we are rolling two dices, there are ways to simplify the code so it's not this long but I wanted to show that we are rolling two dice 1000 times, and in the example we are calculating the probability of rolling x=4 and y=1, this can be easily calculated by multiplying the individual probabilities of x and y. """ def multi_dice(x, y, sides, rolls, plot=True): """Calculates the joint probability of two dice. Arguments: x (int) : is the number you want to calculate the probability for. y (int) : is the number you want to calculate the probability for. sides (int) : Number of sides for the dice. rolls (int) : Number of rolls. plot (bool) : Whether you want to plot the data or not. Returns: probabilities (float). """ result1 = roll(sides, rolls) # first result from the rolls result2 = roll(sides, rolls) # second result from the rolls prob_x = tf.divide(result1[x], rolls) # calculates the probability of x prob_y = tf.divide(result2[y], rolls) # calculates the probability of y joint_prob = tf.multiply(prob_x, prob_y) # calculates the joint probability of x&y by multiplying if plot: for i in range(1, sides +1): plt.title("Dice 1 {} Rolls".format(rolls)) plt.bar(i, result1[i] / rolls, color=color_b) plt.show() for i in range(1, sides +1): plt.title("Dice 2 {} Rolls".format(rolls)) plt.bar(i, result2[i] / rolls, color=color_o) plt.show() return prob_x, prob_y, joint_prob prob_x, prob_y, joint_prob = multi_dice(x=4, y=1, sides=6, rolls=10000, plot=True) print("P(x = {:.4}%), P(y = {:.4}%), P(x = {}; y = {}) = {:.4}%\n\n".format(tf.multiply(prob_x, 100), tf.multiply(prob_y, 100), 4, 1, tf.multiply(joint_prob, 100))) """ In our guessing game example, I told you how difficult it would be for you to guess a real number I am thinking of between 0 and 1 and below, we plot such a graph with minval of 0 and maxval of 1 and we "guess" the values 500 times and the resulting distribution is plotted. """ # Outputs random values from a uniform distribution continuous = tf.random.uniform([1, 500], minval=0, maxval=1, dtype=tf.float32) g = sns.distplot(continuous, color=color_b) plt.grid() """ Below is the same histogram plot of our continuous random variable, note that the values of y axis looks different between the seaborn distplot and the histogram plot because the sns distplot is also drawing a density plot. You can turn it off by setting ‘kde=False’ and you will get the same plot as you see below. The goal of the following plot is to show you that if you want to calculate the p(0.3) then you would need to calculate the volume of the region delta x """ n, bins, patches = plt.hist(continuous, color=color_b) patches[3].set_fc(color_o) plt.grid() """ Let's say we want to find the probability of 1.55 (p(1.5)) from a continuous distribution. We can ofcourse do the integral and find it but in tensorflow probability you have "prob()" which allows you to calculate both Probability Mass Function and Probability Density Function. For tfp.distributions.Normal "loc" is the mean and "scale" is the std deviation. Don't worry if you don't understand those, we will go through distributions in Section 9. And I recommend you come back and go through these examples again after you finish section 9. Also, there's nothing special about these numbers, play around with the scale, p(x) values and the k limits to get a better understanding. """ # creating an x axis samples = tf.range(-10, 10, 0.001) # Create a Normal distribution with mean 0 and std deviation 3 normal_distribution = tfd.Normal(loc=0., scale=3) # Then we calculate the PDFs of drawing 1.5 pdf_x = normal_distribution.prob(1.5) # We can't plot tensors so evaluate is a helper function to convert to ndarrays [pdf_x_] = evaluate([pdf_x]) # Finally, we plot both the PDF of the samples and p(1.5) plt.plot(samples, normal_distribution.prob(samples), color=color_b) plt.fill_between(samples, normal_distribution.prob(samples), color=color_b) plt.bar(1.5, pdf_x_, color=color_o) plt.grid() print("Probability of drawing 1.5 = {:.4}% from the normal distribution".format(pdf_x*100)) """ Let's start by creating three distributions for Waymo, Uber and Tesla (W, U and T) and use Bernoulli distribution since for Bernoulli distribution the outcome can only be 0 or 1, in our case, not hit and hit. Say the probabilities of getting hit for W = 0.1, U = 0.2 and T = 0.3. Also, nothing against Tesla 😉. With tfp.distributions, you don't have to create individual distributions line by line, you can specify the probabilities inside "probs" argument. This call defines three independent Bernoulli distributions, which happen to be contained in the same Python Distribution object (self_driving). The three events W, U and T are independent but we would like to specify a joint distribution to be able to calculate the marginal probability of the event [0, 0, 0], the probability of not getting hit by any three. For this we would be using a higher order distribution called Independent, which takes independent distributions and yields a new distribution. """ # Lets create three Bernoulli distributions for Waymo, Uber and Tesla self_driving = tfd.Bernoulli(probs=[.1, .2, .3]) # Individual probabilities of getting hit by W, U and T and these should match with the specified probs print("Individual probabilities: {}".format(self_driving.prob([1, 1, 1]))) # Combining the distributions to create the independent distribution self_driving_joint = tfd.Independent(self_driving, reinterpreted_batch_ndims=1) # Finally let's calculate the marginal probability of [0, 0, 0] print("Marginal Probability of event [0, 0, 0]: {:.4}".format(self_driving_joint.prob([0, 0, 0]))) """ Let's say I roll a fair die twice and obtain two numbers. X1 = result of the first roll and X2 = result of the second roll. Given that I know X1+X2 = 7, what is the probability that X1=4 or X2=4? We start by doing 100 trials and we create a Multinomial distribution where total_count= number of trials, probs = probability vectors for the events. We use Multinomial because it can have multiple outcomes and we take 2 samples from this distribution to resemble dice_1 and dice_2. """ for i in range(100): dice_roll_distribution = tfd.Multinomial(total_count=1., probs=[1/6.]*6) dice_sample = dice_roll_distribution.sample(2) # tf.where returns the index of the samples and we add 1 because indexing starts from 0 dice_1 = tf.where(dice_sample[0])[0] + 1 dice_2 = tf.where(dice_sample[1])[0] + 1 if tf.equal((dice_1 + dice_2), 7): # There are two conditions (4, 3) and (3, 4) that would result in a sum of 7 with either x1=4 or x2=4 prob_7 = (2/36.) # There are 6 combinations that would result in getting a 7 {(6,1),(5,2),(4,3),(3,4),(2,5),(1,6)} prob_4 = (6/36.) prob_4_given_7 = tf.divide(prob_7,prob_4)*100 def f1(): return tf.print("X1: {} \t X2: {} \t P(X1 or X2=4 | 7): {:.4}%".format(dice_1, dice_2, prob_4_given_7), output_stream=sys.stdout) def f2(): return None tf.cond(tf.logical_or(tf.equal(dice_1, 4), tf.equal(dice_2, 4)), f1, f2) else: continue """ Lets see an example for three events, but before that, let's break down the steps of finding the conditional probability with three events: P(a, b, c) = P(a|b, c) P(b, c) = P(a|b, c) P(b|c) P(c) Now, to the example, in a factory there are 100 units of a certain product, 5 of which are defective. We pick three units from the 100 units at random. What is the probability that none of them are defective? We start by creating three Bernoulli distributions for the three events. Event 1: The probability of choosing a good part (95/100) Event 2: The probability of choosing a 2nd good part (94/99) Event 3: The probability of choosing a 3rd good part (93/98 We can do this example without using the tfp.Independent call but it would take few more lines but this way, you can keep chaining the conditional probabilities """ # Let's start by creating three Bernoulli distributions units_distribution = tfd.Bernoulli(probs=[95/100., 94/99., 93/98.]) # Let's join these probability distributions joint_unit_distribution = tfd.Independent(units_distribution, reinterpreted_batch_ndims=1) # Finally let's calculate the probability of picking one after the other print("Probability of P(a, b, c) = P(1, 1, 1): {:.4}".format(joint_unit_distribution.prob([1, 1, 1]))) """ Let's revisit our multiple_dice example, there we calculated the probabilities of getting 4 on Dice 1 and 1 on dice two. In this example, let's expand that and see what the probability of getting 4 on dice 1 given we get 1 on dice 2 for 100 rolls. P(x = 4 | y = 1) = P(x=4, y=1) / P(y=1). """ # calculating the prob of x, y and the joint probability _, prob_y, prob_numerator = multi_dice(x=4, y=1, sides=6, rolls=100, plot=False) prob_denominator = prob_y conditional_prob = tf.divide(prob_numerator, prob_denominator) print("Probability of getting 4 on dice 1 given I get 1 on Dice 2: {}%".format(tf.multiply(conditional_prob, 100))) # Let's see the bitcoin example in code. bitcoins = [-1, -1, -1, 0, 0, 4, 4] # gives the winnings we can make for each face of the dice dice = [1/6.]*6 # probability of landing a face expectation = 0 for i in range(0, len(dice)): expectation += (dice[i] * bitcoins[i]) # summing p(x) * f(x) # Calculate the expectation print( "Expectation of the bitcoin game E(X) is : {:.4}".format(expectation)) # Let's create a uniform distribution such that the limit be between 0 and 1 uniform_distribution = tfd.Uniform(low=0.0, high=1.0) # Here we find the expectation of the uniform distribution continuous_expectation = uniform_distribution.mean() print("The Expectation of f(x) = 1 for the limit 0 to 1: {}".format(continuous_expectation)) # Plotting the expectation plt.hist(continuous_expectation, color=color_b) plt.grid() # creating an x axis from -3 to 3 with 0.001 increments x_axis = tf.range(-3, 3, 0.001) # Let's create two distributions to see how variance affects the distributions. loc(=mean) and std.deviation(=scale) distribution_1 = tfd.Normal(loc=0., scale=0.5) distribution_2 = tfd.Normal(loc=0., scale=1.5) # Distribution plot 1 plt.plot(x_axis, distribution_1.prob(x_axis), color=color_b) plt.fill_between(x_axis, distribution_1.prob(x_axis), color=color_b) # Distribution plot 2 plt.plot(x_axis, distribution_2.prob(x_axis), color=color_o) plt.fill_between(x_axis, distribution_2.prob(x_axis), color=color_o) plt.grid() print("Blue Plot Variance: {} \nOrange Plot Variance: {}".format(distribution_1.variance(), distribution_2.variance())) """ To find the covariance we will be using another tensorflow probability library called the stats. """ # We start by creating two normal distributions to represent f(x) and g(y) f_x = tf.random.normal(shape=(100, 1, 3)) g_y = tf.random.normal(shape=(100, 1, 3)) # cov[i, j] is the sample covariance between fx[:, i, j] and fy[:, i, j]. covariance = tfp.stats.covariance(f_x, g_y, sample_axis=0, event_axis=None) print("Covariance of f(x) and g(y): {}".format(covariance[0])) # correlation of f(x) correlation = tfp.stats.correlation(f_x, g_y, sample_axis=0, event_axis=None) print("Correlation of f(x) and g(y): {}".format(correlation[0])) # covariance matrix of f(x) cov_matrix = tfp.stats.covariance(f_x, sample_axis=0, event_axis=-1) print("Variance of x: {} \nCovariance Matrix for x: \n{}".format(covariance[0], cov_matrix[0])) # Create a Bernoulli distribution with a probability .5 and sample size of 1000 bernoulli_distribution = tfd.Bernoulli(probs=.5) bernoulli_trials = bernoulli_distribution.sample(1000) # Plot Bernoulli Distribution sns.distplot(bernoulli_trials, color=color_b) # Properties of Bernoulli distribution property_1 = bernoulli_distribution.prob(1) print("P(x = 1) = {}".format(property_1)) property_2 = bernoulli_distribution.prob(0) print("P(x = 0) = 1 - {} = {}".format(property_1, property_2)) print("Property three is a generalization of property 1 and 2") print("For Bernoulli distribution The expected value of a Bernoulli random variable X is p (E[X] = p)") # Variance is calculated as Var = E[(X - E[X])**2] property_5 = bernoulli_distribution.variance() print("Var(x) = {0} (1 - {0})".format(property_5)) # For a fair dice p = [1/6.]*6 # Multinoulli distribution with 60 trials and sampled once multinoulli_distribution = tfd.Multinomial(total_count=60., probs=p) multinoulli_pdf = multinoulli_distribution.sample(1) print("""Dice throw values: {} In sixty trials, index 0 represents the times the dice landed on 1 (= {} times) and index 1 represents the times the dice landed on 2 (= {} times)\n""".format(multinoulli_pdf, multinoulli_pdf[0][0], multinoulli_pdf[0][1])) g = sns.distplot(multinoulli_pdf, color=color_b) plt.grid() # We use linespace to create a range of values starting from -8 to 8 with incremants (= stop - start / num - 1) rand_x= tf.linspace(start=-8., stop=8., num=150) # Gaussian distribution with a standard deviation of 1 and mean 0 sigma = float(1.) mu = float(0.) gaussian_pdf = tfd.Normal(loc=mu, scale=sigma).prob(rand_x) # convert tensors into numpy ndarrays for plotting [rand_x_, gaussian_pdf_] = evaluate([rand_x, gaussian_pdf]) # Plot of the Gaussian distribution plt.plot(rand_x_, gaussian_pdf_, color=color_b) plt.fill_between(rand_x_, gaussian_pdf_, color=color_b) plt.grid() # We create a multivariate normal distribution with two distributions with mean 0. and std.deviation of 2. mvn = tfd.MultivariateNormalDiag(loc=[0., 0.], scale_diag = [2., 2.]) # we take 1000 samples from the distribution samples = mvn.sample(1000) # Plot of multi variate distribution g = sns.jointplot(samples[:, 0], samples[:, 1], kind='scatter', color=color_b) plt.show() # We use linespace to create a range of values starting from 0 to 4 with incremants (= stop - start / num - 1) a = tf.linspace(start=0., stop=4., num=41) # the tf.newaxis expression is used to increase the dimension of the existing array by one more dimension a = a[..., tf.newaxis] lambdas = tf.constant([1.]) # We create a Exponential distribution and calculate the PDF for a expo_pdf = tfd.Exponential(rate=1.).prob(a) # convert tensors into numpy ndarrays for plotting [a_, expo_pdf_] = evaluate([a,expo_pdf]) # Plot of Exponential distribution plt.figure(figsize=(12.5, 4)) plt.plot(a_.T[0], expo_pdf_.T[[0]][0], color=color_sb) plt.fill_between(a_.T[0], expo_pdf_.T[[0]][0],alpha=.33, color=color_b) plt.title(r"Probability density function of Exponential distribution with $\lambda$ = 1") plt.grid() # We use linespace to create a range of values starting from 0 to 4 with incremants (= stop - start / num - 1) a = tf.linspace(start=0., stop=4., num=41) # the tf.newaxis expression is used to increase the dimension of the existing array by one more dimension a = a[..., tf.newaxis] lambdas = tf.constant([1.]) # We create a Laplace distribution and calculate the PDF for a laplace_pdf = tfd.Laplace(loc=1, scale=1).prob(a) # convert tensors into numpy ndarrays for plotting [a_, laplace_pdf_] = evaluate([a, laplace_pdf]) # Plot of laplace distribution plt.figure(figsize=(12.5, 4)) plt.plot(a_.T[0], laplace_pdf_.T[[0]][0], color=color_sb) plt.fill_between(a_.T[0], laplace_pdf_.T[[0]][0],alpha=.33, color=color_b) plt.title(r"Probability density function of Laplace distribution") plt.grid() """ There is no dirac distribution in tensorflow, you will be able to plot using the fast fourier transform in the tf.signal but that would take us outside the scope of the book so we use the normal distribution to plot a dirac distribution. Play around with the delta and mu values to see how the distribution moves. """ # We use linespace to create a range of values starting from -8 to 8 with incremants (= stop - start / num - 1) rand_x= tf.linspace(start=-8., stop=8., num=150) # Gaussian distribution with a standard deviation of 1/6 and mean 2 delta = float(1./6.) mu = float(2.) dirac_pdf = tfd.Normal(loc=mu, scale=delta).prob(rand_x) # convert tensors into numpy ndarrays for plotting [rand_x_, dirac_pdf_] = evaluate([rand_x, dirac_pdf]) # Plot of the dirac distribution plt.plot(rand_x_, dirac_pdf_, color=color_sb) plt.fill_between(rand_x_, dirac_pdf_, color=color_b) plt.grid() """ We will be creating two variable with two components to plot the mixture of distributions. The tfd.MixtureSameFamily distribution implements a batch of mixture distribution where all components are from different parameterizations of the same distribution type. In our example, we will be using tfd.Categorical to manage the probability of selecting components. Followed by tfd.MultivariateNormalDiag as components. The MultivariateNormalDiag constructs Multivariate Normal distribution on R^k """ num_vars = 2 # Number of variables (`n` in formula). var_dim = 1 # Dimensionality of each variable `x[i]`. num_components = 2 # Number of components for each mixture (`K` in formula). sigma = 5e-2 # Fixed standard deviation of each component. # Set seed. tf.random.set_seed(77) # categorical distribution categorical = tfd.Categorical(logits=tf.zeros([num_vars, num_components])) # Choose some random (component) modes. component_mean = tfd.Uniform().sample([num_vars, num_components, var_dim]) # component distribution for the mixture family components = tfd.MultivariateNormalDiag(loc=component_mean, scale_diag=[sigma]) # create the mixture same family distribution distribution_family = tfd.MixtureSameFamily(mixture_distribution=categorical, components_distribution=components) # Combine the distributions mixture_distribution = tfd.Independent(distribution_family, reinterpreted_batch_ndims=1) # Extract a sample from the distribution samples = mixture_distribution.sample(1000).numpy() # Plot the distributions g = sns.jointplot(x=samples[:, 0, 0], y=samples[:, 1, 0], kind="scatter", color=color_b, marginal_kws=dict(bins=50)) plt.show() def logistic(x, phi): """Calculates the logistic function. Arguments: x (int) : is the x values. phi (int) : parameter. Returns: Values in range(0, 1) """ return 1.0 / (1.0 + tf.exp(phi * x)) # create a range of values starting from -4 to 4 with incremants (= stop - start / num - 1) x_vals = tf.linspace(start=-4., stop=4., num=100) # Create three logistic functions to see the effect of the parameter phi log_phi_1 = logistic(x_vals, 1.) log_phi_3 = logistic(x_vals, 3.) log_phi_5 = logistic(x_vals, -5.) # convert tensors into numpy ndarrays for plotting [x_vals_, log_phi_1_, log_phi_3_, log_phi_5_] = evaluate([x_vals, log_phi_1, log_phi_3, log_phi_5]) # Plot of the logistic function plt.figure(figsize = (12, 5)) plt.plot(x_vals_, log_phi_1_, label=r"$\phi = 1$") plt.plot(x_vals_, log_phi_3_, label=r"$\phi = 3$") plt.plot(x_vals_, log_phi_5_, label=r"$\phi = -5$") plt.legend() plt.grid() def softplus(x, beta): """Calculates the softplus function. Arguments: x (int) : is the number you want to calculate the probability for. beta (int) : paramter. Returns: Values in range(0, infinity). """ return tf.math.log(1 + tf.math.exp(beta * x)) # create a range of values starting from -4 to 4 with incremants (= stop - start / num - 1) x_vals = tf.linspace(start=-4., stop=4., num=100) # Create three softplus functions to see the effect of the parameter beta log_beta_1 = softplus(x_vals, 1.) log_beta_3 = softplus(x_vals, 3.) log_beta_5 = softplus(x_vals, -5.) # convert tensors into numpy ndarrays for plotting [x_vals_, log_beta_1_, log_beta_3_, log_beta_5_] = evaluate([x_vals, log_beta_1, log_beta_3, log_beta_5]) # Plot of the softplu function plt.figure(figsize = (12, 5)) plt.plot(x_vals_, log_beta_1_, label=r"$\beta = 1$") plt.plot(x_vals_, log_beta_3_, label=r"$\beta = 3$") plt.plot(x_vals_, log_beta_5_, label=r"$\beta = -5$") plt.legend() plt.grid() """ There are 2 cookie jars: Jar 1 has 30 vanilla cookies and 10 chocolate cookies Jar 2 has 20 vanilla cookies and 20 chocolate cookies You randomly pick one cookie from one randomly chosen jar. It is vanilla. What is the probability it was from Jar 1? So we are looking for P(Jar 1 | Vanilla). """ # probability of picking between Jar 1 and 2: The Prior probability p_jar_1 = 1/2 # Vanilla / total cookies: The likelihood p_vanilla_given_jar_1 = 30/(30+10) # total vanilla cookies / Total cookies: The marginal likelihood p_vanilla = (30+20) / (30+10 + 20+20) # Bayes' rule p_jar_1_given_vanilla = (p_jar_1 * p_vanilla_given_jar_1) / p_vanilla print('P(Jar 1 | Vanilla) = {}'.format(p_jar_1_given_vanilla)) """ No matter what combination of toss you get the Entropy remains the same but if you change the probability of the trial, the entropy changes, play around with the probs and see how the entropy is changing and see if the increase or decrease makes sense. """ coin_entropy = [0] # creating the coin entropy list for i in range(10, 11): coin = tfd.Bernoulli(probs=0.5) # Bernoulli distribution coin_sample = coin.sample(i) # we take 1 sample coin_entropy.append(coin.entropy()) # append the coin entropy sns.distplot(coin_entropy, color=color_o, hist=False, kde_kws={"shade": True}) # Plot of the entropy print("Entropy of 10 coin tosses in nats: {} \nFor tosses: {}".format(coin_entropy[1], coin_sample)) plt.grid() """ Note here since we are using the Bernoulli distribution to find the expectation we simply use mean, if you change the distribution, you need to find the Expectation accordingly """ def shannon_entropy_func(p): """Calculates the shannon entropy. Arguments: p (int) : probability of event. Returns: shannon entropy. """ return -tf.math.log(p.mean()) # Create a Bernoulli distribution bernoulli_distribution = tfd.Bernoulli(probs=.5) # Use TFPs entropy method to calculate the entropy of the distribution shannon_entropy = bernoulli_distribution.entropy() print("TFPs entropy: {} matches with the Shannon Entropy Function we wrote: {}".format(shannon_entropy, shannon_entropy_func(bernoulli_distribution))) # You can see below by changing the values of x we increase the entropy shannon_list = [] for i in range(1, 20): uniform_distribution = tfd.Uniform(low=0.0, high=i) # We create a uniform distribution shannon_entropy = uniform_distribution.entropy() # Calculate the entropy of the uniform distribution shannon_list.append(shannon_entropy) # Append the results to the list # Plot of Shannon Entropy plt.hist(shannon_list, color=color_b) plt.grid() def kl_func(p, q): """Calculates the KL divergence of two distributions. Arguments: p : Distribution p. q : Distribution q. Returns: the divergence value. """ r = p.loc - q.loc return (tf.math.log(q.scale) - tf.math.log(p.scale) -.5 * (1. - (p.scale**2 + r**2) / q.scale**2)) # We create two normal distributions p = tfd.Normal(loc=1., scale=1.) q = tfd.Normal(loc=0., scale=2.) # Using TFPs KL Divergence kl = tfd.kl_divergence(p, q) print("TFPs KL_Divergence: {} matches with the KL Function we wrote: {}".format(kl, kl_func(p, q))) """ The cross_entropy computes the Shannons cross entropy defined as: H[P, Q] = E_p[-log q(X)] = -int_F p(x) log q(x) dr(x) """ # We create two normal distributions p = tfd.Normal(loc=1., scale=1.) q = tfd.Normal(loc=0., scale=2.) # Calculating the cross entropy cross_entropy = q.cross_entropy(p) print("TFPs cross entropy: {}".format(cross_entropy))