from texttable import Texttable
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import pandas as pd
import scipy.stats
import warnings
warnings.filterwarnings("ignore")
In this chapter, we will only be dealing with special distributions which are frequently encountered in practice.
We have seen PMF and PDF in last chapter, here we review an example of discrete uniform distribution.
unif_d = sp.stats.randint.rvs(0, 10, size = 1000)
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(unif_d, density = True, bins = 30)
ax.set_title('Discrete Uniform Frequency Distribution', size = 16)
plt.show()
x = np.arange(1, 11)
unif_pmf = sp.stats.randint.pmf(x, 2, 10)
fig, ax = plt.subplots(figsize = (12, 7))
ax.scatter(x, unif_pmf, s = 100, color = 'green', label = 'Low = 2, High = 9')
ax.plot(x,unif_pmf, lw = 3, color = 'k')
ax.legend(fontsize = 18, loc = 'center')
<matplotlib.legend.Legend at 0x5fee348>
The binomial experiment has 4 properties:
* A sequence of n identical trials * Only two outcomes are possible: success or failure * The probability of success p does not change from trial to trial * Trials are independent eventsThe PMF of binomial ditribution is
We use a simple example to explain the PMF.
Every month, a personal banker might meet 50 people enquiring loans, emperically 30% of them have bad credit history. So calculate probability from 1:50 people that have bad credit history.
First we can asnwer what the probability is that the banker meet exactly 14 people who have bad credit history?
n = 50
k = 14 # what is the prob that exact 14 ppl she met had bad credit history?
b = scipy.special.comb(50, 14)
p = .3
f_bino = b*p**k*(1-p)**(n-k)
print('The prob of meeting {0} persons who have bad credit history is {1:.2f}%.'.format(k, f_bino * 100))
The prob of meeting 14 persons who have bad credit history is 11.89%.
Or we can use scipy.stats.binom.pmf
. To show the probability from 1 to 50 persons.
n = 50
p = .3
bad_credit = np.arange(1, 51)
y = sp.stats.binom.pmf(bad_credit, n, p)
fig, ax = plt.subplots(figsize = (10, 6))
ax.plot(bad_credit, y, lw = 3, color = 'r', alpha = .5)
ax.set_ylim([0, .13])
ax.set_title('The probability that from 1 to 50 persons who have bad credit history', size = 16, x = .5, y = 1.02)
ax.set_xlabel('Number of Person Who Has Bad Credit History', size = 12)
ax.set_ylabel('Probability', size = 12)
plt.show()
Because there are the 30% of people who have bad credit history, thus we can see the mean of the distribution is 15 out of 50.
Next we can formulate another question using scipy.stats.binom.cdf
.
If a trade trades n times a month, he has a p chance of winning the trade, find out the probability that he can win at least k trades a month.
We can also ask what are the probabilites he wins more than k trades, or between (k1, k2) trades.
n = 20 # number of trades
p = .55 # winning odds
k = 12 # at least win k trades
k1 = 14
k2 = 4
win_less = sp.stats.binom.cdf(k, n, p)
win_more = 1- sp.stats.binom.cdf(k, n, p)
win_betw = sp.stats.binom.cdf(k1, n, p) - sp.stats.binom.cdf(k2, n, p)
table = Texttable()
table.set_cols_align([ 'c', 'c', 'c'])
table.set_cols_valign([ 'm', 'm', 'm'])
table.add_rows([['Win Less', ' Win More ', ' Win btw 4~14 '],
[ win_less, win_more, win_betw]])
print(table.draw())
+----------+------------+----------------+ | Win Less | Win More | Win btw 4~14 | +==========+============+================+ | 0.748 | 0.252 | 0.943 | +----------+------------+----------------+
What if the probability of wining changing from 0.1 to 0.8, what is the probability that he wins less than 6 trades, assuming every month he trades 20 times.
chance = np.arange(.1, .81, .05)
win_less = sp.stats.binom.cdf(6, 20, chance)
data_dict = {'win_rate':chance, 'win_less':win_less} # I am using annotation, that's why i am using pandas to locate the index
df = pd.DataFrame(data_dict)
df.plot(figsize = (10, 6))
plt.show()
To show the idea in a scatter plot.
fig, ax = plt.subplots(figsize = (12, 7))
ax.scatter(chance, win_less)
txt = 'This point means if the winning rate is 40%,\n then the probability of winning less\n than 6 trades is 25%.'
ax.annotate(txt, xy = (df.iloc[6][0], df.iloc[6][1]),
xytext = (.35, .5), weight = 'bold', color = 'Tomato', size = 14,
arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3', color = 'b'))
plt.show()
sp.stats.binom.rvs()
is the randome generator for binomial distribution.
n = 1000 # number of trials
p = 0.3 # probability of success
bino = sp.stats.binom.rvs(n, p, size = 1000)
txt = 'This line is the $y =p \cdot n$ \n and where highest draw should be.'
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(bino,bins= 80)
ax.axvline(p*n, color = 'Tomato', ls = '--', lw = 3)
ax.annotate(txt, xy = (p*n, h.max()*0.7),
xytext = (p*n ,h.max()-5), weight = 'bold',
color = 'Tomato', size = 14,
arrowprops = dict(arrowstyle = '->',
connectionstyle = 'arc3', color = 'b'))
ax.set_title('Generated Binomial R.V.s', size = 18)
plt.show()
According to the example, if success rate p=.3, total trials of 1000.Then the counts of successes have the frequency distribution in the histogram.
We can also calculate all important moments of distributions by using sp.stats.binom.stats(n, p, moments = 'mvsk')
. The text table library is used to format the output.
n = 1000 # number of trials
p = 0.3 # probability of success
bino_stats = sp.stats.binom.stats(n, p, moments = 'mvsk') # mean, variance, skewness, kurtosis
table = Texttable()
table.set_cols_align([ "c", "c", 'c','c'])
table.set_cols_valign([ "m", "m", 'm','m'])
table.add_rows([["mean", " variance ", ' skewness ', 'kurtosis '],
[ bino_stats[0], bino_stats[1], bino_stats[2], bino_stats[3]]])
print(table.draw())
+------+------------+------------+-----------+ | mean | variance | skewness | kurtosis | +======+============+============+===========+ | 300 | 210 | 0.028 | -0.001 | +------+------------+------------+-----------+
When n→∞ and p→0,binomial distribution converges to Poisson distribution, i.e. when n is large and p is small, we can use Poisson to approximate Binomial.
For an ordinary traders, every trade has 1/1000 probabilty to encounter a 'Black Swan' shock, a trader sets 20 trades per month, what is the probability that she will encounter 2 'Black Swan' within 5 years?
This problem can be solved by Binomial, the formulation as below
Number of Trades=20×12×5=1200P(x=2)=(12002)(11000)2(9991000)1198Of course, we can employ Poisson PMF to approximate it, the parameter for Poisson distribution is
λ=np=1200×11000=1.2that means every 5 years, there is in average 1.2 times of Black Swan shock.
P(x=2)=λke−λk!=1.22e−1.22!To use the PMF directly
sp.special.comb(1200, 2)*(1/1000)**2*(999/1000)**1198
0.21698280952603388
Or use the built-in function for Poisson sp.stats.poisson.pmf()
.
k = 2
n = 20 * 12 * 5 # 20 times per month, and 5 years span
p = 1/1000
lambdaP = p * n # lambda in Poisson
p = sp.stats.poisson.pmf(k, lambdaP)
print('The probability of having {0} BS shock(s) in a span of 5 years is {1:.2f}%.'.format(k, p*100))
The probability of having 2 BS shock(s) in a span of 5 years is 21.69%.
Suprisingly high probability of having 1 BS shock, and one BS shock could possibly wipe out the whole account. Take care, traders, the survival is pivotal!
So what's the probability of having more than k times BS shock?
k = 2
prob_sf = 1 - sp.stats.poisson.cdf(k, lambdaP)
prob_sf_inv = sp.stats.poisson.cdf(k, lambdaP)
print('The probability of having more than %1.0f BS shock in 5 years is %3.3f%%.' % (k, prob_sf*100)) # double % can escape formating
The probability of having more than 2 BS shock in 5 years is 12.051%.
poiss = sp.stats.poisson.rvs(lambdaP, size = 10000)
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(poiss, density = True, bins = 10)
ax.set_title('Poisson Frequency Distribution', size = 16)
plt.show()
Again we can compute all most important moments
poiss_stats = sp.stats.poisson.stats(k, moments = 'mvsk') # mean, variance, skewness, kurtosis
table = Texttable()
table.set_cols_align([ "c", "c", 'c','c'])
table.set_cols_valign([ "m", "m", 'm','m'])
table.add_rows([["mean", " variance ", ' skewness ', 'kurtosis '],
[ poiss_stats[0], poiss_stats[1], poiss_stats[2], poiss_stats[3]]])
print(table.draw())
+------+------------+------------+-----------+ | mean | variance | skewness | kurtosis | +======+============+============+===========+ | 2 | 2 | 0.707 | 0.500 | +------+------------+------------+-----------+
The PMF of G-D is f(k)=p(1−p)k, where k is a non-negative integer. p is the probability of success, and k is the number of failures before success.
So G-D is trying to answer 'How many times you have to fail in order to embrace the first success?'
If each trade has 1/1000 chance to encounter a BS shock, what is the prob of encounter a BS shock after trade k times.
k = 10
p = 1/1000
geodist = (1 - 1/1000)**10*1/1000
geodist
print('The probability of observing exact %1.0f times of safe trading before a BS shock is %3.3f.' %(k, geodist))
The probability of observing exact 10 times of safe trading before a BS shock is 0.001.
Or use built-in sp.stats.geom.pmf()
sp.stats.geom.pmf(10, 1/1000)
0.000991035916125874
sp.stats.geom.cdf(k,p)
print('The probability of observing %1.0f or fewer than %1.0f times of safe trading before a BS shock is %3.3f.'%(k,k,geodist))
The probability of observing 10 or fewer than 10 times of safe trading before a BS shock is 0.001.
The moments and randome generator are as following
mean, var, skew, kurt = sp.stats.geom.stats(p, moments='mvsk')
table = Texttable()
table.set_cols_align([ "c", "c", 'c','c', 'c', 'c'])
table.set_cols_valign([ "m", "m", 'm','m', 'm', 'm'])
table.add_rows([['p','k',"mean", " variance ", ' skewness ', ' kurtosis '],
[ p, k, mean, var, skew, kurt]])
print(table.draw())
+-------+----+------+------------+------------+------------+ | p | k | mean | variance | skewness | kurtosis | +=======+====+======+============+============+============+ | 0.001 | 10 | 1000 | 999000 | 2.000 | 6.000 | +-------+----+------+------------+------------+------------+
geomet = sp.stats.geom.rvs(p, size = 10000)
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(geomet, density = True, bins = 30)
ax.set_title('Geometric Frequency Distribution', size = 16)
plt.show()
The main difference between hypergeometric and binomial is that the former sampling is not independent of each other, i.e. the sampling is without replacement.
The formula is f(x)=(Kx)(M−KN−x)(MN)
There is 100 candies in an urn, 20 are red, 80 are blue, if we take 5 of them out from it. What is the prob of having exact 4 red candies?
Solution:
(204)(801)(1005)To solve it:
C = sp.special.comb(20, 4)*sp.special.comb(80, 1) /sp.special.comb(100, 5)
print('The prob of have 4 red candies by taking 5 out is %1.6f%%.'% (C*100))
The prob of have 4 red candies by taking 5 out is 0.514826%.
Or with built-in function sp.stats.hypergeom.pmf()
# pmf(x, M, N, n, loc=0)
hgeo = sp.stats.hypergeom.pmf(4, 100, 20, 5,loc = 0) # the arg order the same as MATLAB, not recommended
hgeo
0.005148263616599428
What is the probability that at most 4 red candies are taken? i.e. The sum-up of probabilities of drawing 1, 2, 3, and 4 candies.
hgeo_cdf = sp.stats.hypergeom.cdf(4, 100, 20, 5,loc = 0) # the arg order the same as MATLAB, not recommended
print('The prob of have at most 4 red candies by taking 5 out is %1.6f%%.' %(hgeo_cdf*100))
The prob of have at most 4 red candies by taking 5 out is 99.979407%.
hgeo_rv = sp.stats.hypergeom.rvs(100, 20, 5, size = 10000)
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(hgeo_rv, density = True)
ax.set_title('Geometric Frequency Distribution', size = 16)
s = ''' It can be interpreted as: if 100 candies in the urn,
20 are red, take 5 out of 100.
The chance of getting from 1 to 5 red candies,
is shown in the chart.
As we can see it is nearly impossible to get 4 or 5 candies.
But getting 1 red candy is the most possible outcome.'''
ax.text(1.6, .5, s, fontsize=14, color ='red')
plt.show()
mean, var, skew, kurt = sp.stats.hypergeom.stats(100, 20, 5, moments='mvsk')
table = Texttable()
table.set_cols_align([ "c", "c", 'c','c', 'c', 'c'])
table.set_cols_valign([ "m", "m", 'm','m', 'm', 'm'])
table.add_rows([['M','k',"mean", " variance ", ' skewness ', ' kurtosis '],
[ 100, 20, mean, var, skew, kurt]])
print(table.draw())
+-----+----+------+------------+------------+------------+ | M | k | mean | variance | skewness | kurtosis | +=====+====+======+============+============+============+ | 100 | 20 | 1 | 0.768 | 0.629 | -0.010 | +-----+----+------+------------+------------+------------+
The PDF of Uniform Distribution is
f(x)=1b−aAnd its r.v. generator is one of most frequently used function in NumPy: np.random.rand()
unif = np.random.rand(10000)
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(unif, density = True, bins = 30)
ax.set_title('Continous Uniform Frequency Distribution', size = 16)
plt.show()
# pdf(x, loc=0, scale=1)
# cdf(x, loc=0, scale=1)
x = np.linspace(-.2, 1.2, 100)
unif_pdf = sp.stats.uniform.pdf(x)
unif_cdf = sp.stats.uniform.cdf(x)
fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (17, 7))
ax[0].plot(x,unif_pdf, lw = 4, label = 'PDF of Continouse U-D')
ax[0].set_xlim([-.1, 1.1])
ax[0].set_ylim([0, 2])
ax[0].legend(fontsize = 16)
ax[1].plot(x,unif_cdf, lw = 4, label = 'CDF of Continouse U-D')
ax[1].set_xlim([-.2, 1.2])
ax[1].set_ylim([0, 2])
ax[1].legend(fontsize = 16)
plt.show()
mean, var, skew, kurt = sp.stats.uniform.stats(moments='mvsk')
table = Texttable()
table.set_cols_align([ "c", "c", 'c','c'])
table.set_cols_valign([ "m", "m", 'm','m'])
table.add_rows([["mean", " variance ", ' skewness ', ' kurtosis '],
[ mean, var, skew, kurt]])
print(table.draw())
+-------+------------+------------+------------+ | mean | variance | skewness | kurtosis | +=======+============+============+============+ | 0.500 | 0.083 | 0 | -1.200 | +-------+------------+------------+------------+
The most convenient method of creating normal distribution plot is to use sp.stats.norm.pdf()
.
The PDF function single normal distribution is
f(x)=1σ√2πe−12(x−μσ)2mu = 2
sigma = 1
x = np.arange(-2, 6, 0.1)
norm_pdf = sp.stats.norm.pdf(x, mu, sigma)
norm_cdf = sp.stats.norm.cdf(x, mu, sigma)
fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (17, 7))
ax[0].plot(x,norm_pdf, lw = 4, label = 'PDF of Normal Distribution', ls = '--')
ax[0].legend(fontsize = 16, loc = 'lower left', framealpha=0.2)
ax[1].plot(x,norm_cdf, lw = 4, label = 'CDF of Normal Distribution')
ax[1].legend(fontsize = 16,fancybox=True, framealpha=0.5)
plt.show()
The inverse normal CDF is commonly used in calculating p-value, the plot below is useful to understand the idea.
norm_95_r = sp.stats.norm.ppf(.975) # ppf mean point percentage function, actually inverse CDF
norm_95_l = sp.stats.norm.ppf(.025)
x = np.linspace(-5, 5, 200)
y = sp.stats.norm.pdf(x)
xl = np.linspace(-5, norm_95_l, 100)
yl = sp.stats.norm.pdf(xl)
xr = np.linspace(norm_95_r, 5, 100)
yr = sp.stats.norm.pdf(xr)
fig, ax = plt.subplots(figsize = (17, 7))
ax.plot(x,y, lw = 4, label = 'PDF of Normal Distribution', ls = '-', color = 'orange')
ax.set_ylim([0, .45])
ax.fill_between(x, y, 0, alpha=0.1, color = 'blue')
ax.fill_between(xl,yl, 0, alpha=0.6, color = 'blue')
ax.fill_between(xr,yr, 0, alpha=0.6, color = 'blue')
ax.text(-.2, 0.15, '95%', fontsize = 20)
ax.text(-2.3, 0.015, '2.5%', fontsize = 12, color = 'white')
ax.text(2.01, 0.015, '2.5%', fontsize = 12, color = 'white')
ax.annotate('±%.4f' %norm_95_r, xy = (norm_95_r, 0), xytext = (-.4, .05), weight = 'bold', color = 'r',
arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3', color = 'b'), fontsize = 16)
ax.annotate('±%.4f' %norm_95_r, xy = (norm_95_l, 0), xytext = (-.4, .05), weight = 'bold', color = 'r',
arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3', color = 'b'), fontsize = 16)
ax.set_title('Normal Distribution And 2.5% Shaded Area', size = 20)
plt.show()
To generate a normal distribution histogram
# rvs(loc=0, scale=1, size=1, random_state=None)
norm_rv = sp.stats.norm.rvs(mu, sigma, size = 5000)
fig, ax = plt.subplots(figsize = (12, 7))
h, bins, patches = ax.hist(norm_rv, density = True, bins = 50)
ax.set_title('Normal Frequency Distribution', size = 16)
plt.show()
mean, var, skew, kurt = sp.stats.norm.stats(mu, sigma, moments='mvsk')
table = Texttable()
table.set_cols_align([ "c", "c", 'c','c'])
table.set_cols_valign([ "m", "m", 'm','m'])
table.add_rows([["mean", " variance ", ' skewness ', ' kurtosis '],
[ mean, var, skew, kurt]])
print(table.draw())
+------+------------+------------+------------+ | mean | variance | skewness | kurtosis | +======+============+============+============+ | 2 | 1 | 0 | 0 | +------+------------+------------+------------+
The multivariate normal distribution density function is fX(x1,...,x2)=1√(2π)k|Σ|exp(−(x−μ)TΣ−1(x−μ)2)
%matplotlib inline
mu_x = 0
sigma_x = 2
mu_y = 0
sigma_y = 2
#Create grid and multivariate normal
x = np.linspace(-10,10,500)
y = np.linspace(-10,10,500)
X, Y = np.meshgrid(x,y)
pos = np.empty(X.shape + (2,))
pos[:, :, 0] = X; pos[:, :, 1] = Y # more technical than next one
norm = sp.stats.multivariate_normal([mu_x, mu_y], [[sigma_x, 0], [0, sigma_y]]) # frozen
#Make a 3D plot
fig = plt.figure(figsize = (10, 6))
ax = fig.gca(projection='3d')
ax.plot_surface(X, Y, norm.pdf(pos),cmap='viridis',linewidth=0)
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
plt.show()
#Parameters to set
mu_x = 0
sigma_x = 7
mu_y = 0
sigma_y = 15
x = np.linspace(-10,10,500)
y = np.linspace(-10,10,500)
X,Y = np.meshgrid(x,y)
pos = np.array([X.flatten(),Y.flatten()]).T # more intuitive than last one
rv = sp.stats.multivariate_normal([mu_x, mu_y], [[sigma_x, 0], [0, sigma_y]])
fig = plt.figure(figsize=(10,10))
ax0 = fig.add_subplot(111)
ax0.contourf(X, Y, rv.pdf(pos).reshape(500,500),cmap='viridis')
plt.show()
The PDF of Beta distribution is
f(x,a,b)=Γ(a+b)xa−1(1−x)b−1Γ(a)Γ(b)where 0≤x≤1 and a>0, b>0, Γ is the Gamma function.
Beta distribution is a natural good choice for priors in Bayesian econometrics since its range is bounded in unit.
params[0][0][1]
0.5
x = np.linspace(0, 1, 100)
fig = plt.figure(figsize=(9, 9))
ax = fig.add_subplot(111)
params = np.array([[[.5,.5]],
[[5,1]],
[[1,5]],
[[2,2]],
[[2,3]],
[[3,2]],
[[1,1]]])
for i in range(params.shape[0]):
beta_pdf = sp.stats.beta.pdf(x, params[i][0][0], params[i][0][1])
ax.plot(x, beta_pdf, lw = 3, label = '$a = %.1f, b = %.1f$' % (params[i][0][0], params[i][0][1]))
ax.legend()
ax.axis([0, 1, 0, 3])
(0.0, 1.0, 0.0, 3.0)
fig = plt.figure(figsize=(9, 9))
ax = fig.add_subplot(111)
x = np.linspace(0,1,100)
params = np.array([[[.5,.5]],
[[5,1]],
[[1,5]],
[[2,2]],
[[2,3]],
[[3,2]],
[[1,1]]])
for i in range(params.shape[0]):
beta_cdf = sp.stats.beta.cdf(x, params[i][0][0], params[i][0][1])
ax.plot(x, beta_cdf, lw = 3, label = '$a = %.1f, b = %.1f$' % (params[i][0][0], params[i][0][1]))
ax.legend()
ax.axis([0, 1, 0, 1])
(0.0, 1.0, 0.0, 1.0)
χ2 distribution is closely connected with normal distributions, if z has the standard normal distribution, then z2 has the χ2 distribution with d.f.=1. And futher,if
z1,z2,...,zk∼i.i.d.N(0,1)Then summation has a χ2 distribution of d.f.=k:
k∑i=0z2i∼χ2(k)k = 1
x = np.linspace(0, 5, 100)
chi_pdf = sp.stats.chi2.pdf(x, k)
fig, ax = plt.subplots(figsize = (12, 7))
ax.plot(x, chi_pdf, lw = 3, c = 'r', label = '$\chi^2$ distribution with d.f. = 1')
ax.legend(fontsize = 18)
plt.show()
fig, ax = plt.subplots(figsize = (12, 7))
for i in range(1, 6):
x = np.linspace(0, 5, 100)
chi_pdf = sp.stats.chi2.pdf(x, i)
ax.plot(x, chi_pdf, lw = 3, label = '$\chi^2$ distribution with d.f. = %.0d'%i)
ax.legend(fontsize = 12)
ax.axis([0, 5, 0, 1])
plt.show()
If U1 has a χ2 distribution with ν1 d.f. and U2 has a χ2 distribution with ν2 d.f., then
U1/ν1U2/ν2∼F(ν1,ν2)We are using F distribution for ratios of variances.
x = np.linspace(0, 5, 100)
fig, ax = plt.subplots(figsize = (12, 7))
df1 = 10
df2 = 5
f_pdf = sp.stats.f.pdf(x, dfn = df1, dfd = df2)
ax.plot(x, f_pdf, lw =3, label = '$df_1 = %.d, df_2 = %.d$' %(df1, df2))
df1 = 5
df2 = 10
f_pdf = sp.stats.f.pdf(x, dfn = df1, dfd = df2)
ax.plot(x, f_pdf, lw =3, label = '$df_1 = %.d, df_2 = %.d$ '%(df1, df2))
df1 = 8
df2 = 15
f_pdf = sp.stats.f.pdf(x, dfn = df1, dfd = df2)
ax.plot(x, f_pdf, lw =3, color = 'red', label = '$df_1 = %.d, df_2 = %.d$' %(df1, df2))
df1 = 15
df2 = 8
f_pdf = sp.stats.f.pdf(x, dfn = df1, dfd = df2)
ax.plot(x, f_pdf, lw =3, label = '$df_1 = %.d, df_2 = %.d$ '%(df1, df2))
ax.legend(fontsize = 15)
plt.show()