%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
# plt.subplot?
# Start a fresh figure and make the left cell of a 1-row, 2-column grid
# the current axes.
plt.figure()
plt.subplot(1, 2, 1)

# The integers 1..8, drawn as a solid line with circular markers.
linear_data = np.arange(1, 9)
plt.plot(linear_data, '-o')
[<matplotlib.lines.Line2D at 0x1a09308e780>]
# Element-wise squares of linear_data (quadratic growth, despite the name).
exponential_data = np.square(linear_data)

# Make the right cell of the 1x2 grid current and draw the squared series there.
plt.subplot(1, 2, 2)
plt.plot(exponential_data, '-o')
[<matplotlib.lines.Line2D at 0x1a09354a518>]
# Re-select the left cell of the 1x2 grid and add the squared series to it,
# this time with 'x' markers.
left_axes = plt.subplot(1, 2, 1)
left_axes.plot(exponential_data, '-x')
[<matplotlib.lines.Line2D at 0x1a09354e5f8>]
plt.figure()

# Left panel: the linear series.
ax1 = plt.subplot(1, 2, 1)
ax1.plot(linear_data, '-o')

# Right panel: the squared series. sharey=ax1 ties its y axis to the left
# panel so both are drawn on the same vertical scale.
ax2 = plt.subplot(1, 2, 2, sharey=ax1)
ax2.plot(exponential_data, '-x')
[<matplotlib.lines.Line2D at 0x1a093a4c940>]
plt.figure()
# subplot(1, 2, 1) and the three-digit shorthand subplot(121) select the same
# grid cell; compare the axes returned by the two spellings.
three_arg_axes = plt.subplot(1, 2, 1)
shorthand_axes = plt.subplot(121)
three_arg_axes == shorthand_axes
# Build a 3x3 grid of axes in which every panel shares both its x and y axis.
fig, axes_grid = plt.subplots(3, 3, sharex=True, sharey=True)
ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8, ax9 = axes_grid.flat

# Draw the linear series on the centre (5th) panel only.
ax5.plot(linear_data, '-')
[<matplotlib.lines.Line2D at 0x1a0939e8a90>]
# Make the tick labels of every panel in the current figure visible again.
# Shared-axis grids hide the labels of inner panels.
for ax in plt.gcf().get_axes():
    # On current matplotlib the label visibility of shared axes is driven by
    # the tick parameters, so re-enable the bottom and left labels there;
    # flipping the Text objects alone may not take effect or may not persist.
    ax.tick_params(axis='both', which='both', labelbottom=True, labelleft=True)
    # Also flip the existing label objects, which is what older matplotlib
    # versions require.
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_visible(True)

# necessary on some systems to update the plot
plt.gcf().canvas.draw()
# Build a 2x2 grid of axes that all share one x axis.
fig, axes_grid = plt.subplots(2, 2, sharex=True)
(ax1, ax2), (ax3, ax4) = axes_grid
axs = [ax1, ax2, ax3, ax4]

# Panel k (1-based) shows a histogram of 10**k draws from the standard normal
# distribution, i.e. 10, 100, 1000 and 10000 samples.
for exponent, panel in enumerate(axs, start=1):
    sample_size = 10 ** exponent
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    panel.hist(sample)
    panel.set_title('n={}'.format(sample_size))
# Same 2x2 experiment as before, but every histogram now uses 100 bins.
fig, axes_grid = plt.subplots(2, 2, sharex=True)
(ax1, ax2), (ax3, ax4) = axes_grid
axs = [ax1, ax2, ax3, ax4]

# Sample sizes grow by a factor of ten per panel: 10, 100, 1000, 10000.
for exponent, panel in enumerate(axs, start=1):
    sample_size = 10 ** exponent
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    panel.hist(sample, bins=100)
    panel.set_title('n={}'.format(sample_size))
plt.figure()
# 10000 points: normal(0, 1) draws on the y axis against uniform [0, 1) draws
# on the x axis.
Y = np.random.normal(0.0, 1.0, 10000)
X = np.random.random(10000)
plt.scatter(x=X, y=Y)
<matplotlib.collections.PathCollection at 0x1a095b39cf8>
# use gridspec to partition the figure into subplots
import matplotlib.gridspec as gridspec
# Data first: 10000 normal(0, 1) values for y and 10000 uniform [0, 1) values for x.
Y = np.random.normal(0.0, 1.0, 10000)
X = np.random.random(10000)

plt.figure()
gspec = gridspec.GridSpec(nrows=3, ncols=3)

# Carve the 3x3 grid into three regions:
#   top row, columns 1-2     -> histogram of X
#   rows 1-2, column 0       -> histogram of Y
#   rows 1-2, columns 1-2    -> the scatter plot itself
top_histogram = plt.subplot(gspec[0, 1:])
side_histogram = plt.subplot(gspec[1:, 0])
lower_right = plt.subplot(gspec[1:, 1:])

lower_right.scatter(X, Y)
top_histogram.hist(X, bins=100)
# Bind the return value so the cell does not echo the histogram arrays.
s = side_histogram.hist(Y, bins=100, orientation='horizontal')
# Clear both marginal histograms and redraw them normalised.
# `density=True` replaces the `normed=True` keyword, which current matplotlib
# no longer accepts; it scales the bars so the histogram integrates to 1.
top_histogram.clear()
top_histogram.hist(X, bins=100, density=True)
side_histogram.clear()
side_histogram.hist(Y, bins=100, orientation='horizontal', density=True)
# Mirror the side histogram horizontally by inverting its x axis.
side_histogram.invert_xaxis()

# Give the panels that are stacked vertically the same x range ...
for panel in (top_histogram, lower_right):
    panel.set_xlim(left=0, right=1)
# ... and the panels that sit side by side the same y range.
for panel in (side_histogram, lower_right):
    panel.set_ylim(bottom=-5, top=5)
%%HTML
<img src='http://educationxpress.mit.edu/sites/default/files/journal/WP1-Fig13.jpg' />
import pandas as pd
# Three samples of 10000 values each: normal(0, 1), uniform on [0, 1),
# and gamma with shape 2 and scale 1.
normal_sample = np.random.normal(0.0, 1.0, 10000)
random_sample = np.random.random(10000)
gamma_sample = np.random.gamma(shape=2, scale=1.0, size=10000)

# One column per distribution.
df = pd.DataFrame(dict(normal=normal_sample,
                       random=random_sample,
                       gamma=gamma_sample))

# Summary statistics for each column.
df.describe()
gamma | normal | random | |
---|---|---|---|
count | 10000.000000 | 10000.000000 | 10000.000000 |
mean | 2.013753 | -0.005721 | 0.497925 |
std | 1.406605 | 0.989676 | 0.288927 |
min | 0.009577 | -3.995943 | 0.000096 |
25% | 0.972541 | -0.672833 | 0.245580 |
50% | 1.696437 | -0.020877 | 0.498588 |
75% | 2.724449 | 0.652270 | 0.749074 |
max | 11.150603 | 4.290599 | 0.999745 |
plt.figure()
# Create a boxplot of the normal data; assign the output to a variable to
# suppress the cell output.
# whis=[0, 100] puts the whiskers at the 0th and 100th percentiles, i.e. the
# minimum and maximum of the data. It replaces the string 'range', which
# current matplotlib no longer accepts.
_ = plt.boxplot(df['normal'], whis=[0, 100])

# clear the current figure
plt.clf()

# plot boxplots for all three of df's columns, again with min/max whiskers
_ = plt.boxplot([df['normal'], df['random'], df['gamma']], whis=[0, 100])
plt.figure()
# Histogram of the gamma column with 100 bins; the result is bound to `_`
# so the cell does not echo the bin arrays.
gamma_values = df['gamma']
_ = plt.hist(gamma_values, bins=100)
import mpl_toolkits.axes_grid1.inset_locator as mpl_il
plt.figure()
# Boxplots of all three columns with whiskers at the data minimum and maximum.
# whis=[0, 100] (0th and 100th percentiles) replaces the string 'range',
# which current matplotlib no longer accepts.
plt.boxplot([df['normal'], df['random'], df['gamma']], whis=[0, 100])

# Overlay a second axes on top of the boxplot axes: 60% of its width, 40% of
# its height, anchored at location code 2 (upper left).
ax2 = mpl_il.inset_axes(plt.gca(), width='60%', height='40%', loc=2)
ax2.hist(df['gamma'], bins=100)
ax2.margins(x=0.5)

# switch the y axis ticks for ax2 to the right side
ax2.yaxis.tick_right()
# Without a `whis` argument, boxplot falls back to whiskers at 1.5 times the
# interquartile range (IQR) and draws the points beyond them as outliers.
plt.figure()
box_columns = [df[column] for column in ('normal', 'random', 'gamma')]
_ = plt.boxplot(box_columns)
plt.figure()
# Fresh data: 10000 normal(0, 1) values (Y) against 10000 uniform [0, 1) values (X).
Y = np.random.normal(0.0, 1.0, 10000)
X = np.random.random(10000)
# Two-dimensional histogram with 25 bins along each axis.
_ = plt.hist2d(x=X, y=Y, bins=25)
plt.figure()
# Same X/Y data, now binned 100 x 100.
_ = plt.hist2d(x=X, y=Y, bins=100)
# Attach a colorbar as the legend for the bin counts.
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x1a096fc1400>
import matplotlib.animation as animation
# Number of samples the animation will reveal, one per frame.
n = 100
# n draws from the standard normal distribution.
x = np.random.standard_normal(size=n)
x
array([-0.03759665, 1.45863897, -1.44443803, 0.39336824, -0.71695211, -0.61147876, 0.51561 , -1.01991831, -1.87796982, -1.41604796, 0.54248567, -0.43158249, 0.47428211, -0.44332983, -0.65214202, 0.36177694, -0.18835622, 0.77766368, 1.16609066, -0.82255926, 0.41623078, -0.75633347, 0.5926188 , 0.06944215, -0.97627011, 0.9840521 , -1.02636091, -0.48459407, -0.45617385, 0.46414126, 0.28475461, 0.93557336, -0.57783629, 2.32713713, 0.89099526, 0.41492897, 2.09511888, 0.40122221, -0.9819289 , -1.13818711, 2.36315443, 0.96044105, 0.6793982 , 0.06694397, 0.22116432, -0.59966818, -0.53599242, -1.25577794, 0.59879 , -0.9605938 , -0.25479204, -0.65891967, 1.7859198 , -1.41547624, -0.94650769, -0.75785715, -0.28297719, 0.36554587, -0.76302293, 1.00364603, 0.10042745, -1.8577856 , -0.04213539, 0.33147593, 0.0903794 , 1.13818021, -0.22505728, -0.0110968 , -0.86256053, -0.06550504, -0.00484148, 2.03376449, -0.34552664, -0.08727881, 2.71992331, 0.15519167, 0.81441133, -0.69282185, 0.21311791, -0.13059414, 0.68562559, -0.38456265, 0.72397251, -0.4064959 , 1.34213928, -0.87889589, -0.70150215, 1.06750807, 1.85782635, -0.02708023, -1.13684837, -0.88282376, 0.40865931, -0.07577144, 1.42805212, -1.04774049, 0.98784109, 2.0094654 , 0.29365277, -0.44977279])
# create the function that will do the plotting, where curr is the current frame
def update(curr, ax=None):
    """Redraw the histogram of the first ``curr`` samples of ``x``.

    Parameters
    ----------
    curr : int
        Current frame number supplied by the animation; also the number of
        samples from ``x`` included in the histogram.
    ax : matplotlib axes, optional
        Axes to draw on. Defaults to the current pyplot axes, which keeps
        direct calls of ``update(curr)`` working as before.

    Drawing on an explicit axes keeps the animation in its own figure even
    when later cells create new figures while it is still running.
    """
    if ax is None:
        ax = plt.gca()
    # check if animation is at the last frame, and if so, stop the animation a
    if curr == n:
        a.event_source.stop()
    ax.cla()
    # Bin edges from -4 to 4 inclusive. The stop value 4.5 is needed because
    # np.arange excludes its stop, and without an edge at 4 the samples in
    # (3.5, 4] would be left out of the histogram.
    bins = np.arange(-4, 4.5, 0.5)
    ax.hist(x[:curr], bins=bins)
    ax.axis([-4, 4, 0, 30])
    ax.set_title('Sampling the Normal Distribution')
    ax.set_ylabel('Frequency')
    ax.set_xlabel('Value')
    ax.annotate('n = {}'.format(curr), [3, 27])


fig = plt.figure()
# Dedicated axes for the animation, handed to every update() call via fargs.
hist_ax = fig.add_subplot(1, 1, 1)
a = animation.FuncAnimation(fig, update, fargs=(hist_ax,), interval=100)
plt.figure()
data = np.random.rand(10)
plt.plot(data)


def onclick(event):
    """Redraw ``data`` and show the click position in the title.

    ``event`` is the matplotlib mouse event; its pixel coordinates
    (``x``, ``y``) and data coordinates (``xdata``, ``ydata``) are written
    into the title.

    The axes are taken from the figure that owns the canvas which fired the
    event, not from pyplot's "current" axes, so a click on this figure keeps
    updating this figure even after later cells have created new ones.
    """
    clicked_axes = event.canvas.figure.gca()
    clicked_axes.cla()
    clicked_axes.plot(data)
    clicked_axes.set_title('Event at pixels {},{} \nand data {},{}'.format(event.x, event.y, event.xdata, event.ydata))


# tell mpl_connect we want to pass a 'button_press_event' into onclick when the event is detected
plt.gcf().canvas.mpl_connect('button_press_event', onclick)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-1-909921ae9e09> in <module>() ----> 1 plt.figure() 2 data = np.random.rand(10) 3 plt.plot(data) 4 5 def onclick(event): NameError: name 'plt' is not defined
from random import shuffle
# Ten country names, put into random order in place.
origins = [
    'China', 'Brazil', 'India', 'USA', 'Canada',
    'UK', 'Germany', 'Iraq', 'Chile', 'Mexico',
]
shuffle(origins)

# One row per country, with uniform [0, 1) values for height and weight.
df = pd.DataFrame(dict(height=np.random.rand(len(origins)),
                       weight=np.random.rand(len(origins)),
                       origin=origins))
df
plt.figure()
# picker=5 means the mouse doesn't have to click directly on a point, but can be up to 5 pixels away
plt.scatter(df['height'], df['weight'], picker=5)
plt.gca().set_ylabel('Weight')
plt.gca().set_xlabel('Height')


def onpick(event):
    """Show the origin of the picked scatter point in the axes title.

    ``event.ind`` holds the indices of the points under the cursor; the first
    one is looked up positionally in ``df``. The title is set on the axes that
    owns the picked artist rather than on pyplot's "current" axes, so the
    handler keeps targeting this plot even if other figures are created later.
    """
    origin = df.iloc[event.ind[0]]['origin']
    event.artist.axes.set_title('Selected item came from {}'.format(origin))


# tell mpl_connect we want to pass a 'pick_event' into onpick when the event is detected
plt.gcf().canvas.mpl_connect('pick_event', onpick)