from pandas import DataFrame
import numpy as np
from lets_plot import *
# Initialise Lets-Plot's HTML output before any plot is built
# (presumably required for plots to render inline in a notebook -- confirm).
LetsPlot.setup_html()
# This example was found at: www.cookbook-r.com/Graphs/Plotting_distributions_(ggplot2)
# Reproducible sample data: 200 ratings for condition 'A' drawn from N(0, 1)
# followed by 200 ratings for condition 'B' drawn from N(0.8, 1).
np.random.seed(123)
data = DataFrame({
    'cond': np.repeat(['A', 'B'], 200),
    'rating': np.concatenate([
        np.random.normal(loc=0, scale=1, size=200),
        np.random.normal(loc=0.8, scale=1, size=200),
    ]),
})
# Base plot shared by the single-variable examples: "rating" on the x-axis.
p = ggplot(data=data, mapping=aes(x='rating')) + ggsize(width=500, height=250)

# Basic histogram of "rating"
p + geom_histogram(binwidth=0.5)

# Histogram overlaid with a kernel density curve:
# - the histogram maps density (instead of count) to the y-axis
# - a semi-transparent density plot is layered on top
(
    p
    + geom_histogram(mapping=aes(y='..density..'), binwidth=0.5, colour='black', fill='white')
    + geom_density(alpha=0.2, color='#de2d26', fill='#ff6666')
)

# Histogram with a dashed red vertical line at the overall mean rating
(
    p
    + geom_histogram(binwidth=0.5, colour='black', fill='white')
    + geom_vline(xintercept=data['rating'].mean(), color='red', linetype='dashed', size=1)
)
# Base plot for grouped histograms: 'cond' is mapped to the fill colour.
p1 = ggplot(data=data, mapping=aes(x='rating', fill='cond')) + ggsize(width=500, height=250)

# Default histogram (stacked)
p1 + geom_histogram(binwidth=0.5)

# Overlaid histograms: both groups drawn in place, made translucent
p1 + geom_histogram(binwidth=0.5, position='identity', alpha=0.7)

# Interleaved histograms: bars of the two groups placed side by side
p1 + geom_histogram(binwidth=0.5, position='dodge')

# Base plot for the density examples: 'cond' is mapped to the outline colour.
p2 = ggplot(data=data, mapping=aes(x='rating', color='cond')) + ggsize(width=500, height=250)

# Density plot
p2 + geom_density()

# Density plot with semi-transparent fill
p2 + geom_density(mapping=aes(fill='cond'), alpha=0.7)
# Mean rating per condition; as_index=False keeps 'cond' as an ordinary column
# so the result can be passed as layer data to the plots below.
cdat = data.groupby(by=['cond'], as_index=False).mean()
cdat
# Output:
# |   | cond | rating   |
# |---|------|----------|
# | 0 | A    | 0.003787 |
# | 1 | B    | 0.685638 |
# Overlaid histograms with a dashed line at each group's mean
(
    p2
    + geom_histogram(mapping=aes(fill='cond'), position='identity', alpha=0.5, size=0)
    + geom_vline(
        mapping=aes(xintercept='rating', color='cond'),
        data=cdat,
        linetype='dashed',
        size=1,
    )
)

# Use a frequency polygon (geom_freqpoly) instead of a histogram
(
    p2
    + geom_freqpoly(mapping=aes(fill='cond'))
    + geom_vline(
        mapping=aes(xintercept='rating', color='cond'),
        data=cdat,
        linetype='dashed',
        size=1,
    )
)

# Density plots with a dashed line at each group's mean
(
    p2
    + geom_density()
    + geom_vline(
        mapping=aes(xintercept='rating', color='cond'),
        data=cdat,
        linetype='dashed',
        size=1,
    )
)
# Histogram faceted by 'cond' (passed as facet_grid's x argument)
(
    ggplot(data=data, mapping=aes(x='rating'))
    + geom_histogram(binwidth=0.5, colour='black', fill='white')
    + facet_grid(x='cond')
)

# With mean lines, using 'cdat' computed earlier; here 'cond' is passed as
# facet_grid's y argument.
(
    ggplot(data=data, mapping=aes(x='rating'))
    + geom_histogram(binwidth=0.5, colour='black', fill='white')
    + geom_vline(
        mapping=aes(xintercept='rating'),
        data=cdat,
        colour='red',
        linetype='dashed',
        size=1,
    )
    + facet_grid(y='cond')
    + ggsize(width=500, height=250)
)
# Base plot for the box plots: one box per condition, rating on the y-axis.
p3 = ggplot(data=data, mapping=aes(x='cond', y='rating')) + ggsize(width=300, height=200)

# A basic box plot
p3 + geom_boxplot()

# A basic box plot with the conditions colored
p3 + geom_boxplot(mapping=aes(fill='cond'))

# Style outliers: red markers with shape 8 at size 1.5
p3 + geom_boxplot(outlier_size=1.5, outlier_shape=8, outlier_color='red')