# Necessary import evil
from physt import h1, binnings
import numpy as np
import matplotlib.pyplot as plt
# Synthetic sample data shared by the examples below.
# The global NumPy RNG is seeded first so that every run draws the same values.
np.random.seed(42)
# Two normally distributed samples: (mean 169, std 10) and (mean 180, std 6).
heights1 = np.random.normal(loc=169, scale=10, size=100000)
heights2 = np.random.normal(loc=180, scale=6, size=100000)
# Uniform samples from the half-open interval [0, 1).
numbers = np.random.rand(100000)
# Sample sizes: 50 log-spaced values from 10**0 to 10**4, truncated to integers.
X = np.logspace(0, 4, 50).astype(int).tolist()
algos = binnings.bincount_methods
# For each method, the bin counts it suggests -- one entry per sample size in X.
Ys = {algo: [] for algo in algos}
for x in X:
    # A single exponential sample of size x is drawn and then fed to every
    # method, so all methods are evaluated on the same data.
    ex_dataset = np.random.exponential(1, x)
    for algo in algos:
        Ys[algo].append(
            binnings.ideal_bin_count(ex_dataset, algo)
        )
# Plot the suggested bin count against the sample size, one line per method.
figure, axis = plt.subplots(figsize=(8, 8))
for algo in algos:
    # The "default" method is drawn dotted with point markers; all other
    # methods use a plain solid line.
    line_style = ":." if algo == "default" else "-"
    axis.plot(X, Ys[algo], line_style, label=algo, alpha=0.5, lw=2)
# Both axes are logarithmic.
axis.set_xscale("log")
axis.set_yscale("log")
axis.set_xlabel("Sample size")
axis.set_ylabel("Bin count")
# loc=2 is matplotlib's numeric code for the upper-left corner.
axis.legend(loc=2);
Uses numpy.logspace to create bins.
# Exponential binning of the uniform sample, drawn on two side-by-side axes.
figure, axis = plt.subplots(1, 2, figsize=(10, 4))
linear_ax, log_ax = axis
hist1 = h1(
    numbers,
    "exponential",
    bin_count=10,
    range=(0.0001, 1),
)
# Left panel: frequencies in green on a linear x axis.
hist1.plot(color="green", ax=linear_ax)
# Right panel: density with error bars; its x axis is switched to log below.
hist1.plot(density=True, errors=True, ax=log_ax)
linear_ax.set_title("Absolute scale")
log_ax.set_title("Log scale")
log_ax.set_xscale("log");
Useful for integer values (or values you want to round to integers); it creates bins of width 1 centered on the integers (i.e. 0.5-1.5, 1.5-2.5, ...).
# Sum of two dice (should be triangle, right?)
# Each die: floor(uniform[0, 1) * 6) is an integer-valued float in 0..5.
first_die = np.floor(np.random.rand(10000) * 6)
second_die = np.floor(np.random.rand(10000) * 6)
# The +2 shifts both zero-based dice up to 1..6, so the sum lies in 2..12.
dice = first_die + second_die + 2
h1(dice, "integer").plot(ticks="center", density=True);
Based on quantiles, this binning results in all bins containing roughly the same number of observations.
# Quantile binning of heights1 into 40 bins, shown as frequencies and density.
figure, axis = plt.subplots(1, 2, figsize=(10, 4))
frequency_ax, density_ax = axis
hist2 = h1(heights1, "quantile", bin_count=40)
hist2.plot(ax=frequency_ax);
hist2.plot(density=True, ax=density_ax);
frequency_ax.set_title("Frequencies")
density_ax.set_title("Density");
# Bare expression: echoes the histogram summary when run as a notebook cell.
hist2
Histogram1D(bins=(40,), total=100000, dtype=int32)
# Overlay a quantile-based and an equal-width histogram of heights1 (10 bins
# each) on one axis, both semi-transparent so the overlap stays visible.
figure, axis = plt.subplots()
quantile_hist = h1(heights1, "quantile", bin_count=10)
quantile_hist.plot(
    alpha=0.3,
    density=True,
    ax=axis,
    label="Quantile based",
)
equal_hist = h1(heights1, 10)
equal_hist.plot(
    alpha=0.3,
    density=True,
    ax=axis,
    color="green",
    label="Equal spaced",
)
# loc=2 is matplotlib's numeric code for the upper-left corner.
axis.legend(loc=2);
This binning is useful if you want "human-friendly" bin intervals.
# Fixed-width binning: every bin is requested to be 3 units wide.
hist_fixed = h1(
    heights1,
    "fixed_width",
    bin_width=3,
)
hist_fixed.plot()
# Bare expression: echoes the histogram summary when run as a notebook cell.
hist_fixed
Histogram1D(bins=(31,), total=100000, dtype=int32)
The width and alignment of the bins are guessed from the data, with an approximate number of bins as an (optional) parameter.
# "Pretty" binning: bin_count is the requested (approximate) number of bins.
pretty = h1(
    heights1,
    "pretty",
    bin_count=15,
)
pretty.plot()
# Bare expression: echoes the histogram summary when run as a notebook cell.
pretty
Histogram1D(bins=(19,), total=100000, dtype=int32)
Astropy includes its own histogramming tools. If this package is available, we reuse its binning methods. These include the "blocks", "scott", "knuth" and "freedman" methods:
See http://docs.astropy.org/en/stable/visualization/histogram.html for more details.
# A smaller normal sample (mean 180, std 6, 5000 values) for the astropy-based
# binning methods.
middle_sized = np.random.normal(180, 6, 5000)
for n in ("blocks", "scott", "knuth", "freedman"):
    # The method name serves both as the binning selector and as the name
    # given to the resulting histogram.
    algo = str(n)
    hist = h1(middle_sized, algo, name=algo)
    # No axis is passed here, so each histogram is plotted separately.
    hist.plot(density=True)