sudo apt-get install ipython ipython-notebook
. The version of IPython needs to be 1.0 or later.julia
in the command line to start the Julia REPL, and install IJulia with: Pkg.add("IJulia")
.Pkg.add("PyPlot")
to install PyPlot, which is a plotting package for Julia based on Python's MatplotlibPkg.update()
, then retry it.ipython notebook --profile julia
(a window will open in your web browser).More comprehensive installation instructions can be found at https://github.com/JuliaLang/IJulia.jl, as well as https://github.com/stevengj/PyPlot.jl. If you are not using the recommended VM, stackoverflow and google will be your best friends.
(borrowed heavily from the installation instructions in CS194-16: Introduction to Data Science http://amplab.github.io/datascience-sp14/setup.html)
Note: While you should also be able to set up a similar environment on your Mac or PC without needing a Virtual Machine, the course staff will not support such configurations - so you’re on your own if you choose to go that route!
sudo bash setup.bash
. Make sure you are in the same directory as setup.bash
before executing this command. Enter the same password again to install lots of packages.julia
in the command line to start the Julia REPL, and install IJulia with: Pkg.add("IJulia")
.Pkg.add("PyPlot")
to install PyPlot, which is a plotting package for Julia based on Python's Matplotlib.Pkg.update()
, then retry it.ipython notebook --profile julia
(a window will open in your web browser).figure()
command. In particular, the code below used figure(figsize=(5, 3))
to indicate that the size of the plot should be 5x3 inchesmatplotlib.pyplot
to Julia. If there are naming conflicts (like the function hist()
which plots a histogram), use the full function call plt.hist()
# Import the plotting package for Julia
using PyPlot
# Declaring some global variables for multiple uses
N = 1000
sequence = [2, 4, 10, 50, 100, 500, 10000, 100000]
Qs = [0.4, 0.1, 0.25, 0.5]
ks = [2, 10, 50, 100, 500, 10000, 100000];
# Returns the number of heads in n flips
# rand() generates a random number between 0 and 1, that number is then rounded to either 0 or 1.
# By summing them up, we essentially are counting the number of heads
flip_coin(n) = sum(round(rand(n)))
flip_coin (generic function with 1 method)
heads = flip_coin(N)
figure(figsize=(5, 3))
bar([0, 1], [heads, N-heads], width=0.2, align="center")
xticks([0, 1], ["Heads", "Tails"])
title("1000 coin tosses");
We simply call flip_coin m times. The result will be a list of the number of heads in m runs
# Simulates a flip of n coins m times
flip_runs(n, m) = [flip_coin(n) for i = 1:m]
flip_runs (generic function with 1 method)
figure(figsize=(5, 3))
plt.hist(flip_runs(N, 1000))
title("Heads frequency in 1000 runs of 1000 coin tosses");
for i = sequence
is Julia's syntax for Python's for i in list
$i
is the Julia's way to do String concatenation by evaluating i
, much like Perl'sfor i = sequence
figure(figsize=(5, 3))
plt.hist(flip_runs(i, 1000))
title("Heads frequency in 1000 runs of $i coin tosses");
end
We shift the center of the horizontal axis to the origin by subtracting half the number of tosses from the number of heads
scaled_run(n, m) = [flip_coin(n) - n/2 for i = 1:m]
scaled_run (generic function with 1 method)
for i = sequence
figure(figsize=(5, 3))
plt.hist(scaled_run(i, 1000));
end
We use a fixed horizontal axis for all plots in this part
for i = sequence
figure(figsize=(5, 3))
plt.hist(scaled_run(i, 1000));
xlim(-500, 500)
end
To normalize, the leftmost point corresponds to tossing 0 heads and all tails, whereas the rightmost point corresponds to tossing all i
heads and 0 tails
for i = sequence
figure(figsize=(5, 3))
plt.hist(flip_runs(i, 1000));
xlim(0, i)
end
Comments on the three parts above:
rand()
push!
is Julia's syntax for appending an element toan array and modifying the first argument
rand_one() = rand() < 0.5 ? -1 : 1
# An n-step random walk starts at 0 by default
function path(n, walk=[0])
for i = 1:n
push!(walk, rand_one() + walk[end])
end
return walk
end
path (generic function with 2 methods)
figure(figsize=(5, 3))
title("20 random walks of 1000 coin flips")
for _ = 1:20
plot(path(1000))
end
figure(figsize=(5, 3))
title("100 normalized random walks of 1000 coin flips")
for _ = 1:100
plot([k/n for (n, k) = enumerate(path(1000))])
end
q
identity(x) = x
# runs and tosses are the number of runs and tosses respectively
# q is the threshold which we're interested in finding the fraction of heads smaller than
# func is the function we want to apply to the vertical axis values
# For this part, that function should do nothing (identity). For next part, it's the log function
function multi_toss_runs(runs, tosses, q, func)
runs_table = Dict()
for k = 1:tosses
runs_table[k] = Float64[]
end
for _ = 1:runs
heads = 0
for k = 1:tosses
heads += round(rand())[1]
push!(runs_table[k], heads)
end
end
# Computes the frequency of the fraction of heads is less than q
# Code explanation: for every run whose fraction of heads is less than q, give that run a 1, otherwise 0
# Then sum up all the values to get the number of runs that satisfies this requirement, and divides
# it by the total number of runs. Repeat for each of the k tosses
return [func(sum([heads/k <= q ? 1 : 0 for heads = runs_table[k]]) / runs) for k = 1:tosses]
end
multi_toss_runs (generic function with 1 method)
figure(figsize=(5, 3))
for q = Qs
plot(multi_toss_runs(10000, 1000, q, identity));
end
When you increase the number of tosses, it gets less and less likely that the fraction of heads will be smaller than a small threshold (like 0.4 and below)
figure(figsize=(5, 3))
for q = Qs
plot(multi_toss_runs(10000, 1000, q, log));
end
function q_curve(m, k)
res = [flip_coin(k) / k for _ = 1:m]
sort!(res)
first_quartile = res[m * 0.25]
second_quartile = res[m * 0.5]
third_quartile = res[m * 0.75]
return res, first_quartile, second_quartile, third_quartile
end
q_curve (generic function with 1 method)
figure(figsize=(5, 3))
title("Ratio of heads as a function of q")
curve, _, _, _ = q_curve(10000, 1000) # don't care about the quartile values here yet
plot(curve, 1:10000);
firsts, seconds, thirds = Float64[], Float64[], Float64[]
figure(figsize=(5, 3))
title("Ratio of heads in k tosses as a function of q")
for k = ks
curve, first_quartile, second_quartile, third_quartile = q_curve(10000, k)
plot(curve, 1:10000)
# Save the quartile values
push!(firsts, first_quartile)
push!(seconds, second_quartile)
push!(thirds, third_quartile)
end
As k increases, the quartile values becomes closer and closer to 0.5. This suggests that the heads/tails distribution tend toward 0.5/0.5
# Ignore k = 2, so we start indexing from 2. Recall that Julia is 1-indexed
function plot_quartiles()
plot(ks[2:], firsts[2:]);
plot(ks[2:], seconds[2:]);
plot(ks[2:], thirds[2:]);
end
plot_quartiles (generic function with 1 method)
figure(figsize=(5, 3))
title("q quartile markers as a function of k")
plot_quartiles();
# Computes the gap between the 0.75 marker and the 0.25 marker
gap = thirds - firsts
function plot_quartiles(plot_function)
plot_function(ks[2:], gap[2:]);
end
plot_quartiles (generic function with 2 methods)
figure(figsize=(5, 3))
title("Linear-linear plot")
plot_quartiles(plot);
figure(figsize=(5, 3))
title("Log-linear plot")
plot_quartiles(semilogx);
figure(figsize=(5, 3))
title("Linear-log plot")
plot_quartiles(semilogy);
figure(figsize=(5, 3))
title("Log-log plot")
plot_quartiles(loglog);
function q_curve_norm(m, k)
res = [(flip_coin(k)/k - 0.5) * sqrt(k) + 0.5 for _ = 1:m]
sort!(res)
return res
end
q_curve_norm (generic function with 1 method)
figure(figsize=(5, 3))
title("Scaled ratio of heads in k tosses as a function of q")
for k = ks
plot(q_curve_norm(10000, k), 1:10000)
end