# update the local copy of METADATA
Pkg.update()

# install a registered package
Pkg.add("DataFrames")

# install any other package
#Pkg.clone("https://github.com/leethargo/PipeLayout.jl")

# checkout a branch of a package (default: master)
#Pkg.checkout("PipeLayout")

# list installed packages with versions
Pkg.status()

# peek at the raw input file
# (script-valid equivalent of the REPL shell-mode command `;head dowjones2016.csv`)
run(`head dowjones2016.csv`)

# look up the documentation of readcsv
# (script-valid equivalent of the REPL help-mode command `?readcsv`)
@doc readcsv

# read the file into a plain array
data = readcsv("dowjones2016.csv")
data[1:5,:]

# read the same file into a DataFrame
using DataFrames
df = readtable("dowjones2016.csv")
df[1:4, :]
df[:price]

# average price per symbol
avg = by(df, :symbol, d -> DataFrame(avgprice = mean(d[:price])))
avg[1:4, :]

# each symbol's share of the sum of all average prices
weights = DataFrame(symbol = avg[:symbol], weight = avg[:avgprice] / sum(avg[:avgprice]))
weights[1:4, :]

# original dataframe
df[1:4, :]

# two-way table with symbols as columns
#                   rows   columns  data
prices = unstack(df, :date, :symbol, :price)
prices[1:4, 1:4]

# attach each symbol's weight to every price row
joined = join(df, weights, on=:symbol)
joined[1:4, :]

joined[:contribution] = joined[:weight] .* joined[:price]
joined[1:4, :]

# index value per date: sum of the weighted prices
index = by(joined, :date, d -> DataFrame(value = sum(d[:contribution])))
index[1:4, :]

using Plots # general plotting
pyplot()    # backend, based on Python's matplotlib

x = cumsum(randn(10, 3))
plot(x)
plot(x')
plot(x, color=[:red :green])
# explicit broadcast: shift every entry by 3
plot!(x .+ 3, color=:black, alpha=0.5)

using StatPlots # for DataFrames integration

with(grid=false, legend=false, xticks=false, ylim=(0,300)) do
    plot(df, :date, :price, group=:symbol, color=:grey, alpha=0.4)
    plot!(index, :date, :value, linewidth=2)
end

bar(weights, :symbol, :weight, xrotation=50, color=:weight, grid=false)

using JuMP # modeling
using Cbc  # solver backend

# preparing data for indexing
syms = [Symbol(s) for s in weights[:symbol]]
days = 1:length(prices[:date])
@show size(syms) size(days);

"""
    find_fund(maxstocks; timelimit=10.0, gaplimit=0.01, lastday=200)

Build and solve a mixed-integer model that selects at most `maxstocks` of the
symbols in `syms` and assigns each a nonnegative weight, so that the weighted
sum of prices matches `index[d, :value]` on the days `1:lastday` with minimal
total absolute deviation.

Reads the global variables `syms`, `prices` and `index`.

# Keywords
- `timelimit`: passed to `CbcSolver` as `seconds`.
- `gaplimit`: passed to `CbcSolver` as `ratioGap`.
- `lastday`: last row of `prices` / `index` used for fitting; rows `1:lastday`
  are used.

# Returns
The result of `getvalue(weight)`, indexed by the entries of `syms`. The solver
status is printed with `@show` but not otherwise checked.
"""
function find_fund(maxstocks; timelimit=10.0, gaplimit=0.01, lastday=200)
    # local range; shadows the global `days` inside this function
    days = 1:lastday

    fund = Model(solver=CbcSolver(seconds=timelimit, ratioGap=gaplimit))

    # decisions
    @variable(fund, pick[syms], Bin)    # is stock included?
    @variable(fund, weight[syms] ≥ 0)   # what part of the portfolio

    # auxiliary variables
    @variable(fund, Δ⁺[days] ≥ 0)       # positive slack
    @variable(fund, Δ⁻[days] ≥ 0)       # negative slack

    # fit to Dow Jones index
    for d in days
        @constraint(fund, sum(prices[d,s] * weight[s] for s in syms) == index[d, :value] + Δ⁺[d] - Δ⁻[d])
    end

    # can only use stock if picked (this also bounds every weight by 1)
    for s in syms
        @constraint(fund, weight[s] ≤ pick[s])
    end

    # few stocks allowed
    @constraint(fund, sum(pick[s] for s in syms) ≤ maxstocks)

    # minimize the absolute violation (L1 norm)
    @objective(fund, :Min, sum(Δ⁺[d] + Δ⁻[d] for d in days))

    status = solve(fund)
    @show status

    return getvalue(weight)
end

trainingdays = 100
sol = find_fund(3, timelimit=6, lastday=trainingdays)

# price series of the fitted fund over all rows of `prices`
solfund = sum(sol[s] * prices[:, s] for s in syms);

with(xticks=[0, trainingdays, length(days)], yticks=[]) do
    plot(index, :date, :value, label="Dow Jones")
    plot!(solfund, label="Index Fund")
end

errors = abs.(index[:value] - solfund)
with(bins=20) do
    # out-of-sample days start right after the last training day, so the two
    # histograms do not share an observation
    histogram(errors[(trainingdays + 1):end], label="later", color=:red)
    histogram!(errors[1:trainingdays], alpha=0.8, label="training", color=:green)
end