using Plots
using Statistics
using DataFrames
using CSV
using LinearAlgebra
open("./49_Industry_Portfolios.CSV", "r") do f
global df
csv = join(readlines(f)[2268:2361], '\n')
df = CSV.read(IOBuffer(csv))
end
Column1 | Agric | Food | Soda | Beer | Smoke | Toys | Fun | Books | |
---|---|---|---|---|---|---|---|---|---|
Int64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | |
1 | 1927 | 26.23 | 41.88 | -99.99 | 64.22 | 47.72 | -22.67 | 15.66 | 8.97 |
2 | 1928 | 2.46 | 27.95 | -99.99 | 38.8 | -2.3 | 44.33 | 46.38 | 35.48 |
3 | 1929 | -16.27 | -14.53 | -99.99 | -16.07 | -7.17 | -57.07 | -36.28 | -4.72 |
4 | 1930 | -47.79 | -11.23 | -99.99 | -53.28 | -3.24 | -20.45 | -19.85 | -15.73 |
5 | 1931 | -48.67 | -33.53 | -99.99 | -43.07 | -20.18 | -67.55 | -74.4 | -65.88 |
6 | 1932 | 11.94 | -5.33 | -99.99 | 13.03 | -6.66 | -42.77 | -33.77 | -46.78 |
7 | 1933 | 156.93 | 41.32 | -99.99 | 189.18 | 52.58 | 87.09 | 55.6 | 53.33 |
8 | 1934 | 33.4 | 9.25 | -99.99 | -11.62 | 30.76 | -15.36 | 17.86 | 36.81 |
9 | 1935 | -7.43 | 23.01 | -99.99 | 31.13 | 18.61 | 60.95 | 78.39 | 16.5 |
10 | 1936 | 31.31 | 19.84 | -99.99 | 5.38 | 4.35 | 79.72 | 58.47 | 3.99 |
11 | 1937 | -27.26 | -25.84 | -99.99 | -28.13 | -19.34 | -60.2 | -44.79 | -66.94 |
12 | 1938 | 21.26 | 21.54 | -99.99 | 24.37 | 25.74 | 62.68 | 28.72 | 13.86 |
13 | 1939 | 36.78 | 9.04 | -99.99 | -17.21 | 2.75 | 78.23 | -35.74 | -13.77 |
14 | 1940 | -12.69 | -10.18 | -99.99 | 0.75 | -9.66 | 21.23 | -7.01 | -23.33 |
15 | 1941 | 10.26 | -3.98 | -99.99 | 14.3 | -21.27 | -34.62 | 30.44 | -30.39 |
16 | 1942 | -2.09 | 13.37 | -99.99 | 31.71 | 2.89 | 10.48 | 40.26 | 91.79 |
17 | 1943 | 17.25 | 33.45 | -99.99 | 78.32 | 23.08 | 40.22 | 54.04 | 142.0 |
18 | 1944 | 23.17 | 21.62 | -99.99 | 18.96 | 18.94 | 56.62 | 32.31 | 60.33 |
19 | 1945 | 30.18 | 37.98 | -99.99 | 159.38 | 32.46 | 40.83 | 75.37 | 106.18 |
20 | 1946 | 41.27 | -6.92 | -99.99 | -19.15 | -1.72 | -14.74 | 0.93 | -28.72 |
21 | 1947 | 12.59 | 1.79 | -99.99 | -20.63 | -4.77 | -2.31 | -29.31 | -15.65 |
22 | 1948 | 0.51 | -10.13 | -99.99 | -2.92 | -1.3 | -18.97 | -0.95 | -19.17 |
23 | 1949 | 18.42 | 28.75 | -99.99 | 41.83 | 23.78 | 25.42 | 23.7 | 16.99 |
24 | 1950 | 15.86 | 4.29 | -99.99 | 34.09 | -6.71 | 2.8 | -6.04 | 18.78 |
25 | 1951 | 14.31 | 3.88 | -99.99 | 12.25 | 0.28 | 19.13 | 17.89 | -7.56 |
26 | 1952 | -4.09 | 10.45 | -99.99 | -13.63 | 21.19 | 22.16 | -4.61 | 18.2 |
27 | 1953 | -15.28 | 8.67 | -99.99 | -0.55 | -1.2 | 8.52 | 15.71 | 10.26 |
28 | 1954 | 35.64 | 26.6 | -99.99 | 34.87 | 13.53 | 25.66 | 67.42 | 56.15 |
29 | 1955 | -0.83 | 11.01 | -99.99 | -1.25 | 26.22 | 8.52 | -3.21 | 13.94 |
30 | 1956 | -7.13 | -2.47 | -99.99 | 7.87 | 1.46 | 37.98 | -0.65 | 24.94 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
# preprocessing
inds = describe(df).min .> -99 # remove missing value
inds[1] = 0 # remove year
inds = inds .* range(1; stop=length(inds))
filter!(x -> x > 0, inds)
df_sub = df[:, inds]
Agric | Food | Beer | Smoke | Toys | Fun | Books | Hshld | Clths | |
---|---|---|---|---|---|---|---|---|---|
Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | |
1 | 26.23 | 41.88 | 64.22 | 47.72 | -22.67 | 15.66 | 8.97 | 43.03 | 43.13 |
2 | 2.46 | 27.95 | 38.8 | -2.3 | 44.33 | 46.38 | 35.48 | 59.07 | 7.74 |
3 | -16.27 | -14.53 | -16.07 | -7.17 | -57.07 | -36.28 | -4.72 | -34.99 | -28.47 |
4 | -47.79 | -11.23 | -53.28 | -3.24 | -20.45 | -19.85 | -15.73 | -42.26 | -20.63 |
5 | -48.67 | -33.53 | -43.07 | -20.18 | -67.55 | -74.4 | -65.88 | -36.51 | -24.27 |
6 | 11.94 | -5.33 | 13.03 | -6.66 | -42.77 | -33.77 | -46.78 | -22.78 | -21.67 |
7 | 156.93 | 41.32 | 189.18 | 52.58 | 87.09 | 55.6 | 53.33 | 38.4 | 93.81 |
8 | 33.4 | 9.25 | -11.62 | 30.76 | -15.36 | 17.86 | 36.81 | 18.21 | 12.55 |
9 | -7.43 | 23.01 | 31.13 | 18.61 | 60.95 | 78.39 | 16.5 | 21.34 | 16.13 |
10 | 31.31 | 19.84 | 5.38 | 4.35 | 79.72 | 58.47 | 3.99 | 24.83 | 10.17 |
11 | -27.26 | -25.84 | -28.13 | -19.34 | -60.2 | -44.79 | -66.94 | -28.24 | -37.92 |
12 | 21.26 | 21.54 | 24.37 | 25.74 | 62.68 | 28.72 | 13.86 | 32.96 | 16.51 |
13 | 36.78 | 9.04 | -17.21 | 2.75 | 78.23 | -35.74 | -13.77 | 16.87 | 19.06 |
14 | -12.69 | -10.18 | 0.75 | -9.66 | 21.23 | -7.01 | -23.33 | -9.31 | -8.75 |
15 | 10.26 | -3.98 | 14.3 | -21.27 | -34.62 | 30.44 | -30.39 | -8.08 | 3.83 |
16 | -2.09 | 13.37 | 31.71 | 2.89 | 10.48 | 40.26 | 91.79 | 8.66 | 16.8 |
17 | 17.25 | 33.45 | 78.32 | 23.08 | 40.22 | 54.04 | 142.0 | 31.02 | 30.59 |
18 | 23.17 | 21.62 | 18.96 | 18.94 | 56.62 | 32.31 | 60.33 | 17.23 | 19.51 |
19 | 30.18 | 37.98 | 159.38 | 32.46 | 40.83 | 75.37 | 106.18 | 32.47 | 37.39 |
20 | 41.27 | -6.92 | -19.15 | -1.72 | -14.74 | 0.93 | -28.72 | -3.46 | -6.61 |
21 | 12.59 | 1.79 | -20.63 | -4.77 | -2.31 | -29.31 | -15.65 | 5.82 | 11.1 |
22 | 0.51 | -10.13 | -2.92 | -1.3 | -18.97 | -0.95 | -19.17 | -3.71 | -8.16 |
23 | 18.42 | 28.75 | 41.83 | 23.78 | 25.42 | 23.7 | 16.99 | 36.77 | 24.65 |
24 | 15.86 | 4.29 | 34.09 | -6.71 | 2.8 | -6.04 | 18.78 | 26.5 | 23.37 |
25 | 14.31 | 3.88 | 12.25 | 0.28 | 19.13 | 17.89 | -7.56 | 5.14 | -2.71 |
26 | -4.09 | 10.45 | -13.63 | 21.19 | 22.16 | -4.61 | 18.2 | 7.35 | 6.52 |
27 | -15.28 | 8.67 | -0.55 | -1.2 | 8.52 | 15.71 | 10.26 | 1.8 | -1.67 |
28 | 35.64 | 26.6 | 34.87 | 13.53 | 25.66 | 67.42 | 56.15 | 52.63 | 29.35 |
29 | -0.83 | 11.01 | -1.25 | 26.22 | 8.52 | -3.21 | 13.94 | 6.9 | 13.53 |
30 | -7.13 | -2.47 | 7.87 | 1.46 | 37.98 | -0.65 | 24.94 | 3.56 | -7.97 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
mat = convert(Matrix, df_sub)
E = mean(mat; dims=1)
v = inv(cov(mat))
n = size(mat)[2]
A = 0.0
for i in 1:n
for j in 1:n
A += v[i,j]E[j]
end
end
B = 0.0
for i in 1:n
for j in 1:n
B += v[i,j]E[i]E[j]
end
end
C = 0.0
for i in 1:n
for j in 1:n
C += v[i,j]
end
end
D = B*C - A^2
function sigma(E)
sqrt((C * E^2 - 2A * E + B)/D)
end
# portfolio
scatter(sqrt.(diag(cov(mat))), E', label="portforio")
# global minimum variance portfolio
y = A/C
x = sigma(y)
scatter!([x], [y], label="global minimum variance portfolio")
# effirencent frontier
E_min = A/C
E_max = max(E...)
ys = range(E_min; stop=E_max, length=1000)
xs = [sigma(y) for y in ys]
plot!(xs, ys, label="efficient frontier")
# minimum variance frontier
E_min = 2 * A/C - max(E...)
E_max= A/C
ys = range(E_min; stop=E_max, length=1000)
xs = [sigma(y) for y in ys]
plot!(xs, ys, linestyle=:dash, xlabel="risk", ylabel="return", label="", title="Modern portfolio theory")