# need to run notebook twice to get draw_tree and contest defined here
try
    draw_tree(contest)
catch
end

p(d) = 1/2 + 3/(2^d)

p2(p) = p^2 * (3-2p)

using Formatting
pd(p,d) = d==1 ? p2(p) : pd(p2(p),d-1)

println("Probability pd(p,d) that TPT outcome == plurality outcome.")
for d in 3:7
    println(format("d = {:d}   n = {:5d}"*
            "    p = {:8.5f}    pd = {:8.5f}", 
            d, 3^d, p(d), pd(p(d),d)))
end

using Pkg
Pkg.add("PyPlot")
Pkg.add("StatsBase")
Pkg.add("Formatting")
using PyPlot
using Formatting
using StatsBase
using SHA

#
# TPT Contest object and contained k-ary tree
#

mutable struct Contest
    n::Int         # number of cast votes (external nodes; leaves)
    ni::Int        # number of internal nodes
    nt::Int        # total number of nodes: internal+external = n+ni
    k::Int         # use k-ary tree (k=3 is default)
    tree           # array of tree nodes (length n+ni)
    seed::String   # used to break ties, permute values
end


mutable struct TreeNode
    index::Int    # index within row (starting with 1)
    audit::Bool   # true if node is to be part of audit
    audited::Bool # true if audited (tval copied to val)    
    rval::Any     # candidate chosen (as reported by scanner)
    aval::Any     # candidate chosen on paper ballot (actual value)
    val::Any      # if audited then aval, else rval
    x::Real       # coordinates of center of node
    y::Real
end

TreeNode() = TreeNode(0,false,false,0,0,0,0,0)


"Compute number of internal nodes in a k-ary tree with n leaves."
function n_internal(n, k)
    return Int(ceil((n-1)/(k-1)))
end


"Compute index of parent of node i in heap-like k-ary tree with root 1"
function parent(i, k)
    return Int(ceil((i-1)/k))
end 


"Compute index of leftmost child of node i in heap-like k-ary 
tree with root 1"
function left_child(i, k)
    return (i-1)*k + 2
end


"Compute index of rightmost child of node i in heap-like k-ary 
tree with root 1"
function right_child(i, k)
    return i*k + 1
end


"Compute depth of node i in a heap-like k-ary tree with root 1"
function depth(i, k)
    d = 0
    while i>1
        i = parent(i, k)
        d += 1
    end
    return d
end


"Make tree object for contest based on length-n arrays
rvals of reported voter choices and avals of actual choices;
val of node is set to rval, and audited of node is set false."
function make_tree!(contest, tree, rvals, avals)
    for i in 1:contest.nt
        tree[i] = TreeNode()
        tree[i].index = (i==1 || depth(i-1, contest.k) != depth(i, contest.k)) ? 1 :
                        tree[i-1].index + 1  
        tree[i].audit = false
        tree[i].audited = false 
        tree[i].rval = i<=contest.ni ? 0 : rvals[i-contest.ni] 
        tree[i].aval = i<=contest.ni ? 0 : avals[i-contest.ni]
        tree[i].val = tree[i].rval
    end
end


"Make contest object for given array of length n of voter choices,
both reported (rvals) and actual (avals).
Repeat each vote a number of times equal to 'replication_factor'."
function make_contest(rvals, avals; seed=0, k=3, replication_factor = 1)
    rvals = vcat([rvals for i=1:replication_factor]...)
    avals = vcat([avals for i=1:replication_factor]...)
    my_shuffle!(rvals, seed)
    my_shuffle!(avals, seed) # note: same shuffle order!
    n = length(rvals)
    ni = n_internal(n, k)
    nt = n + ni
    tree = Array{TreeNode}(undef,nt)
    contest = Contest(n, ni, nt, k, tree, string(seed))
    make_tree!(contest, tree, rvals, avals)
    return contest
end

#
# TPT Tabulation (winner determination)
# 

"Pseudorandom number generator (PRNG, random oracle).
Return nonnegative pseudorandom BigInt based on given 
arbitrary list of seed(s) and parameters."
function PRNG(seeds...)
    seeds = [string(seed) for seed in seeds]
    seed = join(seeds, ":")
    return parse(BigInt, bytes2hex(sha256(seed)), base=16)
end    


"Return pseudorandom element of array L, using given PRNG seed."
function pick_random(L, seed)
    length(L) == 1 && return L[1]
    r = PRNG("pick_random", seed)
    return L[mod1(r, length(L))]
end


"Shuffle array L in place, using given PRNG seed.
Same as Random version, except using our PRNG.
Uses Fisher-Yates (Knuth) algorithm."
function my_shuffle!(L, seed)
    for i in 1:length(L)
        r = mod1(PRNG(seed+i), i)
        L[i], L[r] = L[r], L[i]    # swap!
    end
end


"Return true iff a==b, even if one or both are missing."
function my_match(a, b)
    if ismissing(a)
        return ismissing(b)
    elseif ismissing(b)
        return false
    else
        return a==b
    end
end


"Return list of all plurality winners of given list of choices.
Note that 'missing' may be present but can't win and won't be
in the returned list if there are other choices voted for.  
Winners are returned in order they first occur, to maintain 
neutrality of winner function."
function plurality_winners(vals)
    if length(vals) == 1
        return vals
    elseif length(vals) == 2
        ismissing(vals[1]) && ismissing(vals[2]) && return [missing]
        ismissing(vals[1]) && return [vals[2]]
        ismissing(vals[2]) && return [vals[1]]
        vals[1] != vals[2] && return vals
        return [vals[1]]
    elseif length(vals) == 3
        v1, v2, v3 = vals[1:3]
        ismissing(v1) && ismissing(v2) && ismissing(v3) && return [missing]
        ismissing(v1) && ismissing(v2) && return [v3]
        ismissing(v1) && ismissing(v3) && return [v2]
        ismissing(v2) && ismissing(v3) && return [v1]
        ismissing(v1) && return [v2, v3]
        ismissing(v2) && return [v1, v3]
        ismissing(v3) && return [v1, v2]
        (v1 == v2 || v1 == v3) && return [v1]
        v2 == v3 && return [v2]
        return vals
    end
    # else do general case (which happens only if k>3)
    tally = Dict()
    uniqs = []   # choices in vals, in order of first occurrence
    for c in vals
        tally[c] = 1 + get(tally, c, 0)
        if tally[c]==1
            append!(uniqs, 0) # append c this way in case c == missing
            uniqs[end] = c
        end
    end
    tally[missing] = 0  # reset tally[missing] to 0 so it can't win unless it is only one
    max_count = maximum(values(tally))
    winners = [c for c in uniqs if tally[c]==max_count]                   
    return winners
end
   
            
"Return single plurality winner of given list of choices, using seed to break ties."
function plurality_winner(vals, seed)
    return pick_random(plurality_winners(vals), seed)
end    

            
"Compute winner for node i, from winners of its children"
function winner(contest, i)
    left = left_child(i, contest.k)
    right = min(right_child(i, contest.k), contest.nt)
    vals = [contest.tree[t].val for t in left:right]                    
    return plurality_winner(vals, contest.seed*string(i))
end                

            
"Compute winner (val) for all internal nodes, bottom-up."            
function tabulate!(contest)
    for i in contest.ni:-1:1
        contest.tree[i].val = winner(contest, i)
    end
end

#
# TPT Audit
#

"Mark node i's children for audit as appropriate, based on children's values.
Mark only two out of three children, if they form majority.
Else mark all children for audit."
function mark_is_children_for_audit!(contest, i)
    c1 = left_child(i, contest.k)
    ck = min(right_child(i, contest.k), contest.nt)
    n_children = ck - c1 + 1 
    for j in c1:ck
        contest.tree[j].audit = false
    end
    if contest.tree[i].audit == false
        return
    end
    n_hits = 0
    for j in c1:ck
        if my_match(contest.tree[j].val, contest.tree[i].val)
            n_hits += 1
        end
    end               
    if n_hits <= n_children/2  # no majority winner exists, mark all children
        for j in c1:ck
            contest.tree[j].audit = true
        end
        else      # majority winner exists among children, only mark majority
        n_hits = 0
        for j in c1:ck
            if my_match(contest.tree[j].val, contest.tree[i].val)
                n_hits += 1
                if n_hits <= Int(ceil((n_children+1)/2))
                    contest.tree[j].audit = true
                end
            end
        end 
    end
end
                        
                        
"Mark nodes (leaves and internal nodes) in contest tree that 
should be audited."            
function mark_audit!(contest)
    contest.tree[1].audit = true
    for i in 1:contest.ni
        mark_is_children_for_audit!(contest, i)                                
    end
end
            

"Return index of leaf to audit next, or 0 if no such leaf exists.
Start checking at position i+1."
function next_leaf_to_audit(contest, i)
    for j in i+1:contest.nt
        if contest.tree[j].audit==true && 
            contest.tree[j].audited==false
            return j
        end
    end
    return 0                
end

            
"Audit contest, and return number of ballots manually audited and
number of discrepancies found.
(This function could be implemented substantially more efficiently,
if desired.)"
function audit!(contest)
    n_audited = 0
    n_discrepancies = 0
    i = contest.ni    # index of "last leaf audited"
    while true
        n_audited += 1
        i = next_leaf_to_audit(contest, i)
        if i==0
            print("    Done auditing. $n_audited ballots audited.") 
            println(" $n_discrepancies discrepancies found.")
            return n_audited, n_discrepancies
        end
        # IRL hand-to-eye audit of ballot i would happen here
        # but we just simulate it using aval to give "actual value"
        contest.tree[i].audited = true
        # following code may be inefficient if ballots are replicated
        # since IRL a ballot is audited at most once
        # contest.tree[i].aval is "value returned by audit of ballot i"
        if contest.tree[i].val == contest.tree[i].aval
            # audit just confirmed previously reported value
            continue
        end
        n_discrepancies += 1
        print("    Audit ballot $(i-contest.ni).")
        println(" Reported $(contest.tree[i].rval)." *
                " Actual $(contest.tree[i].aval).")
        contest.tree[i].val = contest.tree[i].aval   # new value!
        j = parent(i, contest.k)
        while j != 0
            contest.tree[j].val = winner(contest, j)
            j = parent(j, contest.k)
        end
        mark_audit!(contest)   # this could be redone to be more efficient!
        i=contest.ni           # reset search for next leaf to audit
    end
end

#
# Test audit performance on mock contest
#

"Return rvals and avals for mock election, given
list of (rvote, avote, count) triples."
function make_mock_contest(triples)
    rvals = []
    avals = []
    for (rvote, avote, count) in triples
        append!(rvals, [rvote for _ in 1:count])
        append!(avals, [avote for _ in 1:count])
    end
    return rvals, avals
end


"Test out TPT audit on plausible votes with discrepancies"
function test_audit(triples; seed)
    rvals, avals = make_mock_contest(triples)
    contest = make_contest(rvals, avals; seed=seed)
    tabulate!(contest)
    println("    Reported winner is: ", contest.tree[1].val)
    mark_audit!(contest)
    audit!(contest)
    println("    Audited (correct) winner is: ", contest.tree[1].val)
end

println("test audit: 1M votes, 3% margin, no interpretation errors")
triples = [('A', 'A', 515000), 
           ('B', 'B', 485000)]
@time test_audit(triples, seed=1)
println()
println("test audit: 1M votes, 3% margin, 0.1% interpretation error rate")
triples = [('A', 'A', 514485), 
           ('A', 'B', 515), 
           ('B', 'A', 485), 
           ('B', 'B', 484515)]
@time test_audit(triples, seed=1)

#
# Draw TPT tree
#

"Draw the TPT tree for a contest using PyPlot"            
function draw_tree(contest)
    pygui(false)       # draw in notebook instead of separate window
    fig, ax = PyPlot.subplots(figsize=(5,4))
    ax.axis("off")
    PyPlot.tight_layout()
    xlim(-5, 10 * contest.k ^ depth(contest.nt, contest.k))
    ylim(-5, 20 * depth(contest.nt, contest.k)+10)
    ax.set_aspect("equal")
    tree = contest.tree
    fontsize = 100 / contest.k ^ depth(contest.nt, contest.k)
    for i in 1:contest.nt
        tree[i].y = (depth(contest.nt, contest.k)-depth(i, contest.k)) * 20
        tree[i].x = (tree[i].index-0.5) * 10 * 
                    contest.k^(depth(contest.nt,contest.k)-depth(i, contest.k))  
        linewidth = tree[i].audit==1 ? 2 : 0.3
        if i>contest.ni
            # leaf (square)
            rect = matplotlib.patches.Rectangle(
            (tree[i].x, tree[i].y),5,5,
            linewidth=linewidth,edgecolor="k",facecolor="w")
            ax.add_patch(rect)
            ax.text(tree[i].x+2.5, tree[i].y+2.5,
                string(tree[i].val),
                fontsize=fontsize,
                horizontalalignment="center",
                verticalalignment="center")
            ax.text(tree[i].x+2.5, tree[i].y-3,
                i-contest.ni,
                fontsize=fontsize,
                horizontalalignment="center",
                verticalalignment="center")
        else
            # internal node (circle)
            circ = matplotlib.patches.Circle(
                (tree[i].x+2.5, tree[i].y+2.5),
                linewidth=linewidth,
                radius = 2.5, facecolor="w", edgecolor="k")
            ax.add_patch(circ)
            ax.text(tree[i].x+2.5, tree[i].y+2.5,
                string(tree[i].val),
                fontsize=fontsize,
                horizontalalignment="center",
                verticalalignment="center")
        end
        if i>1 
            # draw edge from parent
            xs = (tree[i].x+2.5, tree[parent(i, contest.k)].x+2.5)
            ys = (tree[i].y+2.5, tree[parent(i, contest.k)].y+2.5)
            linewidth = tree[i].audit ? 3.5 : 1
            # note use of zorder in next line to hide segment ends
            ax.plot(xs, ys, marker = "o", 
                color="k", linewidth=linewidth, zorder=0)
        end
    end
end

            
vals = collect("ABABABBBB")                           
contest = make_contest(vals, vals, seed=8)
tabulate!(contest)
mark_audit!(contest)
draw_tree(contest)
savefig("basic_fig.svg")

#
# Compute probability that TPT winner is plurality winner
#

"Return list of all length-k combinations of elements from 1:m"
# combinations(4, 3) 
# ==> [(1,1,1), (1,1,2), ..., (4,4,4)]  (64 elements)
function combinations(m, k)
    reverse.(Iterators.product(fill(1:m,k)...))[:]
end


"Compute probability of each outcome through one k-ary plurality node"
function prob_outcomes(pi, k=3)
    # pi = [...] giving input probability of candidates 1...m
    # k = arity of node (number of children)
    m = length(pi)
    comb = combinations(m, k)
    po = fill(0.0, m)
    for c in comb
        pcomb = prod(pi[collect(c)])  # probability of this input combination
        winners = plurality_winners(collect(c))
        for w in winners
            po[w] += pcomb / length(winners)
        end
    end
    return po
end


function California()
    println("California 2016 Presidential Election")
    println("Probability of a node at given height having given winner")
    println("  Height   Clinton        Trump    Other")
    p = [0.6173, 0.3162, 0.0665]  # California: Clinton / Trump / Other  from 2016
    for i in 0:15
        printfmt("    {:6s}  {:10.7f} {:10.7f} {:10.7f}\n", i, p[1], p[2], p[3])
        p = prob_outcomes(p)
    end
    println()
end

California()


function Michigan()
    println("Michigan 2016 Presidential Election")
    println("Probability of a node at given height having given winner")
    println("  Height     Trump      Clinton    Other")
    p = [0.4750, 0.4727, 0.0523]  # Michigan: Trump / Clinton / Other  from 2016
    for i in 0:19
        printfmt("    {:6s}  {:10.7f} {:10.7f} {:10.7f}\n", i, p[1], p[2], p[3])
        p = prob_outcomes(p)
    end
    println()
end

Michigan()