# need to run notebook twice to get draw_tree and contest defined here try draw_tree(contest) catch end p(d) = 1/2 + 3/(2^d) p2(p) = p^2 * (3-2p) using Formatting pd(p,d) = d==1 ? p2(p) : pd(p2(p),d-1) println("Probability pd(p,d) that TPT outcome == plurality outcome.") for d in 3:7 println(format("d = {:d} n = {:5d}"* " p = {:8.5f} pd = {:8.5f}", d, 3^d, p(d), pd(p(d),d))) end using Pkg Pkg.add("PyPlot") Pkg.add("StatsBase") Pkg.add("Formatting") using PyPlot using Formatting using StatsBase using SHA # # TPT Contest object and contained k-ary tree # mutable struct Contest n::Int # number of cast votes (external nodes; leaves) ni::Int # number of internal nodes nt::Int # total number of nodes: internal+external = n+ni k::Int # use k-ary tree (k=3 is default) tree # array of tree nodes (length n+ni) seed::String # used to break ties, permute values end mutable struct TreeNode index::Int # index within row (starting with 1) audit::Bool # true if node is to be part of audit audited::Bool # true if audited (tval copied to val) rval::Any # candidate chosen (as reported by scanner) aval::Any # candidate chosen on paper ballot (actual value) val::Any # if audited then aval, else rval x::Real # coordinates of center of node y::Real end TreeNode() = TreeNode(0,false,false,0,0,0,0,0) "Compute number of internal nodes in a k-ary tree with n leaves." function n_internal(n, k) return Int(ceil((n-1)/(k-1))) end "Compute index of parent of node i in heap-like k-ary tree with root 1" function parent(i, k) return Int(ceil((i-1)/k)) end "Compute index of leftmost child of node i in heap-like k-ary tree with root 1" function left_child(i, k) return (i-1)*k + 2 end "Compute index of rightmost child of node i in heap-like k-ary tree with root 1" function right_child(i, k) return i*k + 1 end "Compute depth of node i in a heap-like k-ary tree with root 1" function depth(i, k) d = 0 while i>1 i = parent(i, k) d += 1 end return d end "Make tree object for contest based on length-n arrays rvals of reported voter choices and avals of actual choices; val of node is set to rval, and audited of node is set false." function make_tree!(contest, tree, rvals, avals) for i in 1:contest.nt tree[i] = TreeNode() tree[i].index = (i==1 || depth(i-1, contest.k) != depth(i, contest.k)) ? 1 : tree[i-1].index + 1 tree[i].audit = false tree[i].audited = false tree[i].rval = i<=contest.ni ? 0 : rvals[i-contest.ni] tree[i].aval = i<=contest.ni ? 0 : avals[i-contest.ni] tree[i].val = tree[i].rval end end "Make contest object for given array of length n of voter choices, both reported (rvals) and actual (avals). Repeat each vote a number of times equal to 'replication_factor'." function make_contest(rvals, avals; seed=0, k=3, replication_factor = 1) rvals = vcat([rvals for i=1:replication_factor]...) avals = vcat([avals for i=1:replication_factor]...) my_shuffle!(rvals, seed) my_shuffle!(avals, seed) # note: same shuffle order! n = length(rvals) ni = n_internal(n, k) nt = n + ni tree = Array{TreeNode}(undef,nt) contest = Contest(n, ni, nt, k, tree, string(seed)) make_tree!(contest, tree, rvals, avals) return contest end # # TPT Tabulation (winner determination) # "Pseudorandom number generator (PRNG, random oracle). Return nonnegative pseudorandom BigInt based on given arbitrary list of seed(s) and parameters." function PRNG(seeds...) seeds = [string(seed) for seed in seeds] seed = join(seeds, ":") return parse(BigInt, bytes2hex(sha256(seed)), base=16) end "Return pseudorandom element of array L, using given PRNG seed." function pick_random(L, seed) length(L) == 1 && return L[1] r = PRNG("pick_random", seed) return L[mod1(r, length(L))] end "Shuffle array L in place, using given PRNG seed. Same as Random version, except using our PRNG. Uses Fisher-Yates (Knuth) algorithm." function my_shuffle!(L, seed) for i in 1:length(L) r = mod1(PRNG(seed+i), i) L[i], L[r] = L[r], L[i] # swap! end end "Return true iff a==b, even if one or both are missing." function my_match(a, b) if ismissing(a) return ismissing(b) elseif ismissing(b) return false else return a==b end end "Return list of all plurality winners of given list of choices. Note that 'missing' may be present but can't win and won't be in the returned list if there are other choices voted for. Winners are returned in order they first occur, to maintain neutrality of winner function." function plurality_winners(vals) if length(vals) == 1 return vals elseif length(vals) == 2 ismissing(vals[1]) && ismissing(vals[2]) && return [missing] ismissing(vals[1]) && return [vals[2]] ismissing(vals[2]) && return [vals[1]] vals[1] != vals[2] && return vals return [vals[1]] elseif length(vals) == 3 v1, v2, v3 = vals[1:3] ismissing(v1) && ismissing(v2) && ismissing(v3) && return [missing] ismissing(v1) && ismissing(v2) && return [v3] ismissing(v1) && ismissing(v3) && return [v2] ismissing(v2) && ismissing(v3) && return [v1] ismissing(v1) && return [v2, v3] ismissing(v2) && return [v1, v3] ismissing(v3) && return [v1, v2] (v1 == v2 || v1 == v3) && return [v1] v2 == v3 && return [v2] return vals end # else do general case (which happens only if k>3) tally = Dict() uniqs = [] # choices in vals, in order of first occurrence for c in vals tally[c] = 1 + get(tally, c, 0) if tally[c]==1 append!(uniqs, 0) # append c this way in case c == missing uniqs[end] = c end end tally[missing] = 0 # reset tally[missing] to 0 so it can't win unless it is only one max_count = maximum(values(tally)) winners = [c for c in uniqs if tally[c]==max_count] return winners end "Return single plurality winner of given list of choices, using seed to break ties." function plurality_winner(vals, seed) return pick_random(plurality_winners(vals), seed) end "Compute winner for node i, from winners of its children" function winner(contest, i) left = left_child(i, contest.k) right = min(right_child(i, contest.k), contest.nt) vals = [contest.tree[t].val for t in left:right] return plurality_winner(vals, contest.seed*string(i)) end "Compute winner (val) for all internal nodes, bottom-up." function tabulate!(contest) for i in contest.ni:-1:1 contest.tree[i].val = winner(contest, i) end end # # TPT Audit # "Mark node i's children for audit as appropriate, based on children's values. Mark only two out of three children, if they form majority. Else mark all children for audit." function mark_is_children_for_audit!(contest, i) c1 = left_child(i, contest.k) ck = min(right_child(i, contest.k), contest.nt) n_children = ck - c1 + 1 for j in c1:ck contest.tree[j].audit = false end if contest.tree[i].audit == false return end n_hits = 0 for j in c1:ck if my_match(contest.tree[j].val, contest.tree[i].val) n_hits += 1 end end if n_hits <= n_children/2 # no majority winner exists, mark all children for j in c1:ck contest.tree[j].audit = true end else # majority winner exists among children, only mark majority n_hits = 0 for j in c1:ck if my_match(contest.tree[j].val, contest.tree[i].val) n_hits += 1 if n_hits <= Int(ceil((n_children+1)/2)) contest.tree[j].audit = true end end end end end "Mark nodes (leaves and internal nodes) in contest tree that should be audited." function mark_audit!(contest) contest.tree[1].audit = true for i in 1:contest.ni mark_is_children_for_audit!(contest, i) end end "Return index of leaf to audit next, or 0 if no such leaf exists. Start checking at position i+1." function next_leaf_to_audit(contest, i) for j in i+1:contest.nt if contest.tree[j].audit==true && contest.tree[j].audited==false return j end end return 0 end "Audit contest, and return number of ballots manually audited and number of discrepancies found. (This function could be implemented substantially more efficiently, if desired.)" function audit!(contest) n_audited = 0 n_discrepancies = 0 i = contest.ni # index of "last leaf audited" while true n_audited += 1 i = next_leaf_to_audit(contest, i) if i==0 print(" Done auditing. $n_audited ballots audited.") println(" $n_discrepancies discrepancies found.") return n_audited, n_discrepancies end # IRL hand-to-eye audit of ballot i would happen here # but we just simulate it using aval to give "actual value" contest.tree[i].audited = true # following code may be inefficient if ballots are replicated # since IRL a ballot is audited at most once # contest.tree[i].aval is "value returned by audit of ballot i" if contest.tree[i].val == contest.tree[i].aval # audit just confirmed previously reported value continue end n_discrepancies += 1 print(" Audit ballot $(i-contest.ni).") println(" Reported $(contest.tree[i].rval)." * " Actual $(contest.tree[i].aval).") contest.tree[i].val = contest.tree[i].aval # new value! j = parent(i, contest.k) while j != 0 contest.tree[j].val = winner(contest, j) j = parent(j, contest.k) end mark_audit!(contest) # this could be redone to be more efficient! i=contest.ni # reset search for next leaf to audit end end # # Test audit performance on mock contest # "Return rvals and avals for mock election, given list of (rvote, avote, count) triples." function make_mock_contest(triples) rvals = [] avals = [] for (rvote, avote, count) in triples append!(rvals, [rvote for _ in 1:count]) append!(avals, [avote for _ in 1:count]) end return rvals, avals end "Test out TPT audit on plausible votes with discrepancies" function test_audit(triples; seed) rvals, avals = make_mock_contest(triples) contest = make_contest(rvals, avals; seed=seed) tabulate!(contest) println(" Reported winner is: ", contest.tree[1].val) mark_audit!(contest) audit!(contest) println(" Audited (correct) winner is: ", contest.tree[1].val) end println("test audit: 1M votes, 3% margin, no interpretation errors") triples = [('A', 'A', 515000), ('B', 'B', 485000)] @time test_audit(triples, seed=1) println() println("test audit: 1M votes, 3% margin, 0.1% interpretation error rate") triples = [('A', 'A', 514485), ('A', 'B', 515), ('B', 'A', 485), ('B', 'B', 484515)] @time test_audit(triples, seed=1) # # Draw TPT tree # "Draw the TPT tree for a contest using PyPlot" function draw_tree(contest) pygui(false) # draw in notebook instead of separate window fig, ax = PyPlot.subplots(figsize=(5,4)) ax.axis("off") PyPlot.tight_layout() xlim(-5, 10 * contest.k ^ depth(contest.nt, contest.k)) ylim(-5, 20 * depth(contest.nt, contest.k)+10) ax.set_aspect("equal") tree = contest.tree fontsize = 100 / contest.k ^ depth(contest.nt, contest.k) for i in 1:contest.nt tree[i].y = (depth(contest.nt, contest.k)-depth(i, contest.k)) * 20 tree[i].x = (tree[i].index-0.5) * 10 * contest.k^(depth(contest.nt,contest.k)-depth(i, contest.k)) linewidth = tree[i].audit==1 ? 2 : 0.3 if i>contest.ni # leaf (square) rect = matplotlib.patches.Rectangle( (tree[i].x, tree[i].y),5,5, linewidth=linewidth,edgecolor="k",facecolor="w") ax.add_patch(rect) ax.text(tree[i].x+2.5, tree[i].y+2.5, string(tree[i].val), fontsize=fontsize, horizontalalignment="center", verticalalignment="center") ax.text(tree[i].x+2.5, tree[i].y-3, i-contest.ni, fontsize=fontsize, horizontalalignment="center", verticalalignment="center") else # internal node (circle) circ = matplotlib.patches.Circle( (tree[i].x+2.5, tree[i].y+2.5), linewidth=linewidth, radius = 2.5, facecolor="w", edgecolor="k") ax.add_patch(circ) ax.text(tree[i].x+2.5, tree[i].y+2.5, string(tree[i].val), fontsize=fontsize, horizontalalignment="center", verticalalignment="center") end if i>1 # draw edge from parent xs = (tree[i].x+2.5, tree[parent(i, contest.k)].x+2.5) ys = (tree[i].y+2.5, tree[parent(i, contest.k)].y+2.5) linewidth = tree[i].audit ? 3.5 : 1 # note use of zorder in next line to hide segment ends ax.plot(xs, ys, marker = "o", color="k", linewidth=linewidth, zorder=0) end end end vals = collect("ABABABBBB") contest = make_contest(vals, vals, seed=8) tabulate!(contest) mark_audit!(contest) draw_tree(contest) savefig("basic_fig.svg") # # Compute probability that TPT winner is plurality winner # "Return list of all length-k combinations of elements from 1:m" # combinations(4, 3) # ==> [(1,1,1), (1,1,2), ..., (4,4,4)] (64 elements) function combinations(m, k) reverse.(Iterators.product(fill(1:m,k)...))[:] end "Compute probability of each outcome through one k-ary plurality node" function prob_outcomes(pi, k=3) # pi = [...] giving input probability of candidates 1...m # k = arity of node (number of children) m = length(pi) comb = combinations(m, k) po = fill(0.0, m) for c in comb pcomb = prod(pi[collect(c)]) # probability of this input combination winners = plurality_winners(collect(c)) for w in winners po[w] += pcomb / length(winners) end end return po end function California() println("California 2016 Presidential Election") println("Probability of a node at given height having given winner") println(" Height Clinton Trump Other") p = [0.6173, 0.3162, 0.0665] # California: Clinton / Trump / Other from 2016 for i in 0:15 printfmt(" {:6s} {:10.7f} {:10.7f} {:10.7f}\n", i, p[1], p[2], p[3]) p = prob_outcomes(p) end println() end California() function Michigan() println("Michigan 2016 Presidential Election") println("Probability of a node at given height having given winner") println(" Height Trump Clinton Other") p = [0.4750, 0.4727, 0.0523] # Michigan: Trump / Clinton / Other from 2016 for i in 0:19 printfmt(" {:6s} {:10.7f} {:10.7f} {:10.7f}\n", i, p[1], p[2], p[3]) p = prob_outcomes(p) end println() end Michigan()