In [149]:
import Base.Collections: heapify!, heappush!, heappop!
using DataStructures
include("gray_calhoun.jl")
include("io_util.jl"); import IOUtil
#include("bio_util.jl"); import BioUtil
#include("graph_util.jl"); import GraphUtil


## The Turnpike Problem (2i)¶

Consider

Suppose there is $N$ increasing integers $A_1 < A_2 , ... < A_n$, we are given the difference of between every pair of points. The Turnpike reconstruction problem is to reconstruct the point set $A$ from the distances of the form $|A_i − A_j|$.

Consider this example

A = [0 2 4 7 10]

ΔA = [-10 -8 -7 -6 -5 -4 -3 -3 -2 -2 0 0 0 0 0 2 2 3 3 4 5 6 7 8 10]

We can oragnize the values of $\Delta A$ into a matrix $D$, where

$D_{i,j} = A_j - A_i \hspace{3em}\text{for }1 \le i, j \le N$

 1  0 2 4 7 10
2    0 2 5  8
3      0 3  6
4        0  3
5           0

1 2 3 4  5



Note that $A + c$ for any $c$ has the same $ΔA$. So we choose $A_1 = 0$ to constraint the result. As such the top row $D_{1,j}$ is exactly equal to $A$. The matrix diagonal $D_{i,i}$ are all 0. Instead we refer to row immediate above, $D_{i-1,i}$, as the diagonal of $D$ in the context of this algorithm. Notice the diagonal $D_{i-1,i}$ are the first order difference of $A$. It has all the information needed to reconstruct $A$.

(clarify all discussion below concern only the elements in the upper right triangle above or on the diagonal)

From the definition of D, we have the identity

$D_{i,j} = D_{i,k} + D_{k,j} \hspace{2em} \text{for } i \le k \le j$

From this we can construct the chain rule

$D_{i,j} = \sum_{k=i}^{j-1} {D_{k,k+1}}$

which can be see geometically that for any element, form a triangle with this element at the upper right hand corner, then its value is equal to the sum of the diagonal elements in the triangle. Also this value is the maximum value for all elements in the triangle, i.e.

$D_{r,s} \le D_{i,j} \hspace{2em} \text{for } r \ge i \text{ and } s \le j$

### The Algorithm¶

We know that the top right corner $D_{1,N}$ has the maximum value of $ΔA$. Starting from the anchor at the top right corner, it attempts to search for value to place on the top row toward the left, and the right column toward the bottom while checking for consistency to the construction of $D$. Once the top row and right column are filled, we can recover the whole $D$ and thus $A$. (Reminder, the top row equals to $A$).

At each step, we have assign a value either to the top row $D_{1, n..N}$ or the right column $D_{N,1..m}$. From the known value of the top row and right column, we will show than we can also derive the values in the rectangle $D_{1..m, n..N}$ (up to the diagonal) and the subset of the diagonal $D_{i-1,i}$ where $1 \le i \lt m$ or $n \le i \lt N$.

This condition holds at the first step where only the top right corner $D_{1,N}$ is known (i.e. $m=1, n=N$). Assume this hold for $m, n$, we will show that it also holds for $m+1, n$ and $m, n-1$.

In the first case, we place a value below the right column at $D_{m+1, N}$. From this it can derive another value at the diagonal

$D_{m, m+1} = D_{m, N} - D_{m+1, N}$

We can also filled another row below the rectangle.

$D_{m+1, k} = D_{m, k} - D_{m, m+1} \hspace{2em}\text{where } n \le k \lt N \text{ and } k > m+1$

Thus the condition continues to hold for $m+1, n$.

In the second case, we place a value to the left of the top row at $D_{1, n-1}$. From this it can derive another value at the diagonal

$D_{n-1, n} = D_{1, n} - D_{1, n-1}$

We can also filled another column to the left of the rectangle.

$D_{k, n-1} = D_{k, n} - D_{n-1, n} \hspace{2em}\text{where } 1 \lt k \le m \text{ and } k < n-1$

Thus the condition continues to hold for $m, n-1$. By induction this holds for all $m, n$ (in the valid range).

This also gives us an algorithm to construct $D$. Starting from $D_{1,N}$, at each step, we find the maximum number remain in $ΔA$. It is easy to show the maximum number can only be placed at the next location on the top row or the next location in the right column. It cannot be at any other place in the matrix. We create a search tree to attempt to put the maximum value at each place. For each derived values, we remove the corresponding value in $ΔA$. If the value is not avilable in $ΔA$, then the attempt fails. It backtracks to try the next possiblity. This algorithm ends when the diagonal is filled after $N$ (or is it N-1, N-2?) successful attempts. The maximum number of trials is $2^N$. But the search tree is expected to be pruned greatly.

In [181]:
INPUT = """\
-10 -8 -7 -6 -5 -4 -3 -3 -2 -2 0 0 0 0 0 2 2 3 3 4 5 6 7 8 10
"""
fp = IOBuffer(INPUT)
fp = open("rosalind_2i.txt");

close(fp)

#ΔA = sort([18,14,16,12,12,14,7,10,10,11,4,5,8,7,6,2,2,3,5,2,4])
#ΔA = [ΔA, -ΔA, [0, 0, 0, 0, 0, 0, 0]]

println("ΔA=$(size(ΔA))") #------------------------------------------------------------------------ N = int(sqrt(length(ΔA))) @assert N^2 == length(ΔA) @assert N > 2 @assert sum(ΔA) == 0 @assert count(x -> x== 0, ΔA) == N positive_ΔA = filter(x -> x > 0, ΔA) d_max = maximum(positive_ΔA) @assert d_max < 99999  ΔA=(17956,)  In [184]: function turnpike(positive_ΔA, N) ordered_values = sort(unique(positive_ΔA), rev=true) # return max unused value in ΔA, move ptr to next function find_next_unused_value(ptr) # linear search for next unused, probably not most efficient for i in ptr:length(ordered_values) if counts[ordered_values[i]] > 0 return i end end throw(ErrorException("no more usused value")) end function try_assign(i, j, value, stack) if value <= 0 || counts[value] <= 0 # println("Fail to find$value -> $i,$j")
return false
end
D[i, j] = value
counts[value] -= 1
push!(stack, (i, j, value))
return true
end

function turnpike_next(m, n, unused_ptr, place_right)
if m == n
# success, all diagonal values are found
return true
end

stack = (Int, Int, Int)[]
max_value = ordered_values[unused_ptr]
#=
println("m=$(m) n=$(n)\n$D") #$D
@printf("\nPlace %s max_value=%d -> (%d, %d)\n",
place_right ? "right" : "top",
max_value,
place_right ? m+1 : 1,
place_right ? N : n-1
)
=#

if place_right

# place left
if m+2 >= N
return false    # don't place on or below diagonal
end
try_assign(m+1, N, max_value, stack)

# assign diagonal value
d_m = D[m,N] - D[m+1, N]
#            println(d_m <= 0 ?
#                "diagonal value = $(d_m)?" : # "diagonal value =$(d_m)$(counts[d_m]>0 ? '!' : '?') ->$(m),$(m+1)") if try_assign(m, m+1, d_m, stack) # assign bottom row, empty list mean true from_idx = max(n, m+2) # println("bottom row$(m+1),$(from_idx:N-1)") if @all [try_assign(m+1, k, D[m,k] - d_m, stack) for k in from_idx:N-1] # next recusion step unused_ptr = find_next_unused_value(unused_ptr) if turnpike_next(m+1, n, unused_ptr, false) || turnpike_next(m+1, n, unused_ptr, true) return true end end end else # place top if n-1 <= 2 return false # don't place on or below diagonal end try_assign(1, n-1, max_value, stack) # assign diagonal value d_n = D[1,n] - D[1,n-1] # println(d_n <= 0 ? # "diagonal value =$(d_n)?" :
#                "diagonal value = $(d_n)$(counts[d_n]>0 ? '!' : '?') -> $(n-1),$(n)")
if try_assign(n-1, n, d_n, stack)

# assign left column, empty list mean true
to_idx = min(m, n-3)
#                println("left column $(2:to_idx),$(n-1)")
if @all [try_assign(k, n-1, D[k,n] - d_n, stack) for k in 2:to_idx]

# next recusion step
unused_ptr = find_next_unused_value(unused_ptr)
if turnpike_next(m, n-1, unused_ptr, true) ||  turnpike_next(m, n-1, unused_ptr, false)
return true
end
end
end
end

# failed, backtrack
for (i,j,v) in stack
D[i,j] = 0
counts[v] += 1
end
return false
end

#counts = Dict(counter(positive_ΔA))
# use sparse array for counts rather than dictionary
d_max = maximum(positive_ΔA)
@assert d_max < 99999
counts = zeros(Int, d_max)
for x in positive_ΔA
counts[x] += 1
end

D = zeros(Int, (N,N))
# assign d_max to top right corner
d_max = ordered_values[1]
D[1,N] = d_max
counts[d_max] -= 1
unused_ptr = 2

result = turnpike_next(1, N, unused_ptr, true) || turnpike_next(1, N, unused_ptr, false)
return result, D, counts
end

function build_A(D, N)
[0, [cumsum([D[i,i+1] for i in 1:N-1])]]
end

Out[184]:
build_A (generic function with 1 method)
In [185]:
#result, D, counts = turnpike(positive_ΔA, N)
result, D, counts = turnpike(positive_ΔA, N)
if result
A = build_A(D, N)
IOUtil.write_vector("out", A)
else
println("failed")
D
end

Written 134 items to out