In [149]:
import Base.Collections: heapify!, heappush!, heappop!
using DataStructures
include("gray_calhoun.jl")
include("io_util.jl"); import IOUtil
#include("bio_util.jl"); import BioUtil
#include("graph_util.jl"); import GraphUtil

The Turnpike Problem (2i)

Consider

Suppose there is $N$ increasing integers $ A_1 < A_2 , ... < A_n$, we are given the difference of between every pair of points. The Turnpike reconstruction problem is to reconstruct the point set $A$ from the distances of the form $|A_i − A_j|$.

Consider this example

A = [0 2 4 7 10]

ΔA = [-10 -8 -7 -6 -5 -4 -3 -3 -2 -2 0 0 0 0 0 2 2 3 3 4 5 6 7 8 10]

We can oragnize the values of $\Delta A$ into a matrix $D$, where

$D_{i,j} = A_j - A_i \hspace{3em}\text{for }1 \le i, j \le N$

 1  0 2 4 7 10
 2    0 2 5  8
 3      0 3  6
 4        0  3
 5           0

    1 2 3 4  5


Note that $A + c$ for any $c$ has the same $ΔA$. So we choose $A_1 = 0$ to constraint the result. As such the top row $D_{1,j}$ is exactly equal to $A$. The matrix diagonal $D_{i,i}$ are all 0. Instead we refer to row immediate above, $D_{i-1,i}$, as the diagonal of $D$ in the context of this algorithm. Notice the diagonal $D_{i-1,i}$ are the first order difference of $A$. It has all the information needed to reconstruct $A$.

(clarify all discussion below concern only the elements in the upper right triangle above or on the diagonal)

From the definition of D, we have the identity

$D_{i,j} = D_{i,k} + D_{k,j} \hspace{2em} \text{for } i \le k \le j$

From this we can construct the chain rule

$D_{i,j} = \sum_{k=i}^{j-1} {D_{k,k+1}}$

which can be see geometically that for any element, form a triangle with this element at the upper right hand corner, then its value is equal to the sum of the diagonal elements in the triangle. Also this value is the maximum value for all elements in the triangle, i.e.

$D_{r,s} \le D_{i,j} \hspace{2em} \text{for } r \ge i \text{ and } s \le j$

The Algorithm

We know that the top right corner $D_{1,N}$ has the maximum value of $ΔA$. Starting from the anchor at the top right corner, it attempts to search for value to place on the top row toward the left, and the right column toward the bottom while checking for consistency to the construction of $D$. Once the top row and right column are filled, we can recover the whole $D$ and thus $A$. (Reminder, the top row equals to $A$).

At each step, we have assign a value either to the top row $D_{1, n..N}$ or the right column $D_{N,1..m}$. From the known value of the top row and right column, we will show than we can also derive the values in the rectangle $D_{1..m, n..N}$ (up to the diagonal) and the subset of the diagonal $D_{i-1,i}$ where $1 \le i \lt m$ or $n \le i \lt N$.

This condition holds at the first step where only the top right corner $D_{1,N}$ is known (i.e. $m=1, n=N$). Assume this hold for $m, n$, we will show that it also holds for $m+1, n$ and $m, n-1$.

In the first case, we place a value below the right column at $D_{m+1, N}$. From this it can derive another value at the diagonal

$ D_{m, m+1} = D_{m, N} - D_{m+1, N} $

We can also filled another row below the rectangle.

$ D_{m+1, k} = D_{m, k} - D_{m, m+1} \hspace{2em}\text{where } n \le k \lt N \text{ and } k > m+1 $

Thus the condition continues to hold for $m+1, n$.

In the second case, we place a value to the left of the top row at $D_{1, n-1}$. From this it can derive another value at the diagonal

$ D_{n-1, n} = D_{1, n} - D_{1, n-1} $

We can also filled another column to the left of the rectangle.

$ D_{k, n-1} = D_{k, n} - D_{n-1, n} \hspace{2em}\text{where } 1 \lt k \le m \text{ and } k < n-1 $

Thus the condition continues to hold for $m, n-1$. By induction this holds for all $m, n$ (in the valid range).

This also gives us an algorithm to construct $D$. Starting from $D_{1,N}$, at each step, we find the maximum number remain in $ΔA$. It is easy to show the maximum number can only be placed at the next location on the top row or the next location in the right column. It cannot be at any other place in the matrix. We create a search tree to attempt to put the maximum value at each place. For each derived values, we remove the corresponding value in $ΔA$. If the value is not avilable in $ΔA$, then the attempt fails. It backtracks to try the next possiblity. This algorithm ends when the diagonal is filled after $N$ (or is it N-1, N-2?) successful attempts. The maximum number of trials is $2^N$. But the search tree is expected to be pruned greatly.

In [181]:
INPUT = """\
-10 -8 -7 -6 -5 -4 -3 -3 -2 -2 0 0 0 0 0 2 2 3 3 4 5 6 7 8 10
"""
fp = IOBuffer(INPUT)
fp = open("rosalind_2i.txt");

ΔA = map(parseint, split(readline(fp)))
close(fp)

#ΔA = sort([18,14,16,12,12,14,7,10,10,11,4,5,8,7,6,2,2,3,5,2,4])
#ΔA = [ΔA, -ΔA, [0, 0, 0, 0, 0, 0, 0]]

println("ΔA=$(size(ΔA))")

#------------------------------------------------------------------------

N = int(sqrt(length(ΔA)))
@assert N^2 == length(ΔA)
@assert N > 2
@assert sum(ΔA) == 0
@assert count(x -> x== 0, ΔA) == N

positive_ΔA = filter(x -> x > 0, ΔA)
d_max = maximum(positive_ΔA)

@assert d_max < 99999
ΔA=(17956,)
In [184]:
function turnpike(positive_ΔA, N)
    
    ordered_values = sort(unique(positive_ΔA), rev=true)
    
    # return max unused value in ΔA, move ptr to next 
    function find_next_unused_value(ptr)
        # linear search for next unused, probably not most efficient
        for i in ptr:length(ordered_values)
            if counts[ordered_values[i]] > 0
                return i
            end
        end
        throw(ErrorException("no more usused value"))
    end        
        
    function try_assign(i, j, value, stack)    
        if value <= 0 || counts[value] <= 0
#            println("Fail to find $value -> $i,$j")
            return false
        end
        D[i, j] = value
        counts[value] -= 1
        push!(stack, (i, j, value))
        return true
    end        
        
    function turnpike_next(m, n, unused_ptr, place_right)
        if m == n
            # success, all diagonal values are found
            return true
        end

        stack = (Int, Int, Int)[]        
        max_value = ordered_values[unused_ptr]
#=
        println("m=$(m) n=$(n)\n$D") # $D
        @printf("\nPlace %s max_value=%d -> (%d, %d)\n", 
            place_right ? "right" : "top",
            max_value, 
            place_right ? m+1 : 1,
            place_right ? N : n-1
        )
=#
        
        if place_right

            # place left
            if m+2 >= N                
                return false    # don't place on or below diagonal
            end        
            try_assign(m+1, N, max_value, stack)

            # assign diagonal value
            d_m = D[m,N] - D[m+1, N]
#            println(d_m <= 0 ? 
#                "diagonal value = $(d_m)?" : 
#                "diagonal value = $(d_m)$(counts[d_m]>0 ? '!' : '?') -> $(m),$(m+1)")            

            if try_assign(m, m+1, d_m, stack)
                
                # assign bottom row, empty list mean true
                from_idx = max(n, m+2)
#                println("bottom row $(m+1),$(from_idx:N-1)")
                if @all [try_assign(m+1, k, D[m,k] - d_m, stack) for k in from_idx:N-1]
                
                    # next recusion step
                    unused_ptr = find_next_unused_value(unused_ptr)
                    if turnpike_next(m+1, n, unused_ptr, false) ||  turnpike_next(m+1, n, unused_ptr, true)
                        return true
                    end                        
                end
            end            
        else

            # place top
            if n-1 <= 2                
                return false    # don't place on or below diagonal
            end        
            try_assign(1, n-1, max_value, stack)

            # assign diagonal value
            d_n = D[1,n] - D[1,n-1]
#            println(d_n <= 0 ?
#                "diagonal value = $(d_n)?" : 
#                "diagonal value = $(d_n)$(counts[d_n]>0 ? '!' : '?') -> $(n-1),$(n)")            
            if try_assign(n-1, n, d_n, stack)

                # assign left column, empty list mean true
                to_idx = min(m, n-3)
#                println("left column $(2:to_idx),$(n-1)")
                if @all [try_assign(k, n-1, D[k,n] - d_n, stack) for k in 2:to_idx]
                    
                    # next recusion step
                    unused_ptr = find_next_unused_value(unused_ptr)
                    if turnpike_next(m, n-1, unused_ptr, true) ||  turnpike_next(m, n-1, unused_ptr, false)
                        return true
                    end
                end
            end                            
        end

        # failed, backtrack
        for (i,j,v) in stack
            D[i,j] = 0
            counts[v] += 1
        end            
        return false
    end
                        
    #counts = Dict(counter(positive_ΔA))
    # use sparse array for counts rather than dictionary
    d_max = maximum(positive_ΔA)
    @assert d_max < 99999
    counts = zeros(Int, d_max)
    for x in positive_ΔA
        counts[x] += 1
    end
    
    D = zeros(Int, (N,N))
    # assign d_max to top right corner
    d_max = ordered_values[1]
    D[1,N] = d_max
    counts[d_max] -= 1
    unused_ptr = 2
    
    result = turnpike_next(1, N, unused_ptr, true) || turnpike_next(1, N, unused_ptr, false)
    return result, D, counts
end

function build_A(D, N)
    [0, [cumsum([D[i,i+1] for i in 1:N-1])]]
end
Out[184]:
build_A (generic function with 1 method)
In [185]:
#result, D, counts = turnpike(positive_ΔA, N)
result, D, counts = turnpike(positive_ΔA, N)
if result
    A = build_A(D, N)
    IOUtil.write_vector("out", A)
else
    println("failed")
    D
end    
Written 134 items to out