#!/usr/bin/env python
# coding: utf-8

# In[3]:


from __future__ import division

get_ipython().run_line_magic('pylab', 'inline')

import pandas as pd
import numpy as np
import itertools
import matplotlib.pyplot as plt
import seaborn as sns

import logging
logger = logging.getLogger()


# 15 Dynamic Programming
# ============
#
# For optimization problems, two related strategies:
#
# 1. divide-and-conquer:
#    partition into **disjoint** subproblems, solve them recursively, and then combine.
#
# 2. dynamic programming:
#    subproblems **overlap**; each subproblem is solved just once and its answer is saved.
#
# We develop a dynamic-programming algorithm in four steps:
# 1. Characterize the **structure** of an optimal solution.
# 2. **Recursively** define the value of an optimal solution.
# 3. Compute the value of an optimal solution, typically in a **bottom-up** fashion.
# 4. [opt] Construct an optimal solution from computed information.
#
#
# ### 15.1 Rod cutting
#
# ##### rod-cutting problem
# Given:
# 1. a rod of length $n$ inches.
# 2. a table of prices $p_i$ for $i = 1, 2, \cdots, n$.
#
# Problem:
# determine the __maximum revenue__ $r_n$ obtainable by cutting up the rod and selling the pieces.
#
# Solution:
# $$r_n = \max_{1 \leq i \leq n} (p_i + r_{n-i})$$
#
# Implementation:
# 1. recursive top-down
# 2. dynamic programming
#     + **optimal substructure**: optimal solutions to a problem incorporate optimal solutions to related subproblems.
#     + uses additional memory to save computation time
#     + two ways:
#         1. top-down with memoization
#             + it "remembers" what results it has computed previously.
#         2. bottom-up method
#             + We sort the subproblems by size and solve them in size order, smallest first.
#             + The bottom-up approach often has much better constant factors, since it has less overhead for procedure calls.
#     + **subproblem graph**, as shown in Fig (15.4).
#         - A dynamic-programming approach runs in **polynomial** time when **the number of distinct subproblems** involved is **polynomial** in the input size and we can solve each such subproblem in **polynomial** time.
#         - Namely, the running time of dynamic programming is **linear** in the number of vertices and edges of the subproblem graph.
#     + **Reconstructing a solution**:
#       We can extend the dynamic-programming approach to record not only the optimal value computed for each subproblem, but also a choice that led to the optimal value. With this information, we can readily print an optimal solution.

# In[4]:


price = np.array([1, 5, 8, 9, 10, 17, 17, 20, 24, 30])
print('length i: {}'.format(list(range(1, len(price) + 1))))
print('price p_i: {}'.format(price))

def bottom_up_cut_rod(p_, n):
    """Solve the rod-cutting problem by bottom-up dynamic programming."""
    assert len(p_) >= n, 'n should not exceed the table length {}'.format(len(p_))
    p = np.insert(p_, 0, 0)  # set price_0 = 0 so that p[i] is the price of a piece of length i
    r = np.zeros(n + 1)      # r[k] = maximum revenue for a rod of length k
    for k in range(1, n + 1):
        q = -np.inf
        for m in range(1, k + 1):      # first piece has length m
            q = max(q, p[m] + r[k - m])
        r[k] = q
    return r[-1]

print('max revenue r_n: {}'.format([bottom_up_cut_rod(price, n) for n in range(1, len(price) + 1)]))


# ### 15.2 Matrix-chain multiplication
#
# ##### matrix-chain multiplication problem
# **given**: a chain $\langle A_1, A_2, \cdots, A_n \rangle$ of $n$ matrices, where for $i = 1, 2, \cdots, n$, matrix $A_i$ has dimension $p_{i-1} \times p_i$.
# **problem**: fully parenthesize the product $A_1 A_2 \cdots A_n$ in a way that minimizes the number of scalar multiplications.
#
# **solutions**:
#
# 1. Exhaustively checking all possible parenthesizations: $\Omega(2^n)$, since the number of parenthesizations satisfies
# \begin{equation}
# P(n) = \begin{cases}
# 1 & \quad \text{if } n = 1 \\
# \sum_{k=1}^{n-1} P(k) P(n-k) & \quad \text{if } n \geq 2 \\
# \end{cases}
# \end{equation}
#
# 2. Applying dynamic programming: $O(n^3)$.
#
# Define $A_{i \dotso j} = A_i A_{i+1} \dotsm A_j$. The four steps:
#
# 1. The structure of an optimal parenthesization.
#    Suppose that splitting the product as $A_{i \dotso k} A_{k+1 \dotso j}$, where $i \leq k < j$, is an optimal parenthesization of $A_{i \dotso j}$.
#    Then the parenthesization of $A_{i \dotso k}$ within it must itself be optimal for $A_{i \dotso k}$, and likewise for $A_{k+1 \dotso j}$.
#
#    Proof (cut and paste):
#    if there were a cheaper parenthesization $\hat{A}_{i \dotso k}$ with $Cost(\hat{A}_{i \dotso k}) < Cost(A_{i \dotso k})$,
#    then $\hat{A}_{i \dotso k} A_{k+1 \dotso j}$ would cost less than the assumed optimal parenthesization ==> contradiction.
#
# 2. A recursive solution.
#    Let $m[i,j]$ be the minimum number of scalar multiplications needed to compute the matrix $A_{i \dotso j}$.
#
# \begin{equation}
# m[i,j] = \begin{cases}
# 0 & \quad \text{if } i = j \\
# \min_{i \leq k < j} \{ m[i,k] + m[k+1,j] + p_{i-1} p_k p_j \} & \quad \text{if } i < j
# \end{cases}
# \end{equation}
#
# 3. Computing the optimal costs.
#    $m[i,j]$ depends on the entries $m[i,k]$ and $m[k+1,j]$ for shorter chains;
#    hence, the algorithm should fill in the table $m$ in a manner that corresponds to solving the parenthesization problem on matrix chains of increasing length.
#
# 4. Constructing an optimal solution.
#    Construct a table $s$ whose rows are indexed by $1, \dotsc, n-1$ and columns by $2, \dotsc, n$.
#    Each entry $s[i,j]$ records the split point $k$ of an optimal parenthesization of $A_{i \dotso j}$.

# In[5]:


# todo (a sketch follows below)
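# A minimal bottom-up sketch of the recurrence above (my own illustration, not code from the book): `matrix_chain_order(p)` takes the dimension sequence $p_0, p_1, \dotsc, p_n$ and returns the cost table `m` and split table `s`; `print_optimal_parens` reconstructs a parenthesization from `s`.

# In[ ]:


def matrix_chain_order(p):
    """Bottom-up DP for the matrix-chain recurrence.

    p: dimension sequence, so matrix A_i is p[i-1] x p[i] and len(p) == n + 1.
    Returns (m, s): m[i][j] is the minimum scalar-multiplication cost of A_i..A_j,
    s[i][j] the split point k chosen for it (tables are 1-indexed, row/column 0 unused).
    """
    n = len(p) - 1
    m = [[0] * (n + 1) for _ in range(n + 1)]
    s = [[0] * (n + 1) for _ in range(n + 1)]
    for length in range(2, n + 1):              # chain length
        for i in range(1, n - length + 2):
            j = i + length - 1
            m[i][j] = np.inf
            for k in range(i, j):               # try every split A_{i..k} A_{k+1..j}
                q = m[i][k] + m[k + 1][j] + p[i - 1] * p[k] * p[j]
                if q < m[i][j]:
                    m[i][j] = q
                    s[i][j] = k
    return m, s

def print_optimal_parens(s, i, j):
    """Reconstruct an optimal parenthesization of A_i..A_j from the split table s."""
    if i == j:
        return 'A{}'.format(i)
    k = s[i][j]
    return '({}{})'.format(print_optimal_parens(s, i, k), print_optimal_parens(s, k + 1, j))

# CLRS example: six matrices with dimensions 30x35, 35x15, 15x5, 5x10, 10x20, 20x25
dims = [30, 35, 15, 5, 10, 20, 25]
m, s = matrix_chain_order(dims)
print('minimum cost: {}'.format(m[1][len(dims) - 1]))                        # expect 15125
print('parenthesization: {}'.format(print_optimal_parens(s, 1, len(dims) - 1)))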
# ### 15.3 Elements of dynamic programming
#
# Two key ingredients: optimal substructure and overlapping subproblems.
#
# #### Optimal substructure
# ##### common pattern
# 1. A solution to the problem consists of making a choice.
#
# 2. Suppose that, for a given problem, you are given the choice that leads to an optimal solution.
#
# 3. Given this choice, you determine which subproblems ensue and how to best characterize the resulting space of subproblems.
#
# 4. By using the "cut-and-paste" technique, you show that the solutions to the subproblems used within an optimal solution to the problem must themselves be optimal.
#
# To characterize the space of subproblems, a good rule of thumb is to try to keep the space **as simple as possible** and then **expand it as necessary**.
#
#
# Optimal substructure **varies** across problem domains in two ways:
#
# 1. how many subproblems an optimal solution to the original problem uses.
#
# 2. how many choices we have in determining which subproblem(s) to use in an optimal solution.
#
#
# The **running time** of a dynamic-programming algorithm depends on the product of two factors:
#
# 1. the number of subproblems overall.
#
# 2. how many choices we examine for each subproblem.
#
# Dynamic programming often uses optimal substructure in a **bottom-up fashion**.
#
# The **cost** of the problem solution is usually the subproblem costs plus a cost that is directly attributable to the choice itself.
#
#
# ##### subtleties
# Be careful NOT to assume that optimal substructure applies when it does not,
# e.g. unweighted shortest path VS unweighted longest simple path.
#
# Subproblems must be **independent**:
# the solution to one subproblem does not affect the solution to another subproblem of the same problem;
# in other words, **the subproblems do not share resources**.
#
#
# #### Overlapping subproblems
# The space of subproblems must be "small": a recursive algorithm for the problem solves the same subproblems over and over, rather than always generating new ones.
#
#
# #### Reconstructing an optimal solution
# Store the choice made in each subproblem in a table, so that an optimal solution can be reconstructed without recomputation.
#
# #### Memoization
# A memoized recursive algorithm maintains an entry in a table for the solution to each subproblem.
#
# In general practice, we prefer the bottom-up approach to the top-down memoized algorithm; but if some subproblems need not be solved at all, the memoized solution has the advantage of solving only those subproblems that are definitely required.

# ### 15.4 Longest common subsequence
#
# Steps:
#
# 1. Characterizing the optimal substructure:
#    Let $X = \langle x_1, x_2, \cdots, x_m \rangle$ and $Y = \langle y_1, y_2, \cdots, y_n \rangle$ be sequences, and let $Z = \langle z_1, z_2, \cdots, z_k \rangle$ be any LCS of $X$ and $Y$.
#
#     1. If $x_m = y_n$, then $z_k = x_m = y_n$, and $Z_{k-1}$ is an LCS of $X_{m-1}$ and $Y_{n-1}$.
#
#     2. If $x_m \neq y_n$, then $z_k \neq x_m$ implies that $Z$ is an LCS of $X_{m-1}$ and $Y$.
#
#     3. If $x_m \neq y_n$, then $z_k \neq y_n$ implies that $Z$ is an LCS of $X$ and $Y_{n-1}$.
#
# 2. A recursive solution.
#
# \begin{equation}
# c[i,j] = \begin{cases}
# 0 \quad & \text{if } i = 0 \text{ or } j = 0 \\
# c[i-1,j-1] + 1 \quad & \text{if } i, j > 0 \text{ and } x_i = y_j \\
# \max(c[i,j-1], c[i-1,j]) \quad & \text{if } i, j > 0 \text{ and } x_i \neq y_j
# \end{cases}
# \end{equation}
#
# 3. Computing the length of an LCS (see the code below).
#
# 4. Constructing an LCS.
#    $b[i,j]$ points to the table entry corresponding to the optimal subproblem solution chosen when computing $c[i,j]$.
#
#
# ##### improving the code
# 1. eliminate the $b$ table: each $c[i,j]$ depends on only three other entries, so we can determine in $O(1)$ time which one was used.
#
# 2. keep only two rows of table $c$ at a time, if we need only the length of an LCS.

# In[6]:


# todo (a sketch follows below)
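# A minimal sketch of steps 3 and 4 (my own illustration, not code from the book): `lcs_length(x, y)` fills the $c$ and $b$ tables bottom-up, and `backtrack` reconstructs one LCS from the stored choices.

# In[ ]:


def lcs_length(x, y):
    """Fill the LCS tables bottom-up.

    c[i][j] is the length of an LCS of x[:i] and y[:j];
    b[i][j] records which subproblem was chosen: 'diag', 'up' or 'left'.
    """
    m, n = len(x), len(y)
    c = [[0] * (n + 1) for _ in range(m + 1)]
    b = [[None] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if x[i - 1] == y[j - 1]:
                c[i][j] = c[i - 1][j - 1] + 1
                b[i][j] = 'diag'
            elif c[i - 1][j] >= c[i][j - 1]:
                c[i][j] = c[i - 1][j]
                b[i][j] = 'up'
            else:
                c[i][j] = c[i][j - 1]
                b[i][j] = 'left'
    return c, b

def backtrack(b, x, i, j):
    """Reconstruct one LCS by following the choices stored in b."""
    if i == 0 or j == 0:
        return ''
    if b[i][j] == 'diag':
        return backtrack(b, x, i - 1, j - 1) + x[i - 1]
    if b[i][j] == 'up':
        return backtrack(b, x, i - 1, j)
    return backtrack(b, x, i, j - 1)

# CLRS example: an LCS of ABCBDAB and BDCABA is BCBA (length 4)
X, Y = 'ABCBDAB', 'BDCABA'
c, b = lcs_length(X, Y)
print('LCS length: {}'.format(c[len(X)][len(Y)]))
print('one LCS: {}'.format(backtrack(b, X, len(X), len(Y))))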
# ### 15.5 Optimal binary search trees
#
# **Given**:
# a sequence $K = \langle k_1, k_2, \cdots, k_n \rangle$ of $n$ distinct keys in sorted order. For each $k_i$, we have a probability $p_i$ that a search will be for $k_i$. We also have $n+1$ "dummy keys" $\langle d_0, d_1, \cdots, d_n \rangle$ representing values not in $K$, and a probability $q_i$ for each $d_i$.
#
# $$\sum_{i=1}^{n} p_i + \sum_{i=0}^{n} q_i = 1 $$
#
# We want to build a binary search tree $T$ for $K$; the expected cost of a search in $T$ is:
# \begin{align}
# E[\text{cost}] &= \sum_{i=1}^n (depth_T(k_i) + 1) p_i + \sum_{i=0}^n (depth_T(d_i) + 1) q_i \\
# &= 1 + \sum_{i=1}^n depth_T(k_i) p_i + \sum_{i=0}^n depth_T(d_i) q_i
# \end{align}
#
# **Problem**:
# We wish to build the tree $T$ whose expected search cost is smallest, an *optimal binary search tree*.
#
#
# #### steps
# 1. Optimal substructure
#    If an optimal binary search tree $T$ has a subtree $T'$ containing keys $k_i, \dotsc, k_j$, then this subtree $T'$ must be optimal as well for the subproblem with keys $k_i, \dotsc, k_j$ and dummy keys $d_{i-1}, \dotsc, d_j$.
#
# 2. A recursive solution
#
#    define: $w[i,j] = \sum_{l=i}^{j} p_l + \sum_{l=i-1}^{j} q_l$
#
# \begin{equation}
# e[i,j] = \begin{cases}
# q_{i-1} \quad & \text{if } j = i - 1 \\
# \min_{i \leq r \leq j} \{ e[i,r-1] + e[r+1,j] + w[i,j] \} \quad & \text{if } i \leq j
# \end{cases}
# \end{equation}
#
# 3. Computing
#    Fill the $e$ and $w$ tables in order of increasing subproblem size, using
#    $$w[i,j] = w[i,j-1] + p_j + q_j$$
#    to update each $w$ entry in constant time (see the code below).

# In[7]:


# todo (a sketch follows below)
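# A minimal bottom-up sketch of the recurrence above (my own illustration, not code from the book): `optimal_bst(p, q)` fills the $e$, $w$ and $root$ tables, where $root[i][j]$ is the root of an optimal tree for keys $k_i, \dotsc, k_j$.

# In[ ]:


def optimal_bst(p, q):
    """Bottom-up DP for an optimal binary search tree.

    p[1..n]: key probabilities (p[0] is an unused placeholder).
    q[0..n]: dummy-key probabilities.
    Returns (e, root): e[i][j] is the expected search cost of an optimal tree
    for keys k_i..k_j, and root[i][j] is the index of its root (1-indexed).
    """
    n = len(q) - 1
    e = [[0.0] * (n + 1) for _ in range(n + 2)]     # e[i][j], i in 1..n+1, j in 0..n
    w = [[0.0] * (n + 1) for _ in range(n + 2)]
    root = [[0] * (n + 1) for _ in range(n + 1)]
    for i in range(1, n + 2):                       # empty subtrees consist of the dummy key d_{i-1} only
        e[i][i - 1] = q[i - 1]
        w[i][i - 1] = q[i - 1]
    for length in range(1, n + 1):                  # subproblems in order of increasing size
        for i in range(1, n - length + 2):
            j = i + length - 1
            e[i][j] = np.inf
            w[i][j] = w[i][j - 1] + p[j] + q[j]     # w[i,j] = w[i,j-1] + p_j + q_j
            for r in range(i, j + 1):               # try each key k_r as the root
                t = e[i][r - 1] + e[r + 1][j] + w[i][j]
                if t < e[i][j]:
                    e[i][j] = t
                    root[i][j] = r
    return e, root

# CLRS example: n = 5 keys; the optimal tree has expected search cost 2.75 and root k_2
p = [0, 0.15, 0.10, 0.05, 0.10, 0.20]
q = [0.05, 0.10, 0.05, 0.05, 0.05, 0.10]
e, root = optimal_bst(p, q)
print('expected search cost: {:.2f}'.format(e[1][len(q) - 1]))
print('root of optimal tree: k{}'.format(root[1][len(q) - 1]))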