Notebook

This notebook contains a few canonically dynamic programming problems and demonstrates that they can be solved recursively.

Most Computer Science programs teach dynamic programming. You figure out your problem and you determine a recurrance relation for that problem which is usually a recursive formula. Then you take that recursive formula and turn it into this disgusting collection of nested loops that iterative populate this data structure until eventually you've calculated the answer in a bottom-up fashion.

Most Computer Science programs also teach recursion. You figure out your problem and you determine a recurrance relation for that problem which is usually a recursive formula. Then you take that recursive formula and write it in code. Then your teacher has you try to calculate the 100th fibinocci number and it takes forever. Then the teacher goes on to tell you that recursion is evil and that you should always use dynamic programming instead.

What your Computer Science program probably didn't tell you is that if you just cache the result of that recursive formula then you wouldn't have to wait so long to figure out the 100th fibinocci number.

Edit Distance¶

In [1]:

MISMATCH_COST = 2
GAP_COST = 1

In [2]:

"""
Edit distance using recursive approach
"""

import functools

@functools.lru_cache(maxsize=None)
def get_cost(a, b):
    """
    get_cost is memoized - prior computations are cached
    """
    if len(a) == 0:
        return len(b) * GAP_COST
    if len(b) == 0:
        return len(a) * GAP_COST
    
    if a[0] == b[0]:
        return get_cost(a[1:], b[1:])
    else:
        return min(
            get_cost(a[1:], b[1:]) + MISMATCH_COST,
            get_cost(a, b[1:]) + GAP_COST,
            get_cost(a[1:], b) + GAP_COST
        )
    
def print_edit_join(a, b):
    if not a and not b:
        pass
    elif not a:
        print("{}  {}".format("-", b[0]))
        print_edit_join(a, b[1:])
    elif not b:
        print("{}  {}".format(a[0], "-"))
        print_edit_join(a[1:], b)
    elif a[0] == b[0]:
        print("{}  {}".format(a[0], b[0]))
        print_edit_join(a[1:], b[1:])
    else:
        mismatch_cost = get_cost(a[1:], b[1:]) + MISMATCH_COST
        a_gap_cost = get_cost(a, b[1:]) + GAP_COST
        b_gap_cost = get_cost(a[1:], b) + GAP_COST
        
        if mismatch_cost < a_gap_cost and mismatch_cost < b_gap_cost:
            print("{}  {}".format(a[0], b[0]))
            print_edit_join(a[1:], b[1:])
        elif a_gap_cost < b_gap_cost:
            print("{}  {}".format("-", b[0]))
            print_edit_join(a, b[1:])
        else:
            print("{}  {}".format(a[0], "-"))
            print_edit_join(a[1:], b)

print_edit_join("corect", "correct")

c  c
o  o
r  r
-  r
e  e
c  c
t  t

In [3]:

"""
Classig Edit distance algorithm
"""

def dp_edit_distance(str_a, str_b):
    subsol = [[None for _ in range(len(str_b) + 1)] for _ in range(len(str_a) + 1)]
    
    """
    Populate the first row and col (represents the case where there
    is absolutely no commonality between the two strings)
    """
    for i in range(len(str_b) + 1):
        subsol[0][i] = i * GAP_COST
    for i in range(len(str_a) + 1):
        subsol[i][0] = i * GAP_COST
    
    for a_idx in range(len(str_a)):
        for b_idx in range(len(str_b)):
            if str_a[a_idx] == str_b[b_idx]:
                match_cost = subsol[a_idx][b_idx]
            else:
                match_cost = subsol[a_idx][b_idx] + MISMATCH_COST
                
            a_gap_cost = subsol[a_idx][b_idx + 1] + GAP_COST
            b_gap_cost = subsol[a_idx + 1][b_idx] + GAP_COST
            
            subsol[a_idx + 1][b_idx + 1] = min(match_cost, a_gap_cost, b_gap_cost)
                
    a_idx = len(str_a)
    b_idx = len(str_b)
    fmtstring = "{a_char}  {b_char}"
    print(fmtstring.format(a_char="A", b_char="B"))
    print("-" * len(fmtstring.format(a_char="A", b_char="B")))
    while a_idx > 0 or b_idx > 0:
        if a_idx == 0:
            b_idx -= 1
            print(fmtstring.format(a_char="-", b_char=str_b[b_idx]))
        elif b_idx == 0:
            a_idx -= 1
            print(fmtstring.format(a_char=str_a[a_idx], b_char="-"))
        else:
            match_cost = subsol[a_idx - 1][b_idx - 1] + (
                MISMATCH_COST if str_a[a_idx - 1] != str_b[b_idx - 1] else 0)
            a_gap_cost = subsol[a_idx - 1][b_idx] + GAP_COST
            b_gap_cost = subsol[a_idx][b_idx - 1] + GAP_COST
            
            if match_cost < a_gap_cost and match_cost < b_gap_cost:
                a_idx -= 1
                b_idx -= 1
                print(fmtstring.format(a_char=str_a[a_idx], b_char=str_b[b_idx]))
            elif a_gap_cost < b_gap_cost:
                a_idx -= 1
                print(fmtstring.format(a_char=str_a[a_idx], b_char="-"))
            else:
                b_idx -= 1
                print(fmtstring.format(a_char="-", b_char=str_b[b_idx]))
                
dp_edit_distance("apple", "aple")

A  B
----
e  e
l  l
p  -
p  p
a  a

Fibonocci¶

In [4]:

"""
Recursive fibonocci number
"""

@functools.lru_cache(maxsize=None)
def rec_fib(n):
    if n < 2:
        return 1
    else:
        return rec_fib(n - 2) + rec_fib(n - 1)
    
print(rec_fib(100))

573147844013817084101

In [5]:

"""
Dynamic Programming fibonocci number
"""

def dp_fib(n):
    subsol = [None for _ in range(n + 1)]
    subsol[0] = subsol[1] = 1
    for i in range(2, n + 1):
        subsol[i] = subsol[i - 1] + subsol[i - 2]
    return subsol[n]
    
print(dp_fib(100))

573147844013817084101

Knapsack¶

In [6]:

from collections import namedtuple
Item = namedtuple("Item", ["name", "weight", "value"])

KNAPSACK_ITEMS = [
    Item(name="...Baby One More Time", weight=10, value=60),
    Item(name="Oops!... I Did it Again", weight=20, value=100),
    Item(name="Britney", weight=30, value=75),
    Item(name="In the Zone", weight=30, value=150),
    Item(name="Blackout", weight=40, value=200),
    Item(name="Circus", weight=35, value=190),
    Item(name="Femme Fatale", weight=30, value=180),
    Item(name="Britney Jean", weight=40, value=120),
    Item(name="Glory", weight=25, value=150),
]

MAX_CAPACITY=110
NUM_ITEMS=len(KNAPSACK_ITEMS)

In [7]:

"""
Recursive Knappsack problem
"""

@functools.lru_cache(maxsize=None)
def rec_knapsack(capacity, num_items):
    if capacity == 0:
        return 0
    elif num_items == 0:
        return 0
    elif KNAPSACK_ITEMS[num_items - 1].weight > capacity:
        return rec_knapsack(capacity, num_items - 1)
    else:
        nth_item = KNAPSACK_ITEMS[num_items - 1]
        return max(
            rec_knapsack(capacity - nth_item.weight, num_items - 1) + nth_item.value,
            rec_knapsack(capacity, num_items - 1)
        )
    
rec_knapsack(MAX_CAPACITY, NUM_ITEMS)

Out[7]:

In [8]:

"""
Dynamic Programming Knappsack problem
"""

def dp_knapsack(capacity_max, num_items):
    subsol = [[0 for _ in range(capacity_max + 1)] for _ in range(num_items + 1)]
    
    for item_idx in range(1, num_items + 1):
        for capacity in range(1, capacity_max + 1):
            nth_item = KNAPSACK_ITEMS[item_idx - 1]
            if capacity < nth_item.weight:
                subsol[item_idx][capacity] = subsol[item_idx - 1][capacity]
            else:
                subsol[item_idx][capacity] = max(
                    subsol[item_idx - 1][capacity - nth_item.weight] + nth_item.value,
                    subsol[item_idx - 1][capacity],
                )
    
    return subsol[num_items][capacity_max]

dp_knapsack(MAX_CAPACITY, NUM_ITEMS)

Out[8]: