#!/usr/bin/env python # coding: utf-8 # # From last time # Picking up from part 1, we have the following two examples: # In[1]: A = 'ABCBDAB' B = 'BDCABA' # There are two longest common subsequences: # # 1. B, C, B, A # # 2. B, D, A, B # In[2]: occidental = "occidental" superdelicately = "superdelicately" # One longest common subsequence (LCS) is: "deal". # Some code from part 1: # In[3]: from functools import lru_cache # In[4]: def pretty_print(arr): '''A hacky function that makes every string in an array equal length, then prints it row by row.''' longest_length = len(arr[:-1][:-1]) nicer = [] for i, row in enumerate(arr): new_row = [] for j, cell in enumerate(row): cell_length = len(cell) padding_needed = longest_length - cell_length new_cell = cell + (' ' * padding_needed) new_row.append(new_cell) nicer.append(new_row) [print(r) for r in nicer] # In[5]: def lcs4(A, B, print_table=False): # Create a lookup table. This is a 2D array with the letters of A as columns # and the letters of B as rows. It includes a buffer row and column filled with empty strings. columns = len(A) + 1 rows = len(B) + 1 lookup = [['' for n in range(columns)] for n in range(rows)] # Since the lookup table is initialized with '' # the base case (len(A) == 0 or len(B) == 0) is "baked-in". # What's left is to go row by row, column by column and # apply the iterative version of the recursive cases in lcs(). for row_id in range(1, rows): for column_id in range(1, columns): letter_in_A = A[column_id-1] letter_in_B = B[row_id-1] if letter_in_A == letter_in_B: # "Recursive case 1" lookup[row_id][column_id] = lookup[row_id - 1][column_id - 1] + letter_in_A else: # "Recursive case 2" option1 = lookup[row_id][column_id-1] option2 = lookup[row_id-1][column_id] if len(option1) > len(option2): lookup[row_id][column_id] = option1 else: lookup[row_id][column_id] = option2 if print_table: pretty_print(lookup) return lookup[rows-1][columns-1] # # A connection between LCS and LIS? # # There is an interesting connection between longest common subsequence (LCS) and longest increasing subsequence (LIS). # # >The longest increasing subsequence of a list of numbers is equal to the longest common subsequence of that list of numbers and its sorted counterpart. # # Source: "Course15.pdf" by [Jean-Lou De Carufel](http://cglab.ca/~jdecaruf/CSI3105.html) (probably taken from Dasgupta et. al.). # In[6]: nums = [3, 2, 1, 4] nums # In[7]: # Copy and sort nums sorted_nums = nums[:] sorted_nums.sort() sorted_nums # In[8]: # Convert both to strings nums_str = [str(n) for n in nums] sorted_nums_str = [str(n) for n in sorted_nums] # In[9]: # Find the longest common subsequence of S and S_sorted. lcs4(nums_str, sorted_nums_str) # [1, 4] is a longest **increasing** subsequence of `nums`. # # That said, this doesn't seem to work for lists with duplicate elements. # In[10]: dupe_nums = [2, 2] # In[11]: dupe_nums_str = ''.join([str(n) for n in dupe_nums]) dupe_nums_str # In[12]: lcs4(dupe_nums_str, dupe_nums_str) # Whether $2 \rightarrow 2$ is "increasing" depends on how you define it. It is not strictly increasing and will fail to pass certain tests in [this leetcode question](https://leetcode.com/problems/longest-increasing-subsequence/). # # Extending LCS to numbers # # Thinking about it, my current implementation of longest common subsequence only works with lists of numbers where no number is greater than 9 (only single digits). With stringified number lists, there is no way to tell "10" from "1", "0". # # The solution is simple: use lists instead of strings. # In[13]: def lcs5(A, B): # Create a lookup table. This is a 2D array with the letters of A as columns # and the letters of B as rows. It includes a buffer row and column filled with empty strings. columns = len(A) + 1 rows = len(B) + 1 lookup = [[[] for n in range(columns)] for n in range(rows)] # Since the lookup table is initialized with '' # the base case (len(A) == 0 or len(B) == 0) is "baked-in". # What's left is to go row by row, column by column and # apply the iterative version of the recursive cases in lcs(). for row_id in range(1, rows): for column_id in range(1, columns): letter_in_A = A[column_id-1] letter_in_B = B[row_id-1] if letter_in_A == letter_in_B: # "Recursive case 1" lookup[row_id][column_id] = lookup[row_id - 1][column_id - 1] + [letter_in_A] else: # "Recursive case 2" option1 = lookup[row_id][column_id-1] option2 = lookup[row_id-1][column_id] if len(option1) > len(option2): lookup[row_id][column_id] = option1 else: lookup[row_id][column_id] = option2 return lookup[rows-1][columns-1] # In[14]: numbers_a = ['10', '22', '5', '11', '10'] numbers_b = ['100', '22', '3', '11', '14'] # In[15]: lcs5(numbers_a, numbers_b) # Of course, letters still work: # In[16]: occidental_letters = list(occidental) occidental_letters # In[17]: superdelicate_letters = list(superdelicately) superdelicate_letters # In[18]: lcs5(occidental_letters, superdelicate_letters) # This combined with some input processing was enough to solve [this HackerRank question](https://www.hackerrank.com/challenges/dynamic-programming-classics-the-longest-common-subsequence/problem).