#!/usr/bin/env python
# coding: utf-8

# In[24]:

from __future__ import print_function
import sys


# ### Searching via sorting

# Consider the phone book for Addis Ababa. Suppose that it has 1 million names in it.
# But still, we can find a number easily because it is __alphabetically sorted__.
# What would happen if the names were listed in the phone book in random order?
# This is true in general - we can find items much faster in arrays that are __sorted__:

# Many times we want to search for something in a given list. For example, we might
# store the names of students digitally in a list, and we might want to know whether
# or not a student is in the list. Let's see an example.

# In[15]:

student_names = [
    'abinet mulugeta', 'urgie huseien', 'yonatan wosenyeleh', 'amanuel asfaw',
    'tibebu solomon', 'hailegbrel wudneh', 'gatluk chuol', 'elsabet buzuneh',
    'eden ketema', 'maeden seid', 'mikyas legese', 'meskerem birhanu demeke',
    'kumneger worku', 'shambel abate', 'hailmeskel shimeles', 'tsega hailu',
    'dawit fikeru', 'asmare habitamo', 'zelalem ades', 'betelehem eshetu',
    'yosef tadiwos', 'haymanot gidena', 'henock mersha', 'binyam kidane',
    'mohammed nur', 'bethelehem walelegn', 'lewi mekonnen', 'wondimu yohanes',
    'hodo mukitar', 'yonas adugna', 'tigabu gebrecherkos', 'nardos gesese',
    'mohammed nur', 'abdurezak temam', 'shambel elena', 'adem mohamed',
    'zakira tebarek', 'lidya gegnaw', 'knesa desta', 'ibrahim ahmed',
    'betlehem desalegn', 'adonay geremew', 'kalkidan muluneh', 'haile gebreselasie',
    'eden tekilu tilahun', 'ayantu aleneh', 'yosef nosha', 'mebrihity girmay',
    'finet hailu', 'elisa feloh', 'bezawit gebremariam', 'nigusu terefe',
    'amina bedrie', 'kiflom leuel', 'hana tariku', 'nejat beshir',
    'mesfen tamiru', 'shafi abdi', 'kelbesa ambesa', 'abrham tuna',
    'daniel hagos', 'yordanos jemberu', 'aman musa', 'habene abdi',
    'kawuser jemal', 'tariku erina', 'mesigina gebretsadik', 'yetnayet birhanu',
    'semer abrar', 'nur ahmed', 'eman hasen', 'natol gizaw',
    'banchayehu asrat',
    'hilina thewodros', 'hasen ali', 'mebrihatu lebelo', 'yosef enawgaw',
    'nesera teyib', 'mekdes muluneh', 'surafel sewutu', 'mentesenot tefera',
]

# In[108]:

print(len(student_names))

# In[17]:

for name in student_names:
    print(name)

# I want to know if a student named **haile gebreselasie** is in the list (yes).
# I want to know if a student named **yosef nosha** is in the list (yes).
# I want to know if a student named **timnit gebru** is in the list (no).

# In[113]:

def search(L, i):
    """Return the index of the first element of L equal to i, or -1 if none is.

    First in-class version: a plain left-to-right scan.
    """
    #L=[0,9,10,12,20,-1,200], i=12
    for j, value in enumerate(L):  #j runs over 0,1,2,3,4,5,6
        if i == value:
            return j
    return -1


x = [0, 9, 10, 12, 20, -1, 200]
y = 12
search(x, y)

#examples
#L=[0,9,10,12,20,-1,200]
#i=12
#--->return 3

#L=[0,9,10,12,20,-1,200]
# i=200
#--->6

#L=[0,9,10,12,20,-1,200]
# i=50
#--->-1

#L=['Timnit', 'Arash', 'Heather', 'Jelani']
#i='timnit'
#--->-1

#L=['Timnit', 'Arash', 'Heather', 'Jelani']
#i='Timnit'
#--->0

# **Class exercise**: Write a function **search** which takes in a list **L** and an
# item **i**, and returns the index of the item **i** if **i** is in the list **L**.
# If not, return **-1**

# In[25]:

def search(L, item):
    """Search in an unsorted list. We have to search through the entire list.

    Returns the index of the first element equal to item, or -1 if there is
    none. One '*' is written to stdout for every element examined, so the
    number of stars shows how many steps the search took.
    """
    for i, value in enumerate(L):
        sys.stdout.write('*')  #Ignore this, it just prints the '*'
        if value == item:
            return i
    return -1


# In[26]:

L = range(200)

# In[36]:

search(L, 100)

# In[121]:

search(student_names, 'yosef nosha')


def sort_list2(L):
    """Return the two elements of L in increasing order.

    This first draft builds a new list when the elements are out of order and
    returns L itself otherwise.
    """
    if L[0] > L[1]:
        return [L[1]] + [L[0]]
    else:
        return L


print(sort_list2(['Yosef', 'Timnit']))
#def sort_list3(L):

# Can we do it faster using the fact that ```L``` is __sorted__?
# Turns out the answer is __yes__ (think about the phone book example).

# Since we know it is faster to search through lists after they are sorted, first
# let's write a function called **sort_list** that, given a list **L**, returns the
# list in sorted order. Let's first write this function without recursion, and then
# using recursion.

# If I told you that the list **L** had only 2 values, how would you write the function?

# In[47]:

#code here
def sort_list2(L):
    """Sort a two-element list in place and return it.

    Lists with fewer than two elements are returned unchanged instead of
    raising IndexError.
    """
    if len(L) >= 2 and L[0] > L[1]:
        L[0], L[1] = L[1], L[0]
    return L


sort_list2([1, 2])

# If I told you that the list **L** had only 3 values, how would you write the function?

# In[53]:

#L=[3,0,1]
#-->[0,1,3]
#1. Find the minimum value in L
#it is zero
#2. I swap the first element with the minimum value
#I swap 3 with 0
#so now L is [0,3,1]
#3. I sort a list with the last 2 elements using
#sort_list2
#[0] + sort_list2([3,1])

#['Zelalem', 'Abeba', 'Wolde']
#-->['Abeba', 'Wolde', 'Zelalem']
#-->[0,1,3]

#code here
def sort_list3(L):  #L=[3,0,1]
    """Sort a three-element list: move the minimum to L[0], then sort the rest."""
    #=================================================
    if L[0] > L[1]:  #if 3>0
        L[0], L[1] = L[1], L[0]  #swap L[0] & L[1]
    # L=[0,3,1]
    if L[0] > L[2]:  #if 0>1?
        L[0], L[2] = L[2], L[0]
    #===>CODE ABOVE JUST ENSURES MINIMUM VALUE IS L[0]
    #now L is [0,3,1]
    return [L[0]] + sort_list2(L[1:3])  #L[1:3]=[3,1]
    #return [0] + sort_list2([3,1])


# The example list gets its own name so that it does not overwrite the global
# L = range(200) that the search comparisons at the end of the file rely on.
example = [3, 0, 1]
sort_list3(example)

# In[ ]:

#code here
def sort_list3(L):
    """Outline of the three-element sort; the steps are still only comments."""
    #First find the index of the minimum value in L
    #Then swap L[0] with the minimum value in L
    #Now L[0] is the minimum value in L
    #Return L[0] + sort_list2(L[1:])
    #Because we have learned how to sort a list of 2 numbers
    return [L[0]] + sort_list2(L[1:3])  #L[1:3]=[3,1]


# In[ ]:

#code here
def sort_list3(L):
    """Sort a three-element list and return the result as a new list.

    The minimum is swapped into L[0] in place (so the caller's list is
    modified), then the remaining two elements are ordered with sort_list2.
    """
    #First find the minimum value in L
    min_index = find_min_index(L)  #defined in the next cell
    #Then swap L[0] with the minimum value in L.
    #Both sides must index into L: assigning to the name min_index instead
    #would leave the old L[0] out of the list and duplicate the minimum.
    L[0], L[min_index] = L[min_index], L[0]
    #Now L[0] is the minimum value in L
    #And we have learned how to sort a list of 2 numbers
    return [L[0]] + sort_list2(L[1:3])


# In[ ]:

#How do we create this function find_min_index?
#Create a function find_min_index that takes in a list L
#and returns the index of the minimum value in L

# In[ ]:

#This function returns the index of the minimum element in list L
def find_min_index(L):
    """Return the index of the smallest element of L (the first one on ties).

    Raises IndexError if L is empty.
    """
    current_index = 0
    current_min = L[0]
    for j in range(1, len(L)):
        if current_min > L[j]:
            current_min = L[j]
            current_index = j
    return current_index


# In[54]:

sort_list3([9, 5, 8])

# In[56]:

sort_list3(['cat', 'apple', 'dog'])

# ### Curious fact:

# In[122]:

'apple' < 'cat'

# In[125]:

'yosef' < 'timnit'

# ### How do we sort a list?

# 1. We find the minimum element in **L**
# 2. We swap **L[0]** with the minimum element in the list such that **L[0]** is now the minimum element in **L**.
# 3. We swap **L[1]** with the minimum element in **L[1:]** such that **L[1]** is now the minimum element in **L[1:]**.
# 4. We swap **L[2]** with the minimum element in **L[2:]** such that **L[2]** is now the minimum element in **L[2:]**.
# 5. We continue this until we reach the last element of **L** and return **L**.

# In[ ]:

#With recursion
def sort_list(L):
    """Recursive selection sort; returns the sorted elements.

    The minimum is swapped to the front of L in place, and the tail is sorted
    recursively on a copy.
    """
    if len(L) <= 1:
        return L  # an empty or one-element list is always sorted
    min_idx = find_min_index(L)  #non-recursive helper function
    L[0], L[min_idx] = L[min_idx], L[0]
    return [L[0]] + sort_list(L[1:])


sort_list([5, 1, 10, 3])

# In[64]:

#Without recursion -- first sketch.
#The min_idx line below is plain English, not Python, so the whole sketch is
#kept as a comment; the runnable version follows two cells further down.
#def sort_list(L):
#    for i in range(len(L)):
#        #*****This line is not code****
#        min_idx = Find the index of the minimum element in L[i:]
#        #******************************
#        L[i], L[min_idx] = L[min_idx], L[i]
#    return L

# In[71]:

#This function returns the index of the minimum element in list L
def find_min_index(L):
    """Return the index of the smallest element of L (the first one on ties).

    Raises IndexError if L is empty.
    """
    current_index = 0
    current_min = L[0]
    for j in range(1, len(L)):
        if current_min > L[j]:
            current_min = L[j]
            current_index = j
    return current_index


# In[78]:

#Without recursion
def sort_list(L):
    """Selection sort: sort L in place and return it."""
    for i in range(len(L)):
        #find_min_index sees only the slice L[i:], so its answer is relative
        #to position i and has to be shifted back by i.
        min_idx = i + find_min_index(L[i:])
        L[i], L[min_idx] = L[min_idx], L[i]
    return L


# In[81]:

sort_list([5, 1, 10, 3])

# In[82]:

sort_list(student_names)

# Now that we have a sorted list we can search through the list the way we did before

# In[84]:

def search(L, item):
    """Linear search: look at the elements one by one from the start.

    Returns the index of the first element equal to item, or -1 if there is
    none. It never uses the fact that L may be sorted. One '*' is written to
    stdout for every element examined.
    """
    for i, value in enumerate(L):
        sys.stdout.write('*')  #Ignore this, it just prints the '*'
        if value == item:
            return i
    return -1


# However, we can search through the list faster than this.

# ## Binary Search
#
# __Input:__ Sorted list $L$ of length $n$, item $item$
#
# __Output:__ Index $i$ such that $L[i]==item$ or $-1$ if no such $i$ exists.
#
# __Operation:__ Check if $L[n/2]>item$.
#
# If YES, then check if $L[n/4]>item$, if NO then check if $L[3n/4]>item$.
#
# If first check was YES and second YES, check if $L[n/8]>item$.
#
# If first check was YES and second NO, check if $L[3n/8]>item$.
#
# If first check was NO and second NO, check if $L[7n/8]>item$.
#
# If first check was NO and second YES, check if $L[5n/8]>item$.
#
# ....
#
# continue in this way

# In[5]:

#overview of binary search

# In[ ]:

#lets see an example
#How do we check if 10 is in list [1,3,5,6,9,10,11,14] without using binary search?
#How do we check if 10 is in list [1,3,5,6,9,10,11,14] using binary search?

# ## Binary Search
#
# (a bit more formal operation)
#
# __Input:__ Sorted list $L$ of length $n$, item $item$
#
# __Output:__ Index $i$ such that $L[i]==item$ or $-1$ if no such $i$ exists.
#
# __Operation:__
#
# Let $m = n/2$ (rounded down). If $L[m]==item$, return $m$.
#
# If $L[m]>item$, search the left half $L[:m]$.
#
# Otherwise search the right half $L[m+1:]$ and add $m+1$ to the index found there.

# NOTE(review): the end of the description above and the first lines of
# bin_search were lost in this copy of the notebook (the text between a "<"
# and the following ">" is missing). They are reconstructed from the surviving
# tail of the function and from bin_search_nr below -- please confirm against
# the original notebook, including whether it printed a '*' per step.

#With recursion
def bin_search(L, item):
    """Recursive binary search in a sorted sequence L.

    Returns an index i with L[i] == item, or -1 if item is not in L.
    One '*' is written to stdout for every midpoint examined.
    """
    n = len(L)
    if n == 0:
        return -1  #nothing left to search
    sys.stdout.write('*')
    m = n // 2
    if L[m] == item:
        return m
    if L[m] > item:
        return bin_search(L[:m], item)  #Search left half
    res = bin_search(L[m+1:n], item)  #Search right half
    if res == -1:
        return -1
    #res is an index into the slice L[m+1:], so shift it back into L
    return m + 1 + res


# ## Now Binary Search Without Recursion

# In[96]:

#Without recursion
def bin_search_nr(L, item):
    """Iterative binary search in a sorted sequence L.

    Returns an index i with L[i] == item, or -1 if item is not in L.
    The part of L still under consideration is L[left:right]. One '*' is
    written to stdout for every midpoint examined.
    """
    left = 0
    right = len(L)
    while right - left > 0:
        sys.stdout.write('*')
        #Integer division keeps the midpoint exact; int((left+right)/2) would
        #go through a float first.
        m = (left + right) // 2
        if L[m] == item:
            return m
        if L[m] > item:  #Search left half
            right = m
        else:
            left = m + 1  #Search right half
    return -1


# In[97]:

search(L, 100)

# In[98]:

bin_search(L, 100)

# In[99]:

bin_search_nr(L, 100)

# If you run a binary search on a list of length $n$, then in one step we reduce the
# problem to a list of length $n/2$, in another step to a list of length $n/4$, and so on.

# So the number of steps is the number of items in the sequence $n,n/2,n/4,n/8,\ldots,1$.

# In other words, the number of steps binary search takes is the number $t$ such that
# $n/2^t \leq 1$, which means $t=\lceil \log_2 n \rceil \leq \log_2 n + 1$.

# $\log_2 n$ is much much smaller than $n$.

# In[26]:

# compare n with log_2 n

# For example, Facebook can have a list of all the emails of their users, sorted by their name.
# Now, given any string ```name```, in 30 steps they can find the email corresponding to this user.