#!/usr/bin/env python
# coding: utf-8

# # Breakout 2 Solutions

# Note, you can find this notebook on Github here:
#
# http://bit.ly/advanced_data_structures_2016
#

# # Sorting airline data

# ### First, copy over the airport and flight information

# You can find the data in [airline.py](https://raw.github.com/profjsb/python-bootcamp/master/DataFiles_and_Notebooks/02_AdvancedDataStructures/airline.py)

# In[1]:


# Note that we could do this with the following command too:
# %load https://raw.githubusercontent.com/profjsb/python-bootcamp/master/DataFiles_and_Notebooks/02_AdvancedDataStructures/airline.py


# In[2]:


# Maps airport code -> airport city.
# (No backslash continuations are needed inside brackets.)
airports = {
    "DCA": "Washington, D.C.",
    "IAD": "Dulles",
    "LHR": "London-Heathrow",
    "SVO": "Moscow",
    "CDA": "Chicago-Midway",
    "SBA": "Santa Barbara",
    "LAX": "Los Angeles",
    "JFK": "New York City",
    "MIA": "Miami",
    "AUM": "Austin, Minnesota",
}

# airline, number, heading to, gate, time (decimal hours)
flights = [
    ("Southwest", 145, "DCA", 1, 6.00),
    ("United", 31, "IAD", 1, 7.1),
    ("United", 302, "LHR", 5, 6.5),
    ("Aeroflot", 34, "SVO", 5, 9.00),
    ("Southwest", 146, "CDA", 1, 9.60),
    ("United", 46, "LAX", 5, 6.5),
    ("Southwest", 23, "SBA", 6, 12.5),
    ("United", 2, "LAX", 10, 12.5),
    ("Southwest", 59, "LAX", 11, 14.5),
    ("American", 1, "JFK", 12, 11.3),
    ("USAirways", 8, "MIA", 20, 13.1),
    ("United", 2032, "MIA", 21, 15.1),
    ("SpamAir", 1, "AUM", 42, 14.4),
]


# In[3]:


# airports is a dictionary that maps airport code to airport city
# (a bare expression like this only displays a value inside a notebook)
airports


# In[4]:


# flights is a collection of tuples, each one specifying one flight
# Columns are: airline, number, heading to, gate, time (decimal hours)
flights[:3]


# # Sorting by Airline

# Python lists have a `.sort` method that sorts the list's contents. If the
# list contains collections (like tuples), it sorts based on the first item
# in each collection.
#
# Since airline is the first element of our `flights` tuples, we can sort the
# flight information by airline by running a simple sort on the list.
#

# In[5]:


# Sort the list of flights.
flights.sort()


# Note that if multiple items in `flights` have the same first item, the sort
# falls through to the next item to break the tie.
#
# Moreover, note that calling the `.sort` method of `flights` didn't return
# anything; it modified the contents of `flights` in place.

# In[6]:


for flight in flights:
    print(flight)


# # Printing the list

# Now we want to print out the sorted list in a nicer format. This is tricky,
# because different names have different lengths, so the challenge is making
# sure that each category begins printing at the same point.
#
# First, we'll figure out how to print our header information nicely

# In[7]:


# Print out the header. The \t character prints a tab.
header = "Flight \tDestination\t\tGate\tTime"
print(header)

# `expandtabs` will convert the `\t` character to spaces so we know the length
print("-" * len(header.expandtabs()))


# By using tabs (`\t`), we can space out the categories. We should use more
# tabs if the category will have longer values.
#
# Next, we'll print our data underneath this header. First we'll have a quick
# aside on **inserting variables into a string**:

# In[8]:


# First, note that we can insert variables into strings
# We accomplish this with the characters `{}` + the `format` method
print('{}'.format('hi'))


# In[9]:


# This will auto-convert items like ints or floats to a string
print('{} blind mice'.format(3))


# In[10]:


# You can also specify multiple variables to insert
print('{} shall be the number thou shalt count, and'
      ' the number of the counting shall be {}'.format(3, 3))


# In[11]:


# We can also use the `%` symbol like so:
number, word = 2, 'crazy'
print('Wait, there are %s ways to do this, that is %s!!!' % (number, word))

# And we can also directly convert variables to string format, then use +
print("Wow a " + str(3) + "rd way, truly python is " + str(2) + " cool " + str(4) + " school...")


# Now, on to printing our data.
# We'll just print whatever is in the dataset:
#
# *Note that when printing, we look up the destination name by the airport
# code key in the airports dictionary.*

# In[12]:


# Print the header
print(header)
print("-" * len(header.expandtabs()))

# Now print the flight information
for flight in flights:
    dest = airports[flight[2]]
    # Print the nicely formatted string. `format` converts int and float
    # values to strings for us, so no explicit str() call is needed.
    print('{} {} \t {} \t {} \t {}'.format(
        flight[0], flight[1], dest, flight[3], flight[4]))


# Hmmm, it's close, but there are problems related to differing lengths of
# the flight information. In particular it seems like the `Destination`
# column is giving us problems.
#
# To get around this, we can define a custom amount of whitespace for this
# column, depending on the length of the string (rather than using tabs).

# In[13]:


# Print the header
print(header)
print("-" * len(header.expandtabs()))

# Now print the flight information
destination_column_length = 20
for flight in flights:
    # pad the Destination string with trailing spaces up to the column width
    # (strings already at or beyond the width are left unchanged)
    dest = airports[flight[2]].ljust(destination_column_length)
    print('{} {} \t {} \t {} \t {}'.format(
        flight[0], flight[1], dest, flight[3], flight[4]))


# # Sorting by Departure Time

# We'll do this three ways (there are often many ways to solve a problem in
# coding)...you can decide which one makes more sense to you.

# ## Sorting the information by time manually

# Rather than using python's sorting functionality, we also could have done
# this ourselves manually. In this case, it requires about the same amount of
# code but has some tricky logic. I won't go into details on how to do this,
# but you can find the solution below...
#
# First, we create a new list, time_ordered_flights, which initially just
# contains the first element of the list flights.
# In[14]:


# Create a new list, time_ordered_flights, which initially just contains the
# first element of the list flights
time_ordered_flights = [flights[0]]
print(time_ordered_flights)


# We then loop through the remaining flights and insert each one into the
# proper position in time_ordered_flights by comparing the time element in
# each flight tuple (the fifth element, i.e. index 4).
#
# We determine where the current flight belongs by manually comparing the
# times of the flights already added to time_ordered_flights. (This is really
# trivial with lambda functions, which you'll learn later.)

# In[15]:


# Iterate through each of the remaining elements in flights to see where it
# should go in the sorted list
ix_time = 4
for flight in flights[1:]:
    this_time = flight[ix_time]
    first_time = time_ordered_flights[0][ix_time]
    last_time = time_ordered_flights[-1][ix_time]

    # Does it belong in the beginning?
    # Is current flight's time less than the time in the first list element?
    if this_time < first_time:
        # insert the flight tuple at position 0 in the list
        time_ordered_flights.insert(0, flight)
        continue

    ## ... or the end?
    # is current flight's time greater than the time in the last list element?
    if this_time > last_time:
        # append the flight tuple to the end of the list
        time_ordered_flights.append(flight)
        continue

    ## Or is it in the middle?
    ## Loop through each element and see if the cur flight is b/w two others
    ## note that range(N) yields the integers 0, 1, ... , N-1
    for ii in range(len(time_ordered_flights) - 1):
        if (this_time >= time_ordered_flights[ii][ix_time]
                and this_time <= time_ordered_flights[ii + 1][ix_time]):
            # insert the flight tuple at position ii + 1 in the list
            time_ordered_flights.insert(ii + 1, flight)
            break
    else:
        # No neighbouring pair matched. This only happens when the list holds
        # a single flight whose time equals this one; without this fallback
        # the current flight would be silently dropped.
        time_ordered_flights.append(flight)


# The printing procedure is the same as before.
# In[16]:


print("Flight \tDestination\t\tGate\tTime")
print("-"*53)
for flight in time_ordered_flights:
    # pad the destination to a fixed 20-character column
    dest = airports[flight[2]].ljust(20)
    print('{} {} \t {} \t {} \t {}'.format(
        flight[0], flight[1], dest, flight[3], flight[4]))


# ## By re-ordering our flight information + using `sort`

# If we want to use the list's `sort` method, we can take advantage of the
# fact that it operates on the first item of each list by re-ordering the
# items of our flight information.

# In[17]:


# We'll swap the airline (index 0) and the departure time (index 4) so that
# the time becomes the first item of every record
swap_order = [4, 1, 2, 3, 0]
flights_swap = [[flight[index] for index in swap_order] for flight in flights]

flights_swap[:5]


# In[18]:


# Now, we'll sort this list
flights_swap.sort()

# And finally we will swap back. Exchanging positions 0 and 4 a second time
# restores the original column order.
flights_sort = [[flight[index] for index in swap_order] for flight in flights_swap]


# In[19]:


# Print the header
print("Flight \tDestination\t\tGate\tTime")
print("-"*53) #53 instances of the "-" character

# Now print the flight information
destination_column_length = 20
for flight in flights_sort:
    # pad the Destination string with trailing spaces up to the column width
    dest = airports[flight[2]].ljust(destination_column_length)
    print('{} {} \t {} \t {} \t {}'.format(
        flight[0], flight[1], dest, flight[3], flight[4]))


# ## By using a special function to sort in one line

# To use even less code, we can use another python function called
# `operator.itemgetter()` as the key in sort. This will sort the object by the
# fifth element (time), similar to our first solution.
#
# This brings up a general tip in python: often there are simpler ways to do
# something if you look for modules / classes / functions that other people
# have already written. This often saves lines of code, makes your code more
# readable, and is less prone to bugs.
# In[20]:


import operator

# Sort flights (in place) using the fifth item, the departure time
flights.sort(key=operator.itemgetter(4))

# Now print
print("Flight \tDestination\t\tGate\tTime")
print("-"*53)
for flight in flights:
    # pad the destination to a fixed 20-character column
    dest = airports[flight[2]].ljust(20)
    print('{} {} \t {} \t {} \t {}'.format(
        flight[0], flight[1], dest, flight[3], flight[4]))


# # # Appendix

# ## Alternate printing solution

# For the sake of completeness, here's another way that we could have
# accomplished the same printing that we performed above. Instead of using tab
# characters (`\t`), we'll use some python string syntax to define how large
# we want each section of the string to be.
#
# Define how many spaces you want each string to occupy. Add enough trailing
# spaces to each element to fill this number. We'll go over string formatting
# more tomorrow.

# In[21]:


# `%.Ns` truncates a string to at most N characters, so appending 20 spaces
# first guarantees every column is exactly N characters wide.
print("%.20s %.20s %.6s %.5s" % ("Flight" + 20*' ', "Destination" + 20*' ',
                                 "Gate" + 20*' ', "Time" + 20*' '))
print("-"*53)
for flight in flights:
    # Pass the time through as a float: wrapping it in int() would truncate
    # e.g. 6.5 to 6 and print "6.00" instead of "6.50".
    print("%.20s %.20s %.6s %5.2f" % (flight[0] + ' ' + str(flight[1]) + 20*' ',
                                      airports[flight[2]] + 20*' ',
                                      str(flight[3]) + 20*' ',
                                      flight[4]))


# In[ ]: