#!/usr/bin/env python # coding: utf-8 # # Extract The Data # # We've learned how to create a database, insert data and then update or delete it. # # We've also seen how to view the content of a table using pandas. # # But we've only seen how to display the entire contents of a single table. What if we only want a subset of the records? What if we want to extract a set based on records in more than one table? # # It's time to learn about the SELECT statement. # # First, let's connect to our database, empty both tables and create records for three flights. # # We'll need to enable foreign keys again as this setting applies to the connection: # In[7]: import sqlalchemy as sa import pandas as pd engine = sa.create_engine('sqlite:///flight.db') connection = engine.connect() connection.execute("PRAGMA foreign_keys=on") connection.execute("DELETE FROM flights") sql = """ INSERT INTO flights(name, country_code, latitude, longitude) VALUES ('hab1', 'GB', 51.50722, 0), ('hab2', 'GB', 53.46667, -2.23333), ('hab3', 'US', 40.7648, -73.9808) """ connection.execute(sql) pd.read_sql('flights', connection) # And then generate 20 randomised readings for each flight: # In[8]: from collections import namedtuple from random import uniform, randint Reading = namedtuple('Reading', 'flight, ts, temp, pressure, humidity') readings = [ Reading( flight=flight, ts=f'2015-01-01 09:{str(i+1).zfill(2)}:00', temp=round(uniform(23, 27), 1), pressure=randint(1020, 1025), humidity=randint(30, 50)) for flight in ['hab1', 'hab2', 'hab3'] for i in range(20) ] sql = """ INSERT INTO readings (flight, ts, temp, pressure, humidity) VALUES (?, ?, ?, ?, ?) """ for reading in readings: values = (reading.flight, reading.ts, reading.temp, reading.pressure, reading.humidity) connection.execute(sql, values) # Let's extract and display the readings for flight hab2. # # We've seen previously that we can pass a table name to `read_sql`. Now, we'll see that we can also pass a SELECT statement: # # ```sql # SELECT flight, ts, temp, pressure, humidity # FROM readings # WHERE flight = 'hab2' # ``` # # The SELECT clause defines the columns we want to see. # # The FROM clause defines the table(s) we want to query. # # We've seen the WHERE clause used previously in INSERT, UPDATE and DELETE statements. It applies a filter for us to define the records we want returned. # # Let's run that query: # In[9]: sql = """ SELECT flight, ts, temp, pressure, humidity FROM readings WHERE flight = 'hab2' """ pd.read_sql(sql, connection) # Let's try another example where we might be interested only in the temperature of readings taken before 09:15: # In[10]: sql = """ SELECT temp FROM readings WHERE flight = 'hab2' AND ts < '2015-01-01 09:15:00' """ pd.read_sql(sql, connection) # But what if we wanted to see the first few readings for any flights that took place in the UK? # # We'll use a JOIN clause to join the readings and flights tables together # # ```sql # SELECT readings.* # FROM readings JOIN flights # ON readings.flight = flights.name # WHERE # flights.country_code = 'GB' # AND readings.ts < '2015-01-01 09:05:00' # ``` # # Here, we're telling the database engine that, for any record in the readings table, it can use the value in the 'flight' column to find a record in the flights table by matching on the 'name' column. # # It's common in SQL to use aliases for table names in queries using joins: # # ```sql # SELECT r.* # FROM readings AS r JOIN flights AS f # ON r.flight = f.name # WHERE # f.country_code = 'GB' # AND r.ts < '2015-01-01 09:05:00' # ``` # # We can also include fields from both tables in the select clause: # # ```sql # SELECT r.flight, r.ts, r.temp, r.pressure, f.longitude, f.latitude # FROM readings AS r JOIN flights AS f # ON r.flight = f.name # WHERE # f.country_code = 'GB' # AND r.ts < '2015-01-01 09:05:00' # ``` # # Let's try running that last example: # In[11]: sql = """ SELECT r.flight, r.ts, r.temp, r.pressure, f.longitude, f.latitude FROM readings AS r JOIN flights AS f ON r.flight = f.name WHERE f.country_code = 'GB' AND r.ts < '2015-01-01 09:05:00' """ pd.read_sql(sql, connection)