#!/usr/bin/env python # coding: utf-8 # ![](http://www.sqlitetutorial.net/wp-content/uploads/2016/01/SQLite-Python.jpg) # # SQLite with python # # References: # # * [SQLite tutorial](http://www.sqlitetutorial.net/) # * [SQLite and python](http://www.sqlitetutorial.net/sqlite-python/) # * [SQLite and python](https://www.tutorialspoint.com/sqlite/sqlite_python.htm) # # ## What is SQLite? # # from [What is SQLite?](http://www.sqlitetutorial.net/what-is-sqlite/) # # SQLite is a software library that provides a relational database management system. The lite in SQLite means light weight in terms of setup, database administration, and required resource. # # SQLite has the following noticeable features: self-contained, serverless, zero-configuration, transactional. # Serverless # # Normally, an RDBMS such as MySQL, PostgreSQL, etc., requires a separate server process to operate. The applications that want to access the database server use TCP/IP protocol to send and receive requests. This is called client/server architecture. # # SQLite does NOT work this way. SQLite does NOT require a server to run. SQLite database is integrated with the application that accesses the database. The applications interact with the SQLite database read and write directly from the database files stored on disk. # # The following diagram illustrates the SQLite server-less architecture: # # ![](http://www.sqlitetutorial.net/wp-content/uploads/2015/12/What-is-SQLite.jpg) # # SQLite has three important features: # # * Self-Contained # * Zero-configuration # * Transactional # # # ## Full example # ### Connect To Database # # Following Python code shows how to connect to an existing database. If the database does not exist, then it will be created and finally a database object will be returned. # # Te database of the examples has been dowloaded from [SQLite sample database](http://www.sqlitetutorial.net/sqlite-sample-database/) # # The following database diagram illustrates the chinook database tables and their relationships: # # ![](http://www.sqlitetutorial.net/wp-content/uploads/2015/11/sqlite-sample-database-color.jpg) # In[1]: #!/usr/bin/python import sqlite3 sqlite_conn = sqlite3.connect('./db/chinook.db') print("Opened database successfully") # We show the tables in database # # [Show tables in sqlite database in python](https://stackoverflow.com/questions/31986520/show-tables-in-sqlite-database-in-python) # In[2]: cur = sqlite_conn.cursor() cur.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name") available_table = (cur.fetchall()) for table in available_table: print(table) # We show the column names in table # # [See the column names for table](https://www.daniweb.com/programming/software-development/threads/124403/sqlite3-how-to-see-column-names-for-table) # In[3]: cur.execute("PRAGMA table_info(tracks)") rows = cur.fetchall() for row in rows: print(row) # [PRAGMA Statements](www.sqlite.org/pragma.html) # # Others examples with PRAGMA statements are: # In[4]: cur.execute("PRAGMA page_count") print(cur.fetchall()) cur.execute("PRAGMA function_list") print(cur.fetchall()) cur.execute("PRAGMA database_list") print(cur.fetchall()) # ### SELECT sentence # # The SELECT statement is used to select data from a database. # # The data returned is stored in a result table, called the result-set. # In[5]: cur = sqlite_conn.execute("SELECT * FROM tracks LIMIT 5;") rows = cur.fetchall() for row in rows: print(row) # In[6]: cur = sqlite_conn.execute("SELECT * FROM tracks LIMIT 5;") row = cur.fetchone() print(row) row = cur.fetchone() print(row) row = cur.fetchone() print(row) row = cur.fetchone() print(row) row = cur.fetchone() print(row) # ### Fetch # # There are three `fetch` functions in python with SQLite: # # * **fetchone()**: This method fetches the next row of a query result set, returning a single sequence, or None when no more data is available. # # # * **fetchmany([size = cursor.arraysize])**: This routine fetches the next set of rows of a query result, returning a list. An empty list is returned when no more rows are available. The method tries to fetch as many rows as indicated by the size parameter. # # # * **fetchall()**: This routine fetches all (remaining) rows of a query result, returning a list. An empty list is returned when no rows are available. # # # In[7]: cur = sqlite_conn.execute("SELECT * FROM tracks LIMIT 5;") rows = cur.fetchmany(2) print("fetchmany 2 ----------------") for row in rows: print(row) row = cur.fetchone() print("fetchone ----------------") print(row) print("fetchall ----------------") rows = cur.fetchall() for row in rows: print(row) # In[8]: cur = sqlite_conn.execute(""" --==== SELECT Name, Composer, Milliseconds FROM tracks WHERE Milliseconds >= ? AND Milliseconds <= ? ORDER BY Milliseconds; --====""", (200000, 201000)) rows = cur.fetchall() for row in rows: print(row) # ### SQLite and pandas # # References: # # * [pandas with databases](https://www.dataquest.io/blog/python-pandas-databases/) # # yotube # # * [SQL y Data Science #1: Introduccion a SQL](https://www.youtube.com/watch?v=7NSr6C8IhCo) # * [SQL for Data Science #2: SQLite+pandas to analyze 10 million NYC citibike records](https://www.youtube.com/watch?v=fL53-abGSuM) # * [SQL for Data Science #3a: SQLite+pandas to analyze 10 million NYC citibike records](https://www.youtube.com/watch?v=dUtBqDqmyQg) # * [SQL for Data Science #3b: SQLite+pandas to analyze 10 million NYC citibike records](https://www.youtube.com/watch?v=rB5di9XjC94) # # # * [Python Pandas connect directly to SQLite, Oracle, IBM Db2, MS SQL Server, PostgreSQL, MySQL](https://www.youtube.com/watch?v=gC-0CaRzR48) # # # In[10]: import pandas as pd # In[12]: df = pd.read_sql_query(sql = """ --==== SELECT * FROM tracks LIMIT 5; --====""", con = sqlite_conn) df # In[13]: df = pd.read_sql_query(sql = """ --==== SELECT * FROM tracks LIMIT %d; --====""" % (10), con = sqlite_conn) df # In[14]: df = pd.read_sql_query(sql = """ SELECT Name, Composer, Milliseconds FROM tracks WHERE Milliseconds >= %d AND Milliseconds <= %d ORDER BY Milliseconds; --====""" % (200000, 201000), con = sqlite_conn) df # ### Join # # A JOIN clause is used to combine rows from two or more tables, based on a related column between them. # # ![](https://i.stack.imgur.com/1UKp7.png) # # Different Types of SQL JOINs # # Here are the different types of the JOINs in SQL: # # * **(INNER) JOIN**: Returns records that have matching values in both tables # # ![](https://www.w3schools.com/sql/img_innerjoin.gif) # # * **LEFT (OUTER) JOIN**: Return all records from the left table, and the matched records from the right table # # ![](https://www.w3schools.com/sql/img_leftjoin.gif) # # * **RIGHT (OUTER) JOIN**: Return all records from the right table, and the matched records from the left table # # ![](https://www.w3schools.com/sql/img_rightjoin.gif) # # * **FULL (OUTER) JOIN**: Return all records when there is a match in either left or right table # # ![](https://www.w3schools.com/sql/img_fulljoin.gif) # # In the following sentence we will join three tables: tracks, albums and artists. The result will be a table with the name of the artis/group, the title of the album and the name of the track. We will sort teh results by artis and album. # In[33]: cur = sqlite_conn.execute(""" --==== ==================================================== --==== INICIO DE LA QUERY SELECT artists.Name, albums.Title, tracks.Name FROM tracks LEFT JOIN albums ON albums.Albumid = tracks.Albumid LEFT JOIN artists ON albums.Artistid = artists.Artistid ORDER BY artists.Name, albums.Title ASC; --==== FIN DE LA QUERY --==== ==================================================== """) rows = cur.fetchmany(20) # Only the 20 first results for row in rows: print(row) rows = cur.fetchall() # A example with analysis. We will calculate the time average by album in milliseconds. # In[35]: cur = sqlite_conn.execute(""" --==== ==================================================== --==== INICIO DE LA QUERY SELECT artists.Name, albums.Title, tracks.Name, ROUND(AVG(tracks.Milliseconds),0) FROM tracks -- TABLA PRINCIPAL LEFT JOIN albums ON albums.Albumid = tracks.Albumid LEFT JOIN artists ON albums.Artistid = artists.Artistid GROUP BY artists.Name, albums.Title ORDER BY artists.Name, albums.Title ASC; --==== FIN DE LA QUERY --==== ==================================================== """) rows = cur.fetchmany(10) for row in rows: print(row) rows = cur.fetchall() # We will calculate the number of tracks in each album. # In[34]: cur = sqlite_conn.execute(""" --==== ==================================================== --==== INICIO DE LA QUERY SELECT artists.Name, albums.Title, COUNT(tracks.Name) AS N FROM tracks LEFT JOIN albums ON albums.Albumid = tracks.Albumid LEFT JOIN artists ON albums.Artistid = artists.Artistid GROUP BY artists.Name, albums.Title ORDER BY N DESC; --==== FIN DE LA QUERY --==== ==================================================== """) rows = cur.fetchmany(10) for row in rows: print(row) rows = cur.fetchall() # ### SQL statement from file # # In the previous examples we have seen SQL codes very large, with comments and several analysis. If is a code that will be used in several examples or analysis, then we will be more efficient if we will save the SQL code in a file. # # #### Example 1 # # If the SQL statement is very large then is a better way to save the SQL in a file \*.sql and load it in the `read_sql_query`. In the file _example.sql_ is written the following SQL code. # # ``` # --==== # SELECT Name, Composer, Milliseconds # FROM tracks # WHERE Milliseconds >= 200000 # AND # Milliseconds <= 201000 # ORDER BY Milliseconds; # --==== # ``` # In[24]: f = open("example.sql") sql_statement = f.read() f.close() # In[25]: print(sql_statement) # In[26]: df = pd.read_sql_query(sql = sql_statement, con = sqlite_conn) df # #### Example 2 # # If the SQL statement is very large then is a better way to save the SQL in a file \*.sql and load it in the `read_sql_query`. In the file _example2.sql_ is written the following SQL code. # # ``` # --==== # SELECT Name, Composer, Milliseconds # FROM %s # WHERE Milliseconds >= %d # AND # Milliseconds <= %d # ORDER BY Milliseconds; # --==== # ``` # # We can see the %s (string) and %d (integer) insertions. These parameters allow the reutilitation of the SQL sentence. # In[27]: f = open("example2.sql") sql_statement = f.read() f.close() # In[30]: df = pd.read_sql_query(sql = sql_statement % ('tracks', 200000, 200500), con = sqlite_conn) df # In[31]: df = pd.read_sql_query(sql = sql_statement % ('tracks', 200500, 201000), con = sqlite_conn) df # ### Close de connect # # We close the conexion with database with the following command: # In[15]: sqlite_conn.close()