#!/usr/bin/env python # coding: utf-8 # # *Analyzing CIA Factbook Data Using SQLite and Python* # # **In this Project we're working with data from the CIA World Factbook, a compendium of statistics about all of the countries on Earth. You can download the SQLite database, factbook.db, from this [GitHub repo](https://github.com/factbook/factbook.sql/releases).

The Factbook contains demographic information like:** # # > name - The name of the country.
# > area - The total land and water area.
# > area_land - The country's land area in square kilometers.
# > area_water - The country's waterarea in square kilometers.
# > population - The population as of 2015.
# > population_growth - The annual population growth rate, as a percentage.
# > birth_rate - The country's birth rate, or the number of births a year per 1,000 people.
# > death_rate - The country's death rate, or the number of death a year per 1,000 people.
# # ### Aim # # **Here, we'll explore the Python SQLite workflow to explore, analyze, and visualize data from this database** # ### Overview # # - Importing pandas and sqlite3. # - Connecting to factbook.db and use pandas.read_sql_query() to return information on the tables in the database. # - We will run another query to return first 5 rows of the facts table in the database. # In[5]: import sqlite3 import pandas as pd import warnings warnings.filterwarnings('ignore') conn = sqlite3.connect('factbook.db') query = "SELECT * FROM sqlite_master WHERE type='table';" pd.read_sql_query(query, conn) # In[6]: query1 = "SELECT * FROM facts LIMIT 5" pd.read_sql_query(query1, conn) # ### Summary Statistics # # We will write a single query that returns the: # # - minimum population # - maximum population # - minimum population growth # - maximum population growth # In[7]: query2 = '''SELECT MIN(population) 'min_pop', MAX(population) 'max_pop', MIN(population_growth) 'min_pop_growth', MAX(population_growth) 'max_pop_growth' FROM facts''' pd.read_sql_query(query2, conn) # ### Exploring Outliers # # - We will write a query that return the countries with a population of 0. # - We will write a query that return the countries with a population of 7256490011. # In[8]: query3 = '''SELECT * FROM facts WHERE population == 0 or population == 7256490011''' pd.read_sql_query(query3, conn) # **Here, we can observe that country listed with zero population is Antartica, since there is nobody living there.
And, the country listed with 7.2 Billion people is actually the world itself where whole world's population gets counted in just a single country.** # ### Histograms # # - Using just the non-outlier rows, we will generate a 2 by 2 grid of histograms for the following columns: # # - population # - population_growth # - birth_rate # - death_rate # In[9]: import matplotlib.pyplot as plt import seaborn as sns get_ipython().run_line_magic('matplotlib', 'inline') fig = plt.figure(figsize =(10, 5)) ax = fig.add_subplot(1, 1, 1) query4 = '''SELECT population, population_growth, birth_rate, death_rate FROM facts WHERE population != 0 and population != 7256490011;''' pd.read_sql_query(query4, conn).hist(ax = ax) plt.show() # ### Further Exploration # # ***Which countries have the highest population density?*** # In[10]: query5 = '''SELECT name, cast(population as float)/cast(area_land as float) 'population_density' FROM facts ORDER BY population_density desc''' pd.read_sql_query(query5, conn) # ***Histogram of population densities.*** # In[11]: fig1 = plt.figure(figsize=(10, 5)) ax1 = fig1.add_subplot(1, 1, 1) pd.read_sql_query(query5, conn).hist(ax = ax1) plt.show() # ***Which countries have the highest ratios of water to land?*** # In[12]: query6 = '''SELECT name, cast(area_water as float)/cast(area_land as float) 'water_land_ratio' FROM facts ORDER BY water_land_ratio desc''' pd.read_sql_query(query6, conn) # ***Which countries have more water than land?*** # In[13]: query7 = '''SELECT name, cast(area_water as float) - cast(area_land as float) 'diff' FROM facts WHERE diff > 0 ORDER BY diff desc''' pd.read_sql_query(query7, conn) # ***British Indian Ocean Territory and Virgin Islands are the only two countries with more water area than land.*** # In[ ]: