#!/usr/bin/env python
# coding: utf-8
#
Analyzing CIA Factbook Data Using SQL
# In this project, we'll work with data from the CIA World Factbook, a compendium of statistics about all of the countries on Earth. The Factbook contains demographic information like:
#
# - population - The population as of 2015.
# - population_growth - The annual population growth rate, as a percentage.
# - area - The total land and water area.
# Connecting Jupyter Notebook to our Database File
# In[1]:
get_ipython().run_cell_magic('capture', '', '%load_ext sql\n%sql sqlite:///factbook.db\n')
# Description of Tables in the Database
# In[5]:
get_ipython().run_cell_magic('sql', '', "SELECT * FROM sqlite_master WHERE type='table';\n")
# First 5 rows of the Facts Table in the Database
# In[6]:
get_ipython().run_cell_magic('sql', '', 'SELECT * FROM facts limit 5;\n')
# Here are the descriptions for some of the columns:
#
# - name - The name of the country.
# - area - The total land and sea area of the country.
# - population - The country's population.
# - population_growth- The country's population growth as a percentage.
# - birth_rate - The country's birth rate, or the number of births a year per 1,000 people.
# - death_rate - The country's death rate, or the number of death a year per 1,000 people.
# - area- The country's total area (both land and water).
# - area_land - The country's land area in square kilometers.
# - area_water - The country's waterarea in square kilometers.
# Total Number Of Rows In Facts Table
# In[34]:
get_ipython().run_cell_magic('sql', '', 'SELECT COUNT(*) AS ROWS FROM facts;\n')
# Summary Statistics of Facts Table
# In[8]:
get_ipython().run_cell_magic('sql', '', 'SELECT MIN(population) AS MIN_POPULATION,\n MAX(population) AS MAX_POPULATION,\n MIN(population_growth) AS MIN_POPULATION_GROWTH,\n MAX(population_growth) AS MAX_POPULATION_GROWTH FROM facts;\n')
# Outlier Countries
# It seems like the table contains a row for the whole world, which explains the population of over 7.2 billion. It also seems like the table contains a row for Antarctica, which explains the population of 0.
This seems to match the CIA Factbook page for Antarctica:
# In[14]:
get_ipython().run_cell_magic('sql', '', 'SELECT name,population FROM facts where population=(SELECT MIN(population) FROM facts);\n')
# In[15]:
get_ipython().run_cell_magic('sql', '', 'SELECT name,population FROM facts where population=(SELECT MAX(population) FROM facts);\n')
# Exploring Average population and Average Area.
# In[16]:
get_ipython().run_cell_magic('sql', '', 'SELECT AVG(population) as AVG_POPULATION,\nAVG(area) AS AVG_AREA FROM facts;\n')
# Finding countries that are densely populated.
# In[17]:
get_ipython().run_cell_magic('sql', '', 'SELECT * FROM facts\nWHERE population > (\n SELECT AVG(population)\n FROM facts\n)\n AND area < (\n SELECT AVG(area)\n FROM facts\n)\n')
# Bangladesh,Germany,Japan,Philippines,Thailand,United Kingdom, Vietnam are the densely populated countries as of 2015 stated in Central Intelligence Agency(CIA) Factbook Data.
# Finding countries that are sparsely populated.
# In[18]:
get_ipython().run_cell_magic('sql', '', 'SELECT * FROM facts\nWHERE population < (\n SELECT AVG(population)\n FROM facts\n)\n AND area > (\n SELECT AVG(area)\n FROM facts\n)\n')
# These are the sparsely populated countries as of 2015 stated in Central Intelligence Agency(CIA) Factbook Data.
# Countries having more water than land
# In[20]:
get_ipython().run_cell_magic('sql', '', 'SELECT * FROM facts WHERE area_water>area_land;\n')
# Countries having highest ratios of water to land
# In[29]:
get_ipython().run_cell_magic('sql', '', 'SELECT * from facts\nWHERE ((area_water*1000)/area_land)>(SELECT AVG((area_water*1000)/area_land) FROM facts);\n')
# Countries having higher death rate than birth rate
# In[23]:
get_ipython().run_cell_magic('sql', '', 'SELECT name, death_rate,birth_rate FROM facts WHERE death_rate>birth_rate;\n')
# Countries which will add the most people to their population next year
# In[30]:
get_ipython().run_cell_magic('sql', '', 'SELECT * from facts\nWHERE ((birth_rate*1000)/death_rate)>(SELECT AVG((birth_rate*1000)/death_rate) FROM facts);\n')
# In[ ]: