#!/usr/bin/env python # coding: utf-8 # # CIA World Factbook SQL Project # # This CIA World Factbook is a collection of statistics and data about every country on Earth. It contains demographic, geographic, ecomomic, and government related data. # # In this project we will use SQL do analyze the data from the database. # # # Here are the descriptions for some of the columns: # # - ```name``` — the name of the country. # - ```area``` — the country's total area (both land and water) in square kilometers # - ```area_land``` — the country's land area in square kilometers. # - ```area_water``` — the country's waterarea in square kilometers. # - ```population``` — the country's population. # - ```population_growth``` — the country's population growth as a percentage. # - ```birth_rate``` — the country's birth rate, or the number of births per year per 1,000 people. # - ```death_rate``` — the country's death rate, or the number of death per year per 1,000 people. # # # ### Install ipython-sql # In[1]: # !conda install -yc conda-forge ipython-sql # ### Connect this notebook to the database # In[2]: get_ipython().run_cell_magic('capture', '', '%load_ext sql\n%sql sqlite:///factbook.db\n') # ## Data Overview # In[3]: get_ipython().run_cell_magic('sql', '', 'SELECT *\n FROM facts\n LIMIT 5;\n') # ## Summary Statistics # In[4]: get_ipython().run_cell_magic('sql', '', 'SELECT MIN(population) AS min_pop,\n MAX(population) AS max_pop,\n MIN(population_growth) AS min_pop_growth,\n MAX(population_growth) max_pop_growth \n FROM facts;\n') # There is one country with a population of zero and one country with over 7.2 billion people. Clearly these are unrealistic so let's take a closer look. # ## Exploring Outliers # In[5]: get_ipython().run_cell_magic('sql', '', 'SELECT name, population\nFROM facts\nWHERE population == (SELECT MIN(population)\n FROM facts\n );\n') # In[6]: get_ipython().run_cell_magic('sql', '', 'SELECT name, population\nFROM facts\nWHERE population == (SELECT MAX(population)\n FROM facts\n );\n') # The database includes a row for the total world population and also includes the continent of Antartica which has no permanant residents. Let's re-calculate our summary statistics without including 'World' and 'Antartica' # In[7]: get_ipython().run_cell_magic('sql', '', "SELECT MIN(population) AS min_pop,\n MAX(population) AS max_pop,\n MIN(population_growth) AS min_pop_growth,\n MAX(population_growth) max_pop_growth \n FROM facts\nWHERE name <> 'Antarctica'\n AND name <> 'World';\n") # In[8]: get_ipython().run_cell_magic('sql', '', "SELECT AVG(population) AS avg_pop,\n AVG(area) AS avg_area \n FROM facts\nWHERE name <> 'Antarctica'\n AND name <> 'World';\n") # The average population per country is 32,377,011 and the average total area is 555,093 sq. km. # ## Population Density # # Let's take a look at this in 2 ways: # # - Countries with above average ```population``` and below average ```area``` #
# - ```population```/```area``` - traditional population density measurement # # In[9]: get_ipython().run_cell_magic('sql', '', "SELECT name\n FROM facts\n WHERE population > (SELECT AVG(population)\n FROM facts\n WHERE name <> 'Antarctica'\n AND name <> 'World'\n )\n AND area < (SELECT AVG(area)\n FROM facts\n WHERE name <> 'Antarctica'\n AND name <> 'World'\n );\n") # In[10]: get_ipython().run_cell_magic('sql', '', ' SELECT name, population/area AS pop_area_ratio\n FROM facts\nORDER BY pop_area_ratio DESC\n LIMIT 20;\n') # In[11]: get_ipython().run_cell_magic('sql', '', ' SELECT name, population\n FROM facts\n WHERE name IN ("Macau", "Monaco", "Singapore", "Hong Kong", "Gaza Strip");\n') # As we can see that many of the countries with the highest populatio density (traditional ```population```/```area``` meansurement) are not on our first list of Countries with above average ```population``` and below average ```area```. # # This is due to the fact that the countries with the highest population density have a total population that is typically less than the worldwide average of 32,377,011. As such Banglasesh is the only country from our first list that is also on the list of the top 20 most densely populated countries. # # In conclusion, the first metric omits most of the densely populated countries and as such isn't a reliable metric for determining population density. # ### China is the most populated country # In[12]: get_ipython().run_cell_magic('sql', '', "SELECT name, population\n FROM facts\n WHERE population = (SELECT MAX(population)\n FROM facts\n WHERE name <> 'Antarctica'\n AND name <> 'World'\n );\n") # ### South Sudan has the highest population growth rate # In[13]: get_ipython().run_cell_magic('sql', '', "SELECT name, population_growth\n FROM facts\n WHERE population_growth = (SELECT MAX(population_growth)\n FROM facts\n WHERE name <> 'Antarctica'\n AND name <> 'World'\n );\n") # ### Which countries have the highest ratios of water to land? # In[14]: get_ipython().run_cell_magic('sql', '', " SELECT name, CAST(area_water AS FLOAT)/area_land AS ratio_water\n FROM facts\n WHERE name <> 'Antarctica'\n AND name <> 'World'\nORDER BY ratio_water DESC\n LIMIT 10;\n") # The British Indian Ocian Territory has an extreme water to land ratio. This is due to the fact that the Territory includes seven atolls and several very small islands. # ### Which countries have more water than land? # In[15]: get_ipython().run_cell_magic('sql', '', "SELECT name, ROUND(CAST(area_water AS FLOAT)/CAST(area AS FLOAT), 3) AS ratio_water\n FROM facts\n WHERE ratio_water > 0.5\n AND name <> 'Antarctica'\n AND name <> 'World';\n\n### Which countries will add the most people to their populations next year? \n") # The British Indian Ocean Territory and the Virgin Islands have more water than land. # ## Which Countries Will Add The Most People to Their Populations Next year? # # To answer this we will take the population and multiply it by the population growth rate. # In[16]: get_ipython().run_cell_magic('sql', '', " SELECT name, population, population_growth, ROUND((population * (population_growth/100)), 0)\n AS pop_increase\n FROM facts\n WHERE name <> 'Antarctica'\n AND name <> 'World'\nORDER BY pop_increase DESC\n LIMIT 10;\n") # India will add the most people to its population next year with an increase ov over 15 million. Dispite China having nearly the same population as India, India will add over twice the amonnt of people than China. This is due to India's growth rate being almost triple than China's. # ### Which countries have a higher death rate than birth rate? # # To answer this we will subtract the ```death_rate``` from the ```birth_rate``` to find the net birth rate. Then we can find which ones are negative (decreasing population). # In[17]: get_ipython().run_cell_magic('sql', '', ' SELECT name, ROUND((birth_rate - death_rate), 2) AS net_growth_rate\n FROM facts\n WHERE death_rate > birth_rate\nORDER BY net_growth_rate;\n') # This list is dominated by Eastern European & Balkan countries