In this analysis I will be looking at the dataset Climate Change: Earth Surface Temperature Data by Berkeley Earth. This dataset consists of temperature readings from around the world that go back to at least the 1760s in some areas and run up until 2013.
Note: This analysis does not include the 95% confidence interval for temperatures.
This notebook is best viewed with Jupyter nbviewer. The Contents links will not work on GitHub.
import pandas
import altair as alt
import os
from datetime import date
This dataset consists of five source files. I have downloaded them into a directory named `data_sta`. The `datafiles` list below consists of the names of each of the five .csv files.
# Gather the path of every file found anywhere under the data directory
datafiles = [
    os.path.join(root, name)
    for root, _, names in os.walk('data_sta')
    for name in names
]
print(datafiles)
['data_sta/GlobalLandTemperaturesByCountry.csv', 'data_sta/GlobalLandTemperaturesByMajorCity.csv', 'data_sta/GlobalLandTemperaturesByState.csv', 'data_sta/GlobalTemperatures.csv', 'data_sta/GlobalLandTemperaturesByCity.csv']
print_datafiles()
- Prints each datafile, in order, together with its index within the `datafiles` list.
def print_datafiles():
    """Print every entry of the module-level `datafiles` list as `<index>. <path>`."""
    for position, path in enumerate(datafiles):
        print(f"{position}. {path}")


print_datafiles()
0. data_sta/GlobalLandTemperaturesByCountry.csv 1. data_sta/GlobalLandTemperaturesByMajorCity.csv 2. data_sta/GlobalLandTemperaturesByState.csv 3. data_sta/GlobalTemperatures.csv 4. data_sta/GlobalLandTemperaturesByCity.csv
# Read the per-country readings into `country_temps`, discarding every row
# that has a NaN/empty value in any column
country_temps = pandas.read_csv('data_sta/GlobalLandTemperaturesByCountry.csv').dropna()
# Distinct country names, kept as a set
all_countries = set(country_temps['Country'].unique())
# Show the DataFrame as the cell output
country_temps
dt | AverageTemperature | AverageTemperatureUncertainty | Country | |
---|---|---|---|---|
0 | 1743-11-01 | 4.384 | 2.294 | Åland |
5 | 1744-04-01 | 1.530 | 4.680 | Åland |
6 | 1744-05-01 | 6.702 | 1.789 | Åland |
7 | 1744-06-01 | 11.609 | 1.577 | Åland |
8 | 1744-07-01 | 15.342 | 1.410 | Åland |
... | ... | ... | ... | ... |
577456 | 2013-04-01 | 21.142 | 0.495 | Zimbabwe |
577457 | 2013-05-01 | 19.059 | 1.022 | Zimbabwe |
577458 | 2013-06-01 | 17.613 | 0.473 | Zimbabwe |
577459 | 2013-07-01 | 17.000 | 0.453 | Zimbabwe |
577460 | 2013-08-01 | 19.759 | 0.717 | Zimbabwe |
544811 rows × 4 columns
# Read the per-state readings into `state_temps`, discarding every row
# that has a NaN/empty value in any column
state_temps = pandas.read_csv('data_sta/GlobalLandTemperaturesByState.csv').dropna()
# Distinct state names, kept as a set
all_states = set(state_temps['State'].unique())
# Show the DataFrame as the cell output
state_temps
dt | AverageTemperature | AverageTemperatureUncertainty | State | Country | |
---|---|---|---|---|---|
0 | 1855-05-01 | 25.544 | 1.171 | Acre | Brazil |
1 | 1855-06-01 | 24.228 | 1.103 | Acre | Brazil |
2 | 1855-07-01 | 24.371 | 1.044 | Acre | Brazil |
3 | 1855-08-01 | 25.427 | 1.073 | Acre | Brazil |
4 | 1855-09-01 | 25.675 | 1.014 | Acre | Brazil |
... | ... | ... | ... | ... | ... |
645669 | 2013-04-01 | 15.710 | 0.461 | Zhejiang | China |
645670 | 2013-05-01 | 21.634 | 0.578 | Zhejiang | China |
645671 | 2013-06-01 | 24.679 | 0.596 | Zhejiang | China |
645672 | 2013-07-01 | 29.272 | 1.340 | Zhejiang | China |
645673 | 2013-08-01 | 29.202 | 0.869 | Zhejiang | China |
620027 rows × 5 columns
# Read the per-major-city readings into `major_city_temps`, discarding every
# row that has a NaN/empty value in any column
major_city_temps = pandas.read_csv('data_sta/GlobalLandTemperaturesByMajorCity.csv').dropna()
# Distinct major city names, kept as a set
all_major_cities = set(major_city_temps['City'].unique())
# Show the DataFrame as the cell output
major_city_temps
dt | AverageTemperature | AverageTemperatureUncertainty | City | Country | Latitude | Longitude | |
---|---|---|---|---|---|---|---|
0 | 1849-01-01 | 26.704 | 1.435 | Abidjan | Côte D'Ivoire | 5.63N | 3.23W |
1 | 1849-02-01 | 27.434 | 1.362 | Abidjan | Côte D'Ivoire | 5.63N | 3.23W |
2 | 1849-03-01 | 28.101 | 1.612 | Abidjan | Côte D'Ivoire | 5.63N | 3.23W |
3 | 1849-04-01 | 26.140 | 1.387 | Abidjan | Côte D'Ivoire | 5.63N | 3.23W |
4 | 1849-05-01 | 25.427 | 1.200 | Abidjan | Côte D'Ivoire | 5.63N | 3.23W |
... | ... | ... | ... | ... | ... | ... | ... |
239171 | 2013-04-01 | 12.563 | 1.823 | Xian | China | 34.56N | 108.97E |
239172 | 2013-05-01 | 18.979 | 0.807 | Xian | China | 34.56N | 108.97E |
239173 | 2013-06-01 | 23.522 | 0.647 | Xian | China | 34.56N | 108.97E |
239174 | 2013-07-01 | 25.251 | 1.042 | Xian | China | 34.56N | 108.97E |
239175 | 2013-08-01 | 24.528 | 0.840 | Xian | China | 34.56N | 108.97E |
228175 rows × 7 columns
The 'Temperature By City' dataset is much larger than the others and takes significantly more time to load. I have commented out the loading of the dataset to save run time, but left this in for possible future analysis of each city.
# Load city data into DataFrame named `city_temps`
# city_temps = pandas.read_csv('data_sta/GlobalLandTemperaturesByCity.csv')
# Drop any rows with a NaN/Empty value for any of the attributes
# city_temps = city_temps.dropna()
# Save each unique city name into a set
# all_cities = set(city_temps['City'])
# Display DataFrame
# city_temps
get_yearly_temps(location, dataset = None)
- Returns a DataFrame of average temperatures recorded over each year for the given location.
Parameters:
location
- String of location: the name of a Country, State, or Major City.
dataset
- Optional argument to force lookup in a specific dataset. For example, New York appears in both state_temps and major_city_temps. dataset can equal one of the following three values (defaults to None if no dataset is specified): 'Country', 'State', 'Major City'.
Examples:
new_york_temps = get_yearly_temps('New York', dataset = 'State')
new_york_city_temps = get_yearly_temps('New York', dataset = 'Major City')
def get_yearly_temps(location, dataset=None):
    """Return the yearly mean temperature for a country, state or major city.

    The location is searched in the country, state and major-city data, in
    that order; the first data set that contains it and is permitted by
    ``dataset`` is used.

    Args:
        location: Name to look up. It is matched exactly against the
            ``Country``, ``State`` or ``City`` column of the corresponding
            DataFrame.
        dataset: Optional restriction to one data set: ``'Country'``,
            ``'State'`` or ``'Major City'``. ``None`` (the default) searches
            all three.

    Returns:
        A DataFrame with the columns ``Year`` and ``AverageTemperature``,
        one row per calendar year in ascending order. Each value is the mean
        of that year's ``AverageTemperature`` readings. Every year that has
        at least one reading is present, including the final recorded year.

    Raises:
        ValueError: If the location is not found in any permitted data set.
    """
    # Search location in country dataset
    if location in all_countries and (dataset is None or dataset == 'Country'):
        filtered_temps = country_temps[country_temps['Country'] == location]
    # Search location in state dataset
    elif location in all_states and (dataset is None or dataset == 'State'):
        filtered_temps = state_temps[state_temps['State'] == location]
    # Search location in major city dataset
    elif location in all_major_cities and (dataset is None or dataset == 'Major City'):
        filtered_temps = major_city_temps[major_city_temps['City'] == location]
    # Location not found
    else:
        raise ValueError("Location not found in any loaded dataset")

    # Year of each reading, parsed from the ISO `dt` string
    years = filtered_temps['dt'].map(lambda iso: date.fromisoformat(iso).year)

    # Group by year and average. Unlike a running sum that is only flushed
    # when the year changes, this also emits the last year of the series.
    # groupby sorts its keys, so the rows come out in chronological order.
    return (
        filtered_temps['AverageTemperature']
        .groupby(years)
        .mean()
        .rename_axis('Year')
        .reset_index()
    )
def _yearly_temps_line_chart(temps, location):
    """Build the Altair line chart of yearly average temperatures.

    Args:
        temps: DataFrame with the columns ``Year`` and ``AverageTemperature``,
            as returned by ``get_yearly_temps``.
        location: Location name, appended to the chart title.

    Returns:
        The Altair chart object; when it is the last expression of a notebook
        cell, it is rendered as the cell output.
    """
    return alt.Chart(temps).mark_line().encode(
        # Years are encoded as an ordinal (`:O`) axis
        x = 'Year:O',
        # zero=False: do not force the temperature axis to include zero
        y = alt.Y('AverageTemperature', scale=alt.Scale(zero=False))
    ).properties(
        title = 'Yearly Average Temperatures (C) of ' + location,
        width = 800
    )


# Each group below looks up one location and charts it. `location` and `temps`
# are reassigned every time, so they always describe the latest chart.

location = 'United States'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Canada'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Mexico'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'North America'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

# NOTE(review): 'North America' is charted twice in a row - confirm whether
# the repetition is intentional.
location = 'North America'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Brazil'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Chile'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Argentina'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Venezuela'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'South America'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'United Kingdom'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'France'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Germany'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Spain'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Sweden'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Europe'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Egypt'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'South Africa'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Kenya'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Uganda'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Africa'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'China'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'India'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Russia'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Japan'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Asia'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Australia'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'New Zealand'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Papua New Guinea'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Oceania'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

# The lookup is explicitly restricted to the major-city data for this
# 'New York' chart; the state data is requested for the last chart below.
location = 'New York'
temps = get_yearly_temps(location, dataset = 'Major City')
_yearly_temps_line_chart(temps, location)

location = 'Los Angeles'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Chicago'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'London'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Paris'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Illinois'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'California'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Florida'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'Texas'
temps = get_yearly_temps(location)
_yearly_temps_line_chart(temps, location)

location = 'New York'
temps = get_yearly_temps(location, dataset = 'State')
_yearly_temps_line_chart(temps, location)