import pandas as pd
from lets_plot import *
from lets_plot.mapping import as_discrete
# Enable HTML rendering for lets-plot output.
LetsPlot.setup_html()

# Load the Delhi climate CSV, give two columns human-readable names, parse the
# date column and derive day / month / year helper columns from it.
df = (
    pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/delhi_climate.csv")
    .rename(columns={"meantemp": "mean temperature", "wind_speed": "wind speed"})
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .assign(
        day=lambda frame: frame["date"].dt.day,
        month=lambda frame: frame["date"].dt.month,
        year=lambda frame: frame["date"].dt.year,
    )
)

# Keep only the rows whose year is earlier than 2017.
df = df[df["year"] < 2017]
# Mean temperature over time, drawn as one coloured line and one facet per year.
# NOTE(review): this is a bare expression statement - the plot is neither
# assigned nor shown explicitly; presumably it relies on notebook auto-display.
(
    ggplot(df, aes(x="date", y="mean temperature"))
    + geom_line(aes(group="year", color=as_discrete("year")), size=1)
    # Axis breaks are the dates that fall on the first day of a month.
    + scale_x_datetime(breaks=df[df.date.dt.day == 1].date, format="%b %Y")
    + facet_grid(x="year", scales="free")
    + ggtitle("Mean Temperature Along Period Under Review")
    + ggsize(1000, 500)
    + theme(legend_position="bottom")
)
# Top plot: one box of mean temperature per year.
p1 = (
    ggplot()
    + geom_boxplot(
        aes(x="year", y="mean temperature", fill=as_discrete("year")),
        data=df,
        size=2,
        alpha=0.5,
    )
    + scale_x_discrete(name="year")
    + ggtitle("Mean Temperature Aggregated")
    + theme(legend_position="bottom", panel_grid="blank")
)

# Bottom plot: one box per month, faceted by year (legend hidden).
p2 = (
    ggplot()
    + geom_boxplot(
        aes(x="month", y="mean temperature", fill=as_discrete("year")),
        data=df,
        size=0.75,
        alpha=0.5,
    )
    + scale_x_continuous(breaks=list(range(1, 13)))
    + facet_grid(x="year")
    + ggtitle("Mean Temperature by Month")
    + theme(legend_position="none", panel_grid="blank")
)

# Stack both plots vertically; each one occupies a w x h cell.
w = 1000
h = 300
bunch = GGBunch()
bunch.add_plot(p1, 0, 0, width=w, height=h)
bunch.add_plot(p2, 0, h, width=w, height=h)
bunch.show()
# Mean temperature against day of month: one facet row per month and one
# coloured line per year.
# NOTE(review): bare expression statement - the plot is not assigned or shown
# explicitly; presumably it relies on notebook auto-display.
(
    ggplot(df, aes(x="day", y="mean temperature"))
    + geom_line(
        aes(group="year", color=as_discrete("year")),
        size=2,
        # Tooltip: year as the title, temperature with two decimals, full date.
        tooltips=(
            layer_tooltips()
            .title("@year")
            .format("@{mean temperature}", ".2f")
            .line("@|@{mean temperature}")
            .line("date|@month/@day/@year")
        ),
    )
    + scale_x_continuous(breaks=list(range(1, 32)))
    + facet_grid(y="month", scales="free")
    # NOTE(review): the y axis is labelled "month" although y is mapped to the
    # mean temperature; presumably the label describes the facet rows - confirm.
    + ylab("month")
    + ggtitle("Mean Temperature for Each Month")
    + theme(legend_position="bottom")
)
# Histograms of mean temperature laid out as a month (columns) x year (rows) grid.
# NOTE(review): bare expression statement - the plot is not assigned or shown
# explicitly; presumably it relies on notebook auto-display.
(
    ggplot(df, aes(x="mean temperature"))
    + geom_histogram(
        aes(group="year", fill=as_discrete("year")),
        color="black",
        bins=15,
        size=0.5,
        alpha=0.5,
        # Tooltip: bin count, then temperature (two decimals), month and year.
        tooltips=(
            layer_tooltips()
            .line("count|@..count..")
            .format("@{mean temperature}", ".2f")
            .line("@|@{mean temperature}")
            .line("@|@month")
            .line("@|@year")
        ),
    )
    + facet_grid(x="month", y="year")
    # NOTE(review): the axis labels name the facet variables rather than the
    # plotted aesthetic; presumably intentional - confirm.
    + xlab("month")
    + ylab("year")
    + ggtitle("Most Common Temperature")
    + ggsize(1000, 500)
    + theme_classic()
    + theme(legend_position="bottom")
)
# Copy of the temperature / month / year columns with the temperature cast to int.
int_mean_temp_df = df[["mean temperature", "month", "year"]].astype({"mean temperature": int})

# Heatmap-like view: month on x, integer temperature on y and as the fill
# colour, with one facet per year.
# NOTE(review): bare expression statement - the plot is not assigned or shown
# explicitly; presumably it relies on notebook auto-display.
(
    ggplot(int_mean_temp_df, aes(x="month", y="mean temperature", fill="mean temperature"))
    + geom_bin2d(
        stat="identity",
        size=0.5,
        color="white",
        alpha=0.2,
        tooltips=(
            layer_tooltips()
            .format("@{mean temperature}", ".2f")
            .line("@|@{mean temperature}")
            .format("@month", "d")
            .line("@|@month")
            .title("@year")
        ),
    )
    + scale_x_continuous(breaks=list(range(1, 13)))
    + scale_fill_gradient(low="#abd9e9", high="#d7191c")
    + facet_grid(x="year")
    + coord_fixed(ratio=0.5)
    + xlab("")
    + ggtitle("Heatmap of Temperatures by Year")
    + ggsize(1000, 500)
    + theme_classic()
    + theme(legend_position="bottom")
)
# Scatter of mean temperature against wind speed, faceted by year; both the
# point outline and the fill follow the temperature gradient.
# NOTE(review): bare expression statement - the plot is not assigned or shown
# explicitly; presumably it relies on notebook auto-display.
(
    ggplot(df, aes(x="wind speed", y="mean temperature"))
    + geom_point(
        aes(color="mean temperature", fill="mean temperature"),
        shape=21,
        size=3,
        alpha=0.2,
    )
    + scale_color_gradient(low="#abd9e9", high="#d7191c")
    + scale_fill_gradient(low="#abd9e9", high="#d7191c")
    + facet_grid(x="year")
    + ggtitle("Relation Between Mean Temperature and Wind Speed")
    + ggsize(1000, 500)
    + theme_classic()
)
# Scatter of mean temperature against humidity, faceted by year; both the
# point outline and the fill follow the humidity gradient.
# NOTE(review): bare expression statement - the plot is not assigned or shown
# explicitly; presumably it relies on notebook auto-display.
(
    ggplot(df, aes(x="humidity", y="mean temperature"))
    + geom_point(
        aes(color="humidity", fill="humidity"),
        shape=21,
        size=3,
        alpha=0.2,
    )
    + scale_color_gradient(low="#fdae61", high="#2c7bb6")
    + scale_fill_gradient(low="#fdae61", high="#2c7bb6")
    + facet_grid(x="year")
    + ggtitle("Relation Between Mean Temperature and Humidity")
    + ggsize(1000, 500)
    + theme_classic()
)
def _lagged_temperature_frame(data, lag):
    """Pair each row's mean temperature with the value ``lag`` rows later.

    Args:
        data: Frame holding the "mean temperature" and "year" columns.
        lag: Number of rows to look ahead. Rows are presumably consecutive
            days, so the lag reads as a number of days - TODO confirm.

    Returns:
        A copy of the two source columns plus a "shifted mean temperature"
        column, with every row that contains a missing value dropped.
    """
    frame = data[["mean temperature", "year"]].copy()
    frame["shifted mean temperature"] = data["mean temperature"].shift(-lag)
    return frame.dropna()


def _lag_scatter_plot(frame, title):
    """Build the lag scatter plot (temperature vs. shifted temperature).

    Args:
        frame: Output of ``_lagged_temperature_frame``.
        title: Plot title.

    Returns:
        The lets-plot plot specification, faceted by year with a 1:1 aspect
        ratio and points coloured by the unshifted temperature.
    """
    return (
        ggplot(frame, aes("mean temperature", "shifted mean temperature"))
        + geom_point(
            aes(color="mean temperature", fill="mean temperature"),
            shape=21,
            size=3,
            alpha=.2,
        )
        + scale_color_gradient(low='#abd9e9', high='#d7191c')
        + scale_fill_gradient(low='#abd9e9', high='#d7191c')
        + facet_grid(x="year")
        + coord_fixed(ratio=1)
        + ggtitle(title)
        + theme_classic()
    )


# Lag of one row.
df_shifted_by_day = _lagged_temperature_frame(df, 1)
p1 = _lag_scatter_plot(df_shifted_by_day, "One Day Lag Scatter Plot")

# Lag of 30 rows.
df_shifted_by_month = _lagged_temperature_frame(df, 30)
p2 = _lag_scatter_plot(df_shifted_by_month, "One Month Lag Scatter Plot")

# Lag of 365 rows. The last remaining row is dropped as well, exactly as in
# the original code; presumably this avoids a nearly empty trailing year
# facet - confirm.
df_shifted_by_year = _lagged_temperature_frame(df, 365)[:-1]
p3 = _lag_scatter_plot(df_shifted_by_year, "One Year Lag Scatter Plot")

# Stack the three plots vertically; each one occupies a w x h cell.
w, h = 1000, 300
bunch = GGBunch()
bunch.add_plot(p1, 0, 0, w, h)
bunch.add_plot(p2, 0, h, w, h)
bunch.add_plot(p3, 0, 2 * h, w, h)
bunch.show()
# Average temperature and humidity for every (year, month) pair.
mean_df = (
    df.groupby(by=["year", "month"])
    .mean(numeric_only=True)[["mean temperature", "humidity"]]
    .reset_index()
)

# Path through the monthly averages in the humidity / temperature plane, one
# facet per year; points are filled by month.
# NOTE(review): bare expression statement - the plot is not assigned or shown
# explicitly; presumably it relies on notebook auto-display.
(
    ggplot(mean_df, aes(x="humidity", y="mean temperature"))
    + geom_path(color="#99d8c9", size=1)
    + geom_point(
        aes(fill="month"),
        shape=21,
        size=3,
        color="#00441b",
        # Tooltip: year as the title, then month, humidity and temperature
        # (the last two with two decimals).
        tooltips=(
            layer_tooltips()
            .title("@year")
            .line("month|@month")
            .format("@humidity", ".2f")
            .line("@|@humidity")
            .format("@{mean temperature}", ".2f")
            .line("mean temperature|@{mean temperature}")
        ),
    )
    + scale_fill_gradient(name="", low="#e5f5f9", high="#2ca25f")
    + facet_grid(x="year")
    + ylab("mean temperature")
    + ggtitle("Annual Path of Mean Temperature and Humidity")
    + ggsize(1000, 500)
    + theme_classic()
)
# Autocorrelation of temperature, wind speed and humidity for every lag in
# range(365 * 3), built column by column and then melted to long format
# (one row per lag and series).
acf_df = pd.DataFrame(
    {
        "lag": list(range(365 * 3)),
        **{
            f"{column} acf": [df[column].autocorr(lag=shift) for shift in range(365 * 3)]
            for column in ("mean temperature", "wind speed", "humidity")
        },
    }
).melt(
    id_vars=["lag"],
    value_vars=["mean temperature acf", "wind speed acf", "humidity acf"],
    var_name="acf_type",
    value_name="acf_value",
)

# One facet row per series; points are coloured by their ACF value.
# NOTE(review): bare expression statement - the plot is not assigned or shown
# explicitly; presumably it relies on notebook auto-display.
(
    ggplot(acf_df, aes(x="lag", y="acf_value"))
    + geom_point(aes(color="acf_value"), size=3)
    + scale_color_gradient(low="#fc8d59", high="#91cf60")
    + facet_grid(y="acf_type")
    + ylab("ACF value")
    + ggtitle("Autocorrelation Functions")
    + ggsize(1000, 600)
    + theme(legend_position="none")
)