#!/usr/bin/env python
# coding: utf-8
"""Build a per-station heat map of impression changes between two date windows.

Every ``*.csv`` file in the current working directory is read as a headerless
table with the columns ``date``, ``station`` and ``impressions``.  The files
are joined on (date, station) so that each file contributes one column named
after the file.  For every station and every file column, the script computes
the mean day-aligned percent change between the "pre" window and the "post"
window, and renders the result as a styled table sorted by the ``aud`` column.
"""

import glob
import os
from datetime import datetime

import numpy as np
import pandas as pd

# Inclusive bounds of the two windows being compared.  Days are matched by
# their offset from the start of their own window.
PRE_START = datetime(year=2014, month=12, day=2)
PRE_END = datetime(year=2015, month=3, day=22)
POST_START = datetime(year=2015, month=12, day=1)
POST_END = datetime(year=2016, month=3, day=20)

# Number of leading file-derived columns that go into the heat map.
SERIES_COUNT = 12

# Positional column order of the final table; position 12 is the 'aud' column
# appended after the SERIES_COUNT value columns.
# NOTE(review): these positions depend on the order in which glob returned the
# CSV files, which is not sorted -- confirm the intended file order.
DISPLAY_ORDER = [12, 2, 4, 6, 8, 10, 0, 3, 5, 7, 9, 11, 1]


def load_impressions(paths):
    """Read the CSV files and join their impressions on (date, station).

    Args:
        paths: Iterable of CSV file paths.  Each file is read without a header
            as ``date`` (``%Y-%m-%d``), ``station``, ``impressions``.

    Returns:
        DataFrame with one column per file (named after the file, in ``paths``
        order) followed by ``date`` (datetime) and ``station`` (str), sorted
        by station and then date.

    Raises:
        ValueError: If ``paths`` is empty.
    """
    series = []
    for path in paths:
        name = os.path.split(path)[-1].split('.csv')[0]
        frame = pd.read_csv(path, names=["date", "station", "impressions"],
                            encoding="utf-8-sig")
        frame["date"] = pd.to_datetime(frame["date"], format="%Y-%m-%d")
        frame["station"] = frame["station"].astype(str)
        # Join on a real two-level index rather than a "date_station" string
        # key: splitting such a key back apart truncates station names that
        # themselves contain an underscore.
        keyed = frame.set_index(["date", "station"])["impressions"]
        series.append(keyed.rename(name))

    if not series:
        raise ValueError("no CSV files found to load")

    merged = pd.concat(series, axis=1)
    merged = merged.rename_axis(index=["date", "station"]).reset_index()
    # Value columns first, then date/station: later steps address the value
    # columns by position.
    ordered = [s.name for s in series] + ["date", "station"]
    return merged[ordered].sort_values(by=["station", "date"])


def _window(rows, start, end):
    """Return rows dated within [start, end], indexed by offset from start."""
    selected = rows[(rows["date"] >= start) & (rows["date"] <= end)]
    return selected.set_index((selected["date"] - start).rename("delta"))


def mean_percent_change(rows, value_columns):
    """Return the mean pre-to-post percent change for each value column.

    Args:
        rows: DataFrame holding a datetime ``date`` column and the value
            columns; callers pass the rows of a single station.
        value_columns: Labels of the columns to compare.

    Returns:
        Series indexed by ``value_columns``.  Each entry is the mean of
        ``(post - pre) / pre * 100`` over the day offsets present in both
        windows; offsets missing from either window are ignored.
    """
    value_columns = list(value_columns)
    before = _window(rows, PRE_START, PRE_END)[value_columns]
    after = _window(rows, POST_START, POST_END)[value_columns]
    return ((after - before) / before * 100).mean()


def build_heat_table(df, value_columns):
    """Return a station-by-column table of mean percent changes.

    Args:
        df: DataFrame with ``station`` and ``date`` columns plus the value
            columns.
        value_columns: Labels of the columns to include.

    Returns:
        Float DataFrame indexed by the unique stations of ``df`` with one
        column per entry of ``value_columns``.
    """
    value_columns = list(value_columns)
    stations = df["station"].unique()
    heat = pd.DataFrame(index=stations, columns=value_columns, dtype=float)
    for station in stations:
        # Filter each station once and compare all columns in a single pass.
        station_rows = df[df["station"] == station]
        heat.loc[station] = mean_percent_change(station_rows, value_columns)
    return heat


def style_heat_table(heat, cmap):
    """Return the heat table as a Styler with a background gradient.

    Rows containing NaN are dropped, columns are reordered by position using
    DISPLAY_ORDER, and rows are sorted by ``aud`` in descending order.
    """
    # NOTE(review): newer pandas releases may take Styler precision from a
    # different option than 'display.precision' -- confirm for the installed
    # version.
    with pd.option_context('display.precision', 4):
        table = heat.dropna().iloc[:, DISPLAY_ORDER]
        table = table.sort_values(by='aud', ascending=False)
        return table.style.background_gradient(cmap=cmap)


if __name__ == "__main__":
    # Plotting-only dependency; imported here so the helpers above can be
    # imported without it.
    import seaborn as sns

    allFiles = glob.glob(os.getcwd() + "/*.csv")
    df = load_impressions(allFiles)
    heat_df = build_heat_table(df, df.columns[:SERIES_COUNT])

    cm = sns.light_palette("green", as_cmap=True)
    # Sum of the first two file columns.
    # NOTE(review): presumably these are the two adult series -- confirm
    # against the file order.
    df['total_adults'] = df.iloc[:, 0] + df.iloc[:, 1]
    # Per-station mean in one grouped pass; assignment aligns on station.
    heat_df['aud'] = df.groupby('station')['total_adults'].mean()

    html = style_heat_table(heat_df, cm)
    # Final display cell of the notebook export; a no-op when run as a script.
    html