#!/usr/bin/env python
# coding: utf-8

# ## Analysing Traffic flow indicator
# ##### The project is about finding some key indicators to help us analyse the flow of traffic on the I-94 Interstate Highway, from the given data set.
# ##### The goal of this project is to practice the skills that we have learnt so far; it will also help us become more fluent in using different techniques.

# Pandas reads the data file; Matplotlib draws the plots.
import matplotlib.pyplot as plt
import pandas as pd

# In[1]:

traffic_flow = pd.read_csv('Metro_Interstate_Traffic_Volume.csv')
traffic_flow.info()


# In[2]:

# '%matplotlib inline' renders the figures inside the Jupyter notebook.
get_ipython().run_line_magic('matplotlib', 'inline')

traffic_flow['traffic_volume'].plot.hist()
plt.show()
traffic_flow['traffic_volume'].describe()


# ##### Comparing Traffic flow day time vs night time

# In[3]:

traffic_flow['date_time'] = pd.to_datetime(traffic_flow['date_time'])

# Extract the hour once and reuse it for both masks.
hour_of_day = traffic_flow['date_time'].dt.hour

# Day time covers 07:00-18:59.  Night time is everything else, i.e. the
# evening hours (19-23) AND the early-morning hours (0-6); filtering on
# 19..23 only would silently drop the rows recorded between midnight and 7am
# from both subsets.
is_day = (hour_of_day >= 7) & (hour_of_day < 19)
is_night = (hour_of_day >= 19) | (hour_of_day < 7)

# .copy() detaches the subsets from traffic_flow so that columns added to
# them later do not raise pandas' SettingWithCopyWarning.
day_time = traffic_flow[is_day].copy()
night_time = traffic_flow[is_night].copy()

day_time.head()


# In[4]:

night_time.head()


# ##### Plotting Histogram to compare both Night and Day time data

# In[5]:

plt.figure(figsize=(10, 10))

# Both panels share the same axis limits so the two distributions can be
# compared visually.
plt.subplot(1, 2, 1)
plt.hist(day_time['traffic_volume'])
plt.title('Day Time Traffic Volume')
plt.xlim([0, 7500])
plt.ylim([0, 8000])

plt.subplot(1, 2, 2)
plt.hist(night_time['traffic_volume'])
plt.title('Night Time Traffic Volume')
plt.xlim([0, 7500])
plt.ylim([0, 8000])

plt.show()

traffic_flow['traffic_volume'].describe()


# #### Isolating data by month for the day time: we previously saw that there is not much traffic at night compared to the day, so, keeping the goal of our project in mind, we will stick to the day time.
# In[6]:

# day_time is created as a filtered slice of traffic_flow; take an
# independent copy so the column assignments below never write into a slice
# of the parent frame (avoids pandas' SettingWithCopyWarning).
day_time = day_time.copy()

day_time['month'] = day_time['date_time'].dt.month
# numeric_only=True: the frame also holds text columns (e.g. weather_main,
# weather_description) and pandas 2.0+ raises TypeError when asked to average
# them.  Only the numeric 'traffic_volume' column is read from the result.
by_month = day_time.groupby('month').mean(numeric_only=True)
by_month['traffic_volume']


# In[7]:

plt.plot(by_month['traffic_volume'])
plt.show()


# #### Isolating data by Week Days

# In[8]:

day_time['dayofweek'] = day_time['date_time'].dt.dayofweek
by_dayofweek = day_time.groupby('dayofweek').mean(numeric_only=True)
by_dayofweek['traffic_volume']  # 0 is Monday, 6 is Sunday


# In[9]:

plt.plot(by_dayofweek['traffic_volume'])
plt.show()


# #### Isolating Data by Hour time, Business Days and weekends

# In[32]:

day_time['hour'] = day_time['date_time'].dt.hour

# Filter first, then copy: only the selected rows are duplicated instead of
# the whole frame.
bussiness_days = day_time[day_time['dayofweek'] <= 4].copy()  # 4 == Friday
weekend = day_time[day_time['dayofweek'] >= 5].copy()  # 5 == Saturday

by_hour_business = bussiness_days.groupby('hour').mean(numeric_only=True)
by_hour_weekend = weekend.groupby('hour').mean(numeric_only=True)

print(by_hour_business['traffic_volume'])
print(by_hour_weekend['traffic_volume'])


# In[35]:

plt.figure(figsize=(8, 3))

plt.subplot(1, 2, 1)
plt.plot(by_hour_business['traffic_volume'])
plt.title('Business Days Traffic Volume')
plt.xlim([7, 18])
plt.ylim([1500, 6300])

plt.subplot(1, 2, 2)
plt.plot(by_hour_weekend['traffic_volume'])
plt.title('WeekEnds Traffic Volume')
plt.xlim([7, 18])
plt.ylim([1500, 4500])

plt.show()


# #### Comparing all correlation Values to find higher one

# In[62]:

# Only the numeric weather columns are correlated with traffic volume; the
# weather_main / weather_description columns are examined separately through
# group-by bar charts.
corr_temp = day_time['traffic_volume'].corr(day_time['temp'])
print("Temp vs Traffic Volume:", corr_temp)


# In[63]:

corr_snow = day_time['traffic_volume'].corr(day_time['snow_1h'])
print("Snow vs Traffic Volume:", corr_snow)


# In[64]:

corr_clouds = day_time['traffic_volume'].corr(day_time['clouds_all'])
print("Clouds vs Traffic Volume:", corr_clouds)


# In[65]:

corr_rain = day_time['traffic_volume'].corr(day_time['rain_1h'])
print("Rain vs Traffic Volume:", corr_rain)


# ##### We compared four columns' correlation values and found that the highest
# one is with temperature, so we will plot a graph between traffic volume and temperature.

# In[67]:

plt.scatter(x=day_time['temp'], y=day_time['traffic_volume'])
plt.title('Traffic Flow vs. Temp')
plt.xlabel('Temperature')
plt.ylabel('Traffic Volume')
plt.show()


# In[68]:

# numeric_only=True: day_time also holds text columns, and pandas 2.0+ raises
# TypeError when asked to average them.  Only the numeric 'traffic_volume'
# column is read from each result below.
by_weather_main = day_time.groupby('weather_main').mean(numeric_only=True)
by_weather_description = day_time.groupby('weather_description').mean(numeric_only=True)


# In[69]:

by_weather_main['traffic_volume'].plot.barh()
plt.show()


# In[75]:

# Taller figure so every weather_description label gets its own readable bar.
by_weather_description['traffic_volume'].plot.barh(figsize=(6, 9))
plt.show()


# ##### Finally, we found that traffic volume during the day time is high when there is light rain, shower snow, or drizzling weather.

# In[ ]: