import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook.
%matplotlib inline
# Work from the chapter directory so the relative 'data/...' paths below resolve.
%cd ~/minibook/chapter2/
# Download the NYC taxi archive and unpack it in the current directory.
!wget https://raw.githubusercontent.com/ipython-books/minibook-2nd-data/master/nyc_taxi.zip
!unzip nyc_taxi.zip
# List the contents of the data directory.
%ls data
nyc_data.csv nyc_fare.csv [...]
# Relative paths of the two CSV files (trip records and fares).
data_filename = 'data/nyc_data.csv'
fare_filename = 'data/nyc_fare.csv'
# Load both tables, asking pandas to parse the timestamp columns as datetimes
# instead of leaving them as strings.
data = pd.read_csv(data_filename, parse_dates=['pickup_datetime',
'dropoff_datetime'])
fare = pd.read_csv(fare_filename, parse_dates=['pickup_datetime'])
# Preview the first three rows of the trips table.
data.head(3)
# Show the column names of the trips table.
data.columns
Index(['medallion', ... 'pickup_datetime', 'dropoff_datetime', 'passenger_count', 'trip_time_in_secs', 'trip_distance', 'pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude'], dtype='object')
# Pull the pickup and dropoff coordinate columns out of the trips table
# as standalone Series.
p_lng = data['pickup_longitude']
p_lat = data['pickup_latitude']
d_lng = data['dropoff_longitude']
d_lat = data['dropoff_latitude']
# Echo the pickup longitudes.
p_lng
0 -73.955925 1 -74.005501 ... 846943 -73.978477 846944 -73.987206 Name: pickup_longitude, Length: 846945, dtype: float64
def lat_lng_to_pixels(lat, lng):
    """Project latitude/longitude (degrees) onto a Mercator-style plane.

    The arithmetic is element-wise, so scalars, NumPy arrays and pandas
    Series are all accepted and the outputs keep the type produced by that
    arithmetic (a Series in gives a Series out).

    Parameters
    ----------
    lat : latitude in degrees.
    lng : longitude in degrees.

    Returns
    -------
    tuple
        ``(x, y)``. ``x`` is linear in longitude, mapping -180..180 degrees
        to 0..100. ``y`` is the Mercator-stretched latitude, shifted and
        scaled so that the equator maps to -50. The projection is not
        finite at the poles (``lat = +/-90``).
    """
    lat_rad = lat * np.pi / 180.0
    # Mercator stretch: ln(tan(pi/4 + lat/2)).
    mercator = np.log(np.tan((lat_rad + np.pi / 2.0) / 2.0))
    x = 100 * (lng + 180.0) / 360.0
    y = 100 * (mercator - np.pi) / (2.0 * np.pi)
    return (x, y)
# Project every pickup location; note the argument order is (lat, lng).
px, py = lat_lng_to_pixels(p_lat, p_lng)
# Echo the projected x coordinates.
px
0 29.456688 1 29.442916 ... 846943 29.450423 846944 29.447998 Name: pickup_longitude, dtype: float64
# First attempt: scatter plot with default marker settings.
plt.scatter(px, py)
# Second attempt on a fresh 8x6 figure (plt.figure starts a new figure, so
# the plot above is not reused).
plt.figure(figsize=(8, 6))
# Very small (s=.1), almost transparent (alpha=.03) markers so that
# overlapping points accumulate instead of saturating.
plt.scatter(px, py, s=.1, alpha=.03)
# Use the same scale on both axes.
plt.axis('equal')
# Restrict the view to a fixed window of the projected coordinates.
plt.xlim(29.40, 29.55)
plt.ylim(-37.63, -37.54)
# Hide the axis frame, ticks and labels.
plt.axis('off')
# Number of non-missing values and the range of the projected x coordinate.
px.count(), px.min(), px.max()
(846945, 29.417137499999995, 29.714313055555561)
# Mean, median and standard deviation of the projected x coordinate.
px.mean(), px.median(), px.std()
(29.451345807768575, 29.449418333333337, 0.0097616942794720614)
# Install seaborn via conda (-q: quiet, -y: answer yes to prompts).
!conda install seaborn -q -y
import seaborn as sns
# Show which seaborn version was imported.
sns.__version__
'0.6.0'
# Histogram of trip distances using 100 evenly spaced bin edges from 0 to 10;
# values outside that range are not counted.
data.trip_distance.hist(bins=np.linspace(0., 10., 100))