# To run this notebook as shown in the README GIFs, you must first download the 2015 NYC Taxi Trip Data locally.
import os
import urllib.request

url_path = "https://modin-datasets.s3.amazonaws.com/testing/yellow_tripdata_2015-01.csv"
# Fetch the dataset only when it is not already on disk, so that re-running
# this cell does not repeat the download.
if not os.path.exists("taxi.csv"):
    # Download under a temporary name and rename on success: an interrupted
    # transfer then never leaves a truncated "taxi.csv" for read_csv to pick up.
    urllib.request.urlretrieve(url_path, "taxi.csv.part")
    os.replace("taxi.csv.part", "taxi.csv")
# Configuration handles are imported together; the settings are then applied in
# a fixed order: engine, Dask client, benchmark mode, and only then modin.pandas.
from modin.config import BenchmarkMode, Engine
from dask.distributed import Client

# Select Dask as the Modin engine.
Engine.put("dask")

# Create the Dask client with 12 workers before modin.pandas is imported.
client = Client(n_workers=12)

# Switch on Modin's benchmark mode.
# NOTE(review): presumably enabled so the %time cells measure completed work;
# confirm against the Modin configuration docs.
BenchmarkMode.put(True)

import modin.pandas as pd
%time df = pd.read_csv("taxi.csv", parse_dates=["tpep_pickup_datetime", "tpep_dropoff_datetime"], quoting=3)
CPU times: user 1.57 s, sys: 683 ms, total: 2.26 s Wall time: 14.2 s
%time isnull = df.isnull()
CPU times: user 138 ms, sys: 27.3 ms, total: 166 ms Wall time: 404 ms
# Time applying the builtin `round` to every element of a one-column frame.
# NOTE(review): the result is named `rounded_trip_distance`, but the column
# selected is "pickup_longitude" — confirm which of the two is intended.
# NOTE(review): pandas 2.1 deprecates `DataFrame.applymap` in favour of
# `DataFrame.map`; verify the installed pandas/Modin versions before switching.
%time rounded_trip_distance = df[["pickup_longitude"]].applymap(round)
CPU times: user 175 ms, sys: 28.4 ms, total: 203 ms Wall time: 663 ms