import pandas as pd
# Load the Mobike SODA Shanghai sample into a DataFrame and preview the
# first rows.
csv_path = '/Users/chengjun/bigdata/soda-2017-sample/2017/Mobike_SODA_Sample/shanghai_sample.csv'
df = pd.read_csv(csv_path)
df.head()
 | orderid | bikeid | userid | start_time | start_location_x | start_location_y | end_time | end_location_x | end_location_y | track |
---|---|---|---|---|---|---|---|---|---|---|
0 | 57099 | 4717 | 4717 | 2016-08-02 17:23:43 | 121.410 | 31.306 | 2016-08-02 17:31:08 | 121.402 | 31.311 | 121.402,31.310#121.402,31.311#121.403,31.309#1... |
1 | 165262 | 413 | 413 | 2016-08-04 20:17:09 | 121.407 | 31.288 | 2016-08-04 20:19:58 | 121.408 | 31.291 | 121.407,31.288#121.408,31.289#121.408,31.290#1... |
2 | 8023 | 3993 | 3993 | 2016-08-02 08:16:52 | 121.462 | 31.312 | 2016-08-02 08:25:10 | 121.449 | 31.305 | 121.449,31.305#121.450,31.305#121.451,31.305#1... |
3 | 148871 | 1340 | 1340 | 2016-08-05 18:37:20 | 121.531 | 31.313 | 2016-08-05 18:45:19 | 121.519 | 31.308 | 121.519,31.308#121.519,31.309#121.520,31.309#1... |
4 | 10222 | 4174 | 4174 | 2016-08-06 11:04:50 | 121.401 | 31.232 | 2016-08-06 11:21:04 | 121.376 | 31.237 | 121.374,31.234#121.375,31.233#121.375,31.235#1... |
# Number of rows in the loaded sample.
df.shape[0]
101259
# Rows per userid; value_counts() lists the most frequent ids first.
rows_per_user = df["userid"].value_counts()
rows_per_user
5054 37 6913 36 101 35 1718 35 726 35 5290 35 6590 33 3627 33 6354 33 1011 32 5235 32 2439 32 3651 32 197 31 2899 31 5447 31 5928 31 4591 31 5858 31 5663 31 6131 31 4518 31 20 30 4550 30 1491 30 2483 30 6337 30 4714 30 2869 30 6239 30 .. 7627 1 9137 1 800 1 9073 1 8868 1 7947 1 3913 1 8075 1 2676 1 8831 1 8612 1 8867 1 8548 1 8973 1 9008 1 2366 1 8305 1 5259 1 1289 1 7836 1 3046 1 1439 1 8462 1 2379 1 7268 1 8590 1 714 1 9038 1 9166 1 9056 1 Name: userid, dtype: int64
# Collect the trip endpoints as (y, x) tuples -- the order folium uses for
# (latitude, longitude) -- plus one [start, end] pair per trip.
# zip() walks each coordinate column once, instead of doing four label
# lookups per row inside a Python-level loop over the whole frame.
starts = list(zip(df.start_location_y, df.start_location_x))
ends = list(zip(df.end_location_y, df.end_location_x))
# Reuse the tuples built above rather than rebuilding them for every edge.
edges = [[start, end] for start, end in zip(starts, ends)]
# Trips made by userid 5054, the id with the highest count in the
# value_counts() listing. The filter used to select userid 20, which
# contradicted the "5054" suffix carried by every name derived from it.
df5054 = df[df['userid'] == 5054]
# One [start, end] pair of (y, x) tuples per trip in that subset.
edges5054 = [
    [(start_y, start_x), (end_y, end_x)]
    for start_y, start_x, end_y, end_x in zip(
        df5054.start_location_y, df5054.start_location_x,
        df5054.end_location_y, df5054.end_location_x)
]
# Peek at the first five (y, x) start points.
starts[0:5]
[(31.305999999999997, 121.41), (31.288, 121.40700000000001), (31.311999999999998, 121.462), (31.313000000000002, 121.53100000000001), (31.231999999999999, 121.40100000000001)]
import folium, jinja2, vincent
from IPython.display import IFrame
from IPython.core.display import HTML
# Report where folium was imported from and which release it is.
for attr_name in ('__file__', '__version__'):
    print(getattr(folium, attr_name))
/Users/chengjun/anaconda/lib/python2.7/site-packages/folium/__init__.pyc 0.2.0
# Empty base map; the centre matches the start point of the first trip in
# the sample (y=31.306, x=121.410).
map_center = (31.306, 121.410)
shanghai = folium.Map(location=map_center, zoom_start=10)
shanghai
from folium import plugins
# Fresh base map with a heat layer built from the first 1000 start points.
shanghai = folium.Map(location=(31.306, 121.410), zoom_start=10)
# add_to() attaches the layer exactly as add_children() did; add_children()
# is deprecated in later folium releases, so add_to() keeps this cell
# working across versions.
plugins.HeatMap(starts[:1000]).add_to(shanghai)
shanghai
# Fresh base map with one straight line per trip, joining its start point
# to its end point. Only the first 1000 trips are drawn.
shanghai = folium.Map(location=(31.306, 121.410), zoom_start=10)
for trip in edges[:1000]:
    # add_to() replaces add_children(), which is deprecated in later
    # folium releases; the line is attached to the map the same way.
    folium.PolyLine(locations=trip).add_to(shanghai)
shanghai
# Separate base map with one straight start-to-end line per entry of
# edges5054.
shanghai5054 = folium.Map(location=(31.306, 121.410), zoom_start=10)
for trip in edges5054:
    # add_to() replaces add_children(), which is deprecated in later
    # folium releases; the line is attached to the map the same way.
    folium.PolyLine(locations=trip).add_to(shanghai5054)
shanghai5054