Chapter 18: Code listing

Robert Johansson

Source code listings for Numerical Python - A Practical Techniques Approach for Industry (ISBN 978-1-4842-0554-9).

The source code listings can be downloaded from http://www.apress.com/9781484205549

Imports

In [1]:
from __future__ import print_function
In [2]:
import numpy as np
np.random.seed(0)
In [3]:
import pandas as pd
In [4]:
import csv
In [5]:
import json
In [6]:
import h5py
In [7]:
import tables
In [8]:
import pickle

# cPickle is the C-accelerated pickler available only in Python 2; in
# Python 3 it was merged into the standard pickle module.  Fall back so
# this cell runs under both interpreters (the notebook already targets
# 2/3 compatibility via `from __future__ import print_function`).
try:
    import cPickle
except ImportError:
    import pickle as cPickle
In [9]:
import msgpack

CSV

In [10]:
%%writefile playerstats-2013-2014.csv
# 2013-2014 / Regular Season / All Skaters / Summary / Points
Rank,Player,Team,Pos,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,GW,OT,S,S%,TOI/GP,Shift/GP,FO%
1,Sidney Crosby,PIT,C,80,36,68,104,+18,46,11,38,0,0,5,1,259,13.9,21:58,24.0,52.5
2,Ryan Getzlaf,ANA,C,77,31,56,87,+28,31,5,23,0,0,7,1,204,15.2,21:17,25.2,49.0
3,Claude Giroux,PHI,C,82,28,58,86,+7,46,7,37,0,0,7,1,223,12.6,20:26,25.1,52.9
4,Tyler Seguin,DAL,C,80,37,47,84,+16,18,11,25,0,0,8,0,294,12.6,19:20,23.4,41.5
5,Corey Perry,ANA,R,81,43,39,82,+32,65,8,18,0,0,9,1,280,15.4,19:28,23.2,36.0
Overwriting playerstats-2013-2014.csv
In [11]:
%%writefile playerstats-2013-2014-top30.csv
# 2013-2014 / Regular Season / All Skaters / Summary / Points
Rank,Player,Team,Pos,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,GW,OT,S,S%,TOI/GP,Shift/GP,FO%
1,Sidney Crosby,PIT,C,80,36,68,104,+18,46,11,38,0,0,5,1,259,13.9,21:58,24.0,52.5
2,Ryan Getzlaf,ANA,C,77,31,56,87,+28,31,5,23,0,0,7,1,204,15.2,21:17,25.2,49.0
3,Claude Giroux,PHI,C,82,28,58,86,+7,46,7,37,0,0,7,1,223,12.6,20:26,25.1,52.9
4,Tyler Seguin,DAL,C,80,37,47,84,+16,18,11,25,0,0,8,0,294,12.6,19:20,23.4,41.5
5,Corey Perry,ANA,R,81,43,39,82,+32,65,8,18,0,0,9,1,280,15.4,19:28,23.2,36.0
6,Phil Kessel,TOR,R,82,37,43,80,-5,27,8,20,0,0,6,0,305,12.1,20:39,24.5,14.3
7,Taylor Hall,EDM,L,75,27,53,80,-15,44,7,17,0,1,1,1,250,10.8,20:00,25.4,45.7
8,Alex Ovechkin,WSH,L,78,51,28,79,-35,48,24,39,0,1,10,3,386,13.2,20:32,21.8,66.7
9,Joe Pavelski,SJS,C,82,41,38,79,+23,32,16,31,1,2,3,0,225,18.2,19:51,27.1,56.0
10,Jamie Benn,DAL,L,81,34,45,79,+21,64,5,19,1,3,3,1,279,12.2,19:09,25.0,52.8
11,Nicklas Backstrom,WSH,C,82,18,61,79,-20,54,6,44,1,1,1,0,196,9.2,19:48,23.3,50.4
12,Patrick Sharp,CHI,L,82,34,44,78,+13,40,10,25,0,0,3,1,313,10.9,18:53,22.7,54.6
13,Joe Thornton,SJS,C,82,11,65,76,+20,32,2,19,0,1,3,1,122,9.0,18:55,26.3,56.1
14,Erik Karlsson,OTT,D,82,20,54,74,-15,36,5,31,0,0,1,0,257,7.8,27:04,28.6,0.0
15,Evgeni Malkin,PIT,C,60,23,49,72,+10,62,7,30,0,0,3,0,191,12.0,20:03,21.4,48.8
16,Patrick Marleau,SJS,L,82,33,37,70,+0,18,11,23,2,2,4,0,285,11.6,20:31,27.3,52.9
17,Anze Kopitar,LAK,C,82,29,41,70,+34,24,10,23,0,0,9,2,200,14.5,20:53,25.4,53.3
18,Matt Duchene,COL,C,71,23,47,70,+8,19,5,17,0,0,6,1,217,10.6,18:29,22.0,50.3
19,Martin St. Louis,"TBL, NYR",R,81,30,39,69,+13,10,9,21,1,2,5,1,204,14.7,20:56,25.7,40.7
20,Patrick Kane,CHI,R,69,29,40,69,+7,22,10,25,0,0,6,0,227,12.8,19:36,22.9,50.0
21,Blake Wheeler,WPG,R,82,28,41,69,+4,63,8,19,0,0,4,2,225,12.4,18:41,24.0,37.5
22,Kyle Okposo,NYI,R,71,27,42,69,-9,51,5,15,0,0,4,1,195,13.8,20:26,22.2,47.5
23,David Krejci,BOS,C,80,19,50,69,+39,28,3,19,0,0,6,1,169,11.2,19:07,21.3,51.2
24,Chris Kunitz,PIT,L,78,35,33,68,+25,66,13,22,0,0,8,0,218,16.1,19:09,22.2,75.0
25,Jonathan Toews,CHI,C,76,28,40,68,+26,34,5,15,3,5,5,0,193,14.5,20:28,25.9,57.2
26,Thomas Vanek,"BUF, NYI, MTL",L,78,27,41,68,+7,46,8,18,0,0,4,0,248,10.9,19:21,21.6,43.5
27,Jaromir Jagr,NJD,R,82,24,43,67,+16,46,5,17,0,0,6,1,231,10.4,19:09,22.8,0.0
28,John Tavares,NYI,C,59,24,42,66,-6,40,8,25,0,0,4,0,188,12.8,21:14,22.3,49.1
29,Jason Spezza,OTT,C,75,23,43,66,-26,46,9,22,0,0,5,0,223,10.3,18:12,23.8,54.0
30,Jordan Eberle,EDM,R,80,28,37,65,-11,18,7,20,1,1,4,1,200,14.0,19:32,25.4,38.1
Overwriting playerstats-2013-2014-top30.csv
In [12]:
!head -n 5 playerstats-2013-2014-top30.csv
# 2013-2014 / Regular Season / All Skaters / Summary / Points
Rank,Player,Team,Pos,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,GW,OT,S,S%,TOI/GP,Shift/GP,FO%
1,Sidney Crosby,PIT,C,80,36,68,104,+18,46,11,38,0,0,5,1,259,13.9,21:58,24.0,52.5
2,Ryan Getzlaf,ANA,C,77,31,56,87,+28,31,5,23,0,0,7,1,204,15.2,21:17,25.2,49.0
3,Claude Giroux,PHI,C,82,28,58,86,+7,46,7,37,0,0,7,1,223,12.6,20:26,25.1,52.9
In [13]:
rows = []
In [14]:
# Parse the CSV file with the csv module's reader object and collect the
# parsed rows (each a list of strings) into the `rows` list created in
# the previous cell.
with open("playerstats-2013-2014.csv") as f:
    csvreader = csv.reader(f)
    print(type(csvreader))
    rows.extend(csvreader)
<type '_csv.reader'>
In [15]:
rows[1][1:6]
Out[15]:
['Player', 'Team', 'Pos', 'GP', 'G']
In [16]:
rows[2][1:6]
Out[16]:
['Sidney Crosby', 'PIT', 'C', '80', '36']
In [17]:
data = np.random.randn(100, 3)
In [18]:
# NOTE(review): `comments` is the prefix np.savetxt prepends to the
# `header` string (default "# "), not a standalone comment line.  Passing
# a full line ending in "\n" here is a trick that produces one commented
# title line followed by an *uncommented* "x, y, z" line -- which is why
# the np.loadtxt call below needs skiprows=2.
np.savetxt("data.csv", data, delimiter=",", header="x, y, z", comments="# Random x, y, z coordinates\n")
In [19]:
!head -n 5 data.csv
# Random x, y, z coordinates
x, y, z
1.764052345967664026e+00,4.001572083672232938e-01,9.787379841057392005e-01
2.240893199201457797e+00,1.867557990149967484e+00,-9.772778798764110153e-01
9.500884175255893682e-01,-1.513572082976978872e-01,-1.032188517935578448e-01
In [20]:
data_load = np.loadtxt("data.csv", skiprows=2, delimiter=",")
In [21]:
data_load[1,:]
Out[21]:
array([ 2.2408932 ,  1.86755799, -0.97727788])
In [22]:
data_load.dtype
Out[22]:
dtype('float64')
In [23]:
(data == data_load).all()
Out[23]:
True
In [24]:
np.loadtxt("playerstats-2013-2014.csv", skiprows=2, delimiter=",")
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-24-ae638a27585f> in <module>()
----> 1 np.loadtxt("playerstats-2013-2014.csv", skiprows=2, delimiter=",")

/Users/rob/miniconda/envs/py27-npm/lib/python2.7/site-packages/numpy/lib/npyio.pyc in loadtxt(fname, dtype, comments, delimiter, converters, skiprows, usecols, unpack, ndmin)
    858 
    859             # Convert each value according to its column and store
--> 860             items = [conv(val) for (conv, val) in zip(converters, vals)]
    861             # Then pack it according to the dtype's nesting
    862             items = pack_items(items, packing)

ValueError: could not convert string to float: Sidney Crosby
In [25]:
data = np.loadtxt("playerstats-2013-2014.csv", skiprows=2, delimiter=",", dtype=bytes)
In [26]:
data[0][1:6]
Out[26]:
array(['Sidney Crosby', 'PIT', 'C', '80', '36'], 
      dtype='|S13')
In [27]:
np.loadtxt("playerstats-2013-2014.csv", skiprows=2, delimiter=",", usecols=[6,7,8])
Out[27]:
array([[  68.,  104.,   18.],
       [  56.,   87.,   28.],
       [  58.,   86.,    7.],
       [  47.,   84.,   16.],
       [  39.,   82.,   32.]])
In [28]:
df = pd.read_csv("playerstats-2013-2014.csv", skiprows=1)
In [29]:
df = df.set_index("Rank")
In [30]:
df[["Player", "GP", "G", "A", "P"]]
Out[30]:
Player GP G A P
Rank
1 Sidney Crosby 80 36 68 104
2 Ryan Getzlaf 77 31 56 87
3 Claude Giroux 82 28 58 86
4 Tyler Seguin 80 37 47 84
5 Corey Perry 81 43 39 82
In [31]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 5 entries, 1 to 5
Data columns (total 20 columns):
Player      5 non-null object
Team        5 non-null object
Pos         5 non-null object
GP          5 non-null int64
G           5 non-null int64
A           5 non-null int64
P           5 non-null int64
+/-         5 non-null int64
PIM         5 non-null int64
PPG         5 non-null int64
PPP         5 non-null int64
SHG         5 non-null int64
SHP         5 non-null int64
GW          5 non-null int64
OT          5 non-null int64
S           5 non-null int64
S%          5 non-null float64
TOI/GP      5 non-null object
Shift/GP    5 non-null float64
FO%         5 non-null float64
dtypes: float64(3), int64(13), object(4)
memory usage: 840.0+ bytes
In [32]:
df[["Player", "GP", "G", "A", "P"]].to_csv("playerstats-2013-2014-subset.csv")
In [33]:
!head -n 5 playerstats-2013-2014-subset.csv
Rank,Player,GP,G,A,P
1,Sidney Crosby,80,36,68,104
2,Ryan Getzlaf,77,31,56,87
3,Claude Giroux,82,28,58,86
4,Tyler Seguin,80,37,47,84

HDF5

h5py

In [34]:
import h5py
In [35]:
# mode = "w", "r", "w-", "r+", "a"
In [36]:
f = h5py.File("data.h5", "w")
In [37]:
f.mode
Out[37]:
'r+'
In [38]:
f.flush()
In [39]:
f.close()
In [40]:
f = h5py.File("data.h5", "w")
In [41]:
f.name
Out[41]:
u'/'
In [42]:
grp1 = f.create_group("experiment1")
In [43]:
grp1.name
Out[43]:
u'/experiment1'
In [44]:
grp2_meas = f.create_group("experiment2/measurement")
In [45]:
grp2_meas.name
Out[45]:
u'/experiment2/measurement'
In [46]:
grp2_sim = f.create_group("experiment2/simulation")
In [47]:
grp2_sim.name
Out[47]:
u'/experiment2/simulation'
In [48]:
f["/experiment1"]
Out[48]:
<HDF5 group "/experiment1" (0 members)>
In [49]:
f["/experiment2/simulation"]
Out[49]:
<HDF5 group "/experiment2/simulation" (0 members)>
In [50]:
grp_expr2 = f["/experiment2"]
In [51]:
grp_expr2['simulation']
Out[51]:
<HDF5 group "/experiment2/simulation" (0 members)>
In [52]:
list(f.keys())
Out[52]:
[u'experiment1', u'experiment2']
In [53]:
list(f.items())
Out[53]:
[(u'experiment1', <HDF5 group "/experiment1" (0 members)>),
 (u'experiment2', <HDF5 group "/experiment2" (2 members)>)]
In [54]:
f.visit(lambda x: print(x))
experiment1
experiment2
experiment2/measurement
experiment2/simulation
In [55]:
f.visititems(lambda name, value: print(name, value))
experiment1 <HDF5 group "/experiment1" (0 members)>
experiment2 <HDF5 group "/experiment2" (2 members)>
experiment2/measurement <HDF5 group "/experiment2/measurement" (0 members)>
experiment2/simulation <HDF5 group "/experiment2/simulation" (0 members)>
In [56]:
"experiment1" in f
Out[56]:
True
In [57]:
"simulation" in f["experiment2"]
Out[57]:
True
In [58]:
"experiment3" in f
Out[58]:
False
In [59]:
f.flush()
In [60]:
!h5ls -r data.h5
/                        Group
/experiment1             Group
/experiment2             Group
/experiment2/measurement Group
/experiment2/simulation  Group
In [61]:
data1 = np.arange(10)
In [62]:
data2 = np.random.randn(100, 100)
In [63]:
f["array1"] = data1
In [64]:
f["/experiment2/measurement/meas1"] = data2
In [65]:
f.visititems(lambda name, value: print(name, value))
array1 <HDF5 dataset "array1": shape (10,), type "<i8">
experiment1 <HDF5 group "/experiment1" (0 members)>
experiment2 <HDF5 group "/experiment2" (2 members)>
experiment2/measurement <HDF5 group "/experiment2/measurement" (1 members)>
experiment2/measurement/meas1 <HDF5 dataset "meas1": shape (100, 100), type "<f8">
experiment2/simulation <HDF5 group "/experiment2/simulation" (0 members)>
In [66]:
ds = f["array1"]
In [67]:
ds
Out[67]:
<HDF5 dataset "array1": shape (10,), type "<i8">
In [68]:
ds.name
Out[68]:
u'/array1'
In [69]:
ds.dtype
Out[69]:
dtype('int64')
In [70]:
ds.shape
Out[70]:
(10,)
In [71]:
ds.len()
Out[71]:
10
In [72]:
ds.value
Out[72]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [73]:
ds = f["/experiment2/measurement/meas1"]
In [74]:
ds
Out[74]:
<HDF5 dataset "meas1": shape (100, 100), type "<f8">
In [75]:
ds.dtype
Out[75]:
dtype('float64')
In [76]:
ds.shape
Out[76]:
(100, 100)
In [77]:
data_full = ds[...]
In [78]:
type(data_full)
Out[78]:
numpy.ndarray
In [79]:
data_full.shape
Out[79]:
(100, 100)
In [80]:
data_col = ds[:, 0]
In [81]:
data_col.shape
Out[81]:
(100,)
In [82]:
ds[10:20:3, 10:20:3]
Out[82]:
array([[ 0.60270766, -0.34804638, -0.813596  , -1.29737966],
       [ 0.91320192, -1.06343294,  0.22734595,  0.52759738],
       [ 1.25774422, -0.32775492,  1.4849256 ,  0.28005786],
       [-0.84907287, -0.30000358,  1.79691852, -0.19871506]])
In [83]:
ds[[1,2,3], :].shape
Out[83]:
(3, 100)
In [84]:
ds[[1,2,3], :].shape
Out[84]:
(3, 100)
In [85]:
mask = ds[:, 0] > 2.0
In [86]:
mask.shape, mask.dtype
Out[86]:
((100,), dtype('bool'))
In [87]:
ds[mask, 0]
Out[87]:
array([ 2.04253623,  2.1041854 ,  2.05689385])
In [88]:
ds[mask, :5]
Out[88]:
array([[ 2.04253623, -0.91946118,  0.11467003, -0.1374237 ,  1.36552692],
       [ 2.1041854 ,  0.22725706, -1.1291663 , -0.28133197, -0.7394167 ],
       [ 2.05689385,  0.18041971, -0.06670925, -0.02835398,  0.48480475]])
In [89]:
# create empty data sets, assign and update datasets
In [90]:
ds = f.create_dataset("array2", data=np.random.randint(10, size=10))
In [91]:
ds
Out[91]:
<HDF5 dataset "array2": shape (10,), type "<i8">
In [92]:
ds.value
Out[92]:
array([0, 2, 2, 4, 7, 3, 7, 2, 4, 1])
In [93]:
ds = f.create_dataset("/experiment2/simulation/data1", shape=(5, 5), fillvalue=-1)
In [94]:
ds
Out[94]:
<HDF5 dataset "data1": shape (5, 5), type "<f4">
In [95]:
ds.value
Out[95]:
array([[-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.]], dtype=float32)
In [96]:
# Create a dataset with a huge logical size (5000**3 float32 elements,
# ~465 GiB -- see the size computation a few cells below).  Because it is
# created with a fill value and gzip compression, HDF5 only materializes
# the chunks that are actually written, so the file on disk stays small.
ds = f.create_dataset("/experiment1/simulation/data1", shape=(5000, 5000, 5000),
                      fillvalue=0, compression='gzip')
In [97]:
ds
Out[97]:
<HDF5 dataset "data1": shape (5000, 5000, 5000), type "<f4">
In [98]:
ds[:, 0, 0] = np.random.rand(5000)
In [99]:
ds[1, :, 0] += np.random.rand(5000)
In [100]:
ds[:2, :5, 0]
Out[100]:
array([[ 0.69393438,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 1.4819994 ,  0.01639538,  0.54387355,  0.11130908,  0.99287713]], dtype=float32)
In [101]:
ds.fillvalue
Out[101]:
0.0
In [102]:
f["experiment1"].visititems(lambda name, value: print(name, value))
simulation <HDF5 group "/experiment1/simulation" (1 members)>
simulation/data1 <HDF5 dataset "data1": shape (5000, 5000, 5000), type "<f4">
In [103]:
float(np.prod(ds.shape) * ds[0,0,0].nbytes) / (1024**3)  # logical dataset size in GiB (1024**3 bytes)
Out[103]:
465.66128730773926
In [104]:
f.flush()
In [105]:
f.filename
Out[105]:
u'data.h5'
In [106]:
!ls -lh data.h5
[email protected] 1 rob  staff   357K Aug  3 23:46 data.h5
In [107]:
del f["/experiment1/simulation/data1"]
In [108]:
f["experiment1"].visititems(lambda name, value: print(name, value))
simulation <HDF5 group "/experiment1/simulation" (0 members)>
In [109]:
f.close()
In [110]:
# attributes
In [111]:
# Reopen the file for read/write, creating it if missing.  Older h5py
# versions defaulted to mode "a" when none was given; current versions
# require an explicit mode, so pass it to keep this cell working.
f = h5py.File("data.h5", "a")
In [112]:
f.attrs
Out[112]:
<Attributes of HDF5 object at 4456000960>
In [113]:
f.attrs["desc"] = "Result sets from experiments and simulations"
In [114]:
f["experiment1"].attrs["date"] = "2015-1-1"
In [115]:
f["experiment2"].attrs["date"] = "2015-1-2"
In [116]:
f["experiment2/simulation/data1"].attrs["k"] = 1.5
In [117]:
f["experiment2/simulation/data1"].attrs["T"] = 1000
In [118]:
list(f["experiment1"].attrs.keys())
Out[118]:
[u'date']
In [119]:
list(f["experiment2/simulation/data1"].attrs.items())
Out[119]:
[(u'k', 1.5), (u'T', 1000)]
In [120]:
"T" in f["experiment2/simulation/data1"].attrs
Out[120]:
True
In [121]:
del f["experiment2/simulation/data1"].attrs["T"]
In [122]:
"T" in f["experiment2/simulation/data1"].attrs
Out[122]:
False
In [123]:
f["experiment2/simulation/data1"].attrs["t"] = np.array([1, 2, 3])
In [124]:
f["experiment2/simulation/data1"].attrs["t"]
Out[124]:
array([1, 2, 3])
In [125]:
f.close()

pytables

In [126]:
df = pd.read_csv("playerstats-2013-2014-top30.csv", skiprows=1)
df = df.set_index("Rank")
In [127]:
df[["Player", "Pos", "GP", "P", "G", "A", "S%", "Shift/GP"]].head(5)
Out[127]:
Player Pos GP P G A S% Shift/GP
Rank
1 Sidney Crosby C 80 104 36 68 13.9 24.0
2 Ryan Getzlaf C 77 87 31 56 15.2 25.2
3 Claude Giroux C 82 86 28 58 12.6 25.1
4 Tyler Seguin C 80 84 37 47 12.6 23.4
5 Corey Perry R 81 82 43 39 15.4 23.2
In [128]:
f = tables.open_file("playerstats-2013-2014.h5", mode="w")
In [129]:
grp = f.create_group("/", "season_2013_2014", title="NHL player statistics for the 2013/2014 season")
In [130]:
grp
Out[130]:
/season_2013_2014 (Group) 'NHL player statistics for the 2013/2014 season'
  children := []
In [131]:
f.root
Out[131]:
/ (RootGroup) ''
  children := ['season_2013_2014' (Group)]
In [132]:
class PlayerStat(tables.IsDescription):
    """Row description for the top-30 point-leaders table.

    Each class attribute declares one column of the PyTables table:
    its name, storage type, and default value (``dflt``).
    """
    player = tables.StringCol(20, dflt="")  # player name, max 20 bytes
    position = tables.StringCol(1, dflt="C")  # one-letter position code (C/L/R/D in this data)
    games_played = tables.UInt8Col(dflt=0)  # GP; an 82-game season fits in uint8
    points = tables.UInt16Col(dflt=0)  # P (= G + A in the source data)
    goals = tables.UInt16Col(dflt=0)  # G
    assists = tables.UInt16Col(dflt=0)  # A
    shooting_percentage = tables.Float64Col(dflt=0.0)  # S% column
    shifts_per_game_played = tables.Float64Col(dflt=0.0)  # Shift/GP column
In [133]:
top30_table = f.create_table(grp, 'top30', PlayerStat, "Top 30 point leaders")
In [134]:
playerstat = top30_table.row
In [135]:
type(playerstat)
Out[135]:
tables.tableextension.Row
In [136]:
# Populate the table: for each DataFrame row, copy the relevant columns
# into the table's Row buffer and commit it with append().  Appended rows
# are buffered in memory until top30_table.flush() is called (next cell).
for index, row_series in df.iterrows():
    playerstat["player"] = row_series["Player"]    
    playerstat["position"] = row_series["Pos"]    
    playerstat["games_played"] = row_series["GP"]    
    playerstat["points"] = row_series["P"]    
    playerstat["goals"] = row_series["G"]
    playerstat["assists"] = row_series["A"] 
    playerstat["shooting_percentage"] = row_series["S%"]
    playerstat["shifts_per_game_played"] = row_series["Shift/GP"]
    playerstat.append()
In [137]:
top30_table.flush()
In [138]:
top30_table.cols.player[:5]
Out[138]:
array(['Sidney Crosby', 'Ryan Getzlaf', 'Claude Giroux', 'Tyler Seguin',
       'Corey Perry'], 
      dtype='|S20')
In [139]:
top30_table.cols.points[:5]
Out[139]:
array([104,  87,  86,  84,  82], dtype=uint16)
In [140]:
def print_playerstat(row):
    """Print one player's name, points, goals, and assists on a single line.

    Parameters
    ----------
    row : mapping
        A PyTables Row (or any mapping) with keys ``"player"``,
        ``"points"``, ``"goals"`` and ``"assists"``.  The player field
        may be ``bytes`` (as PyTables StringCol returns on Python 3) or
        an already-decoded ``str``.
    """
    player = row["player"]
    # Decode only when necessary: on Python 3 calling .decode() on a str
    # raises AttributeError, while PyTables hands back raw bytes.
    if isinstance(player, bytes):
        player = player.decode("UTF-8")
    print("%20s\t%s\t%s\t%s" %
          (player, row["points"], row["goals"], row["assists"]))
In [141]:
for row in top30_table.iterrows():
    print_playerstat(row)
       Sidney Crosby	104	36	68
        Ryan Getzlaf	87	31	56
       Claude Giroux	86	28	58
        Tyler Seguin	84	37	47
         Corey Perry	82	43	39
         Phil Kessel	80	37	43
         Taylor Hall	80	27	53
       Alex Ovechkin	79	51	28
        Joe Pavelski	79	41	38
          Jamie Benn	79	34	45
   Nicklas Backstrom	79	18	61
       Patrick Sharp	78	34	44
        Joe Thornton	76	11	65
       Erik Karlsson	74	20	54
       Evgeni Malkin	72	23	49
     Patrick Marleau	70	33	37
        Anze Kopitar	70	29	41
        Matt Duchene	70	23	47
    Martin St. Louis	69	30	39
        Patrick Kane	69	29	40
       Blake Wheeler	69	28	41
         Kyle Okposo	69	27	42
        David Krejci	69	19	50
        Chris Kunitz	68	35	33
      Jonathan Toews	68	28	40
        Thomas Vanek	68	27	41
        Jaromir Jagr	67	24	43
        John Tavares	66	24	42
        Jason Spezza	66	23	43
       Jordan Eberle	65	28	37
In [142]:
for row in top30_table.where("(points > 75) & (points <= 80)"):
    print_playerstat(row)
         Phil Kessel	80	37	43
         Taylor Hall	80	27	53
       Alex Ovechkin	79	51	28
        Joe Pavelski	79	41	38
          Jamie Benn	79	34	45
   Nicklas Backstrom	79	18	61
       Patrick Sharp	78	34	44
        Joe Thornton	76	11	65
In [143]:
for row in top30_table.where("(goals > 40) & (points < 80)"):
    print_playerstat(row)
       Alex Ovechkin	79	51	28
        Joe Pavelski	79	41	38
In [144]:
f
Out[144]:
File(filename=playerstats-2013-2014.h5, title='', mode='w', root_uep='/', filters=Filters(complevel=0, shuffle=False, fletcher32=False, least_significant_digit=None))
/ (RootGroup) ''
/season_2013_2014 (Group) 'NHL player statistics for the 2013/2014 season'
/season_2013_2014/top30 (Table(30,)) 'Top 30 point leaders'
  description := {
  "assists": UInt16Col(shape=(), dflt=0, pos=0),
  "games_played": UInt8Col(shape=(), dflt=0, pos=1),
  "goals": UInt16Col(shape=(), dflt=0, pos=2),
  "player": StringCol(itemsize=20, shape=(), dflt='', pos=3),
  "points": UInt16Col(shape=(), dflt=0, pos=4),
  "position": StringCol(itemsize=1, shape=(), dflt='C', pos=5),
  "shifts_per_game_played": Float64Col(shape=(), dflt=0.0, pos=6),
  "shooting_percentage": Float64Col(shape=(), dflt=0.0, pos=7)}
  byteorder := 'little'
  chunkshape := (1489,)
In [145]:
f.flush()
In [146]:
f.close()
In [147]:
!h5ls -rv playerstats-2013-2014.h5
Opened "playerstats-2013-2014.h5" with sec2 driver.
/                        Group
    Attribute: CLASS scalar
        Type:      5-byte null-terminated ASCII string
        Data:  "GROUP"
    Attribute: PYTABLES_FORMAT_VERSION scalar
        Type:      3-byte null-terminated ASCII string
        Data:  "2.1"
    Attribute: TITLE scalar
        Type:      1-byte null-terminated ASCII string
        Data:  ""
    Attribute: VERSION scalar
        Type:      3-byte null-terminated ASCII string
        Data:  "1.0"
    Location:  1:96
    Links:     1
/season_2013_2014        Group
    Attribute: CLASS scalar
        Type:      5-byte null-terminated ASCII string
        Data:  "GROUP"
    Attribute: TITLE scalar
        Type:      46-byte null-terminated ASCII string
        Data:  "NHL player statistics for the 2013/2014 season"
    Attribute: VERSION scalar
        Type:      3-byte null-terminated ASCII string
        Data:  "1.0"
    Location:  1:1032
    Links:     1
/season_2013_2014/top30  Dataset {30/Inf}
    Attribute: CLASS scalar
        Type:      5-byte null-terminated ASCII string
        Data:  "TABLE"
    Attribute: FIELD_0_FILL scalar
        Type:      native unsigned short
        Data:  0
    Attribute: FIELD_0_NAME scalar
        Type:      7-byte null-terminated ASCII string
        Data:  "assists"
    Attribute: FIELD_1_FILL scalar
        Type:      native unsigned char
        Data:  0
    Attribute: FIELD_1_NAME scalar
        Type:      12-byte null-terminated ASCII string
        Data:  "games_played"
    Attribute: FIELD_2_FILL scalar
        Type:      native unsigned short
        Data:  0
    Attribute: FIELD_2_NAME scalar
        Type:      5-byte null-terminated ASCII string
        Data:  "goals"
    Attribute: FIELD_3_FILL scalar
        Type:      1-byte null-terminated ASCII string
        Data:  ""
    Attribute: FIELD_3_NAME scalar
        Type:      6-byte null-terminated ASCII string
        Data:  "player"
    Attribute: FIELD_4_FILL scalar
        Type:      native unsigned short
        Data:  0
    Attribute: FIELD_4_NAME scalar
        Type:      6-byte null-terminated ASCII string
        Data:  "points"
    Attribute: FIELD_5_FILL scalar
        Type:      1-byte null-terminated ASCII string
        Data:  "C"
    Attribute: FIELD_5_NAME scalar
        Type:      8-byte null-terminated ASCII string
        Data:  "position"
    Attribute: FIELD_6_FILL scalar
        Type:      native double
        Data:  0
    Attribute: FIELD_6_NAME scalar
        Type:      22-byte null-terminated ASCII string
        Data:  "shifts_per_game_played"
    Attribute: FIELD_7_FILL scalar
        Type:      native double
        Data:  0
    Attribute: FIELD_7_NAME scalar
        Type:      19-byte null-terminated ASCII string
        Data:  "shooting_percentage"
    Attribute: NROWS scalar
        Type:      native long
        Data:  30
    Attribute: TITLE scalar
        Type:      20-byte null-terminated ASCII string
        Data:  "Top 30 point leaders"
    Attribute: VERSION scalar
        Type:      3-byte null-terminated ASCII string
        Data:  "2.7"
    Location:  1:2272
    Links:     1
    Chunks:    {1489} 65516 bytes
    Storage:   1320 logical bytes, 65516 allocated bytes, 2.01% utilization
    Type:      struct {
                   "assists"          +0    native unsigned short
                   "games_played"     +2    native unsigned char
                   "goals"            +3    native unsigned short
                   "player"           +5    20-byte null-terminated ASCII string
                   "points"           +25   native unsigned short
                   "position"         +27   1-byte null-terminated ASCII string
                   "shifts_per_game_played" +28   native double
                   "shooting_percentage" +36   native double
               } 44 bytes

Pandas hdfstore

In [148]:
import pandas as pd
In [149]:
store = pd.HDFStore('store.h5')
In [150]:
df = pd.DataFrame(np.random.rand(5,5))
In [151]:
store["df1"] = df
In [152]:
df = pd.read_csv("playerstats-2013-2014-top30.csv", skiprows=1)
In [153]:
store["df2"] = df
In [154]:
store.keys()
Out[154]:
['/df1', '/df2']
In [155]:
'df2' in store
Out[155]:
True
In [156]:
df = store["df1"]
In [157]:
store.root
Out[157]:
/ (RootGroup) ''
  children := ['df1' (Group), 'df2' (Group)]
In [158]:
store.close()
In [159]:
# Open the pandas HDFStore file directly with h5py.  Read-only mode is
# sufficient since the cells below only inspect its contents, and newer
# h5py versions require an explicit mode in any case.
f = h5py.File("store.h5", "r")
In [160]:
f.visititems(lambda x, y: print(x, "\t" * int(3 - len(str(x))//8), y))
df1 			 <HDF5 group "/df1" (4 members)>
df1/axis0 		 <HDF5 dataset "axis0": shape (5,), type "<i8">
df1/axis1 		 <HDF5 dataset "axis1": shape (5,), type "<i8">
df1/block0_items 	 <HDF5 dataset "block0_items": shape (5,), type "<i8">
df1/block0_values 	 <HDF5 dataset "block0_values": shape (5, 5), type "<f8">
df2 			 <HDF5 group "/df2" (8 members)>
df2/axis0 		 <HDF5 dataset "axis0": shape (21,), type "|S8">
df2/axis1 		 <HDF5 dataset "axis1": shape (30,), type "<i8">
df2/block0_items 	 <HDF5 dataset "block0_items": shape (3,), type "|S8">
df2/block0_values 	 <HDF5 dataset "block0_values": shape (30, 3), type "<f8">
df2/block1_items 	 <HDF5 dataset "block1_items": shape (14,), type "|S4">
df2/block1_values 	 <HDF5 dataset "block1_values": shape (30, 14), type "<i8">
df2/block2_items 	 <HDF5 dataset "block2_items": shape (4,), type "|S6">
df2/block2_values 	 <HDF5 dataset "block2_values": shape (1,), type "|O8">
In [161]:
f["/df2/block0_items"].value          
Out[161]:
array(['S%', 'Shift/GP', 'FO%'], 
      dtype='|S8')
In [162]:
f["/df2/block0_values"][:3]
Out[162]:
array([[ 13.9,  24. ,  52.5],
       [ 15.2,  25.2,  49. ],
       [ 12.6,  25.1,  52.9]])
In [163]:
f["/df2/block1_items"].value  
Out[163]:
array(['Rank', 'GP', 'G', 'A', 'P', '+/-', 'PIM', 'PPG', 'PPP', 'SHG',
       'SHP', 'GW', 'OT', 'S'], 
      dtype='|S4')
In [164]:
f["/df2/block1_values"][:3, :5]
Out[164]:
array([[  1,  80,  36,  68, 104],
       [  2,  77,  31,  56,  87],
       [  3,  82,  28,  58,  86]])

JSON

In [165]:
data = ["string", 1.0, 2, None]
In [166]:
data_json = json.dumps(data)
In [167]:
data_json
Out[167]:
'["string", 1.0, 2, null]'
In [168]:
data2 = json.loads(data_json)
In [169]:
data
Out[169]:
['string', 1.0, 2, None]
In [170]:
data[0]
Out[170]:
'string'
In [171]:
data = {"one": 1, "two": 2.0, "three": "three"}
In [172]:
data_json = json.dumps(data)
In [173]:
print(data_json)
{"three": "three", "two": 2.0, "one": 1}
In [174]:
data = json.loads(data_json)
In [175]:
data["two"]
Out[175]:
2.0
In [176]:
data["three"]
Out[176]:
u'three'
In [177]:
data = {"one": [1], 
        "two": [1, 2], 
        "three": [1, 2, 3]}
In [178]:
data_json = json.dumps(data, indent=True)
In [179]:
print(data_json)
{
 "three": [
  1, 
  2, 
  3
 ], 
 "two": [
  1, 
  2
 ], 
 "one": [
  1
 ]
}
In [180]:
data = {"one": [1], 
        "two": {"one": 1, "two": 2}, 
        "three": [(1,), (1, 2), (1, 2, 3)],
        "four": "a text string"}
In [181]:
with open("data.json", "w") as f:
    json.dump(data, f)
In [182]:
!cat data.json
{"four": "a text string", "three": [[1], [1, 2], [1, 2, 3]], "two": {"two": 2, "one": 1}, "one": [1]}
In [183]:
with open("data.json", "r") as f:
    data_from_file = json.load(f)
In [184]:
data_from_file["two"]
Out[184]:
{u'one': 1, u'two': 2}
In [185]:
data_from_file["three"]
Out[185]:
[[1], [1, 2], [1, 2, 3]]
In [186]:
!head -n 20 tokyo-metro.json
{
    "C": {
        "color": "#149848", 
        "transfers": [
            [
                "C3", 
                "F15"
            ], 
            [
                "C4", 
                "Z2"
            ], 
            [
                "C4", 
                "G2"
            ], 
            [
                "C7", 
                "M14"
            ], 
In [187]:
!wc tokyo-metro.json
    1471    1508   27638 tokyo-metro.json
In [188]:
with open("tokyo-metro.json", "r") as f:
    data = json.load(f)
In [189]:
data.keys()
Out[189]:
[u'C', u'G', u'F', u'H', u'M', u'N', u'T', u'Y', u'Z']
In [190]:
data["C"].keys()
Out[190]:
[u'color', u'transfers', u'travel_times']
In [191]:
data["C"]["color"]
Out[191]:
u'#149848'
In [192]:
data["C"]["transfers"]
Out[192]:
[[u'C3', u'F15'],
 [u'C4', u'Z2'],
 [u'C4', u'G2'],
 [u'C7', u'M14'],
 [u'C7', u'N6'],
 [u'C7', u'G6'],
 [u'C8', u'M15'],
 [u'C8', u'H6'],
 [u'C9', u'H7'],
 [u'C9', u'Y18'],
 [u'C11', u'T9'],
 [u'C11', u'M18'],
 [u'C11', u'Z8'],
 [u'C12', u'M19'],
 [u'C18', u'H21']]
In [193]:
[(s, e, tt) for s, e, tt in data["C"]["travel_times"] if tt == 1]
Out[193]:
[(u'C3', u'C4', 1), (u'C7', u'C8', 1), (u'C9', u'C10', 1)]
In [194]:
data
Out[194]:
{u'C': {u'color': u'#149848',
  u'transfers': [[u'C3', u'F15'],
   [u'C4', u'Z2'],
   [u'C4', u'G2'],
   [u'C7', u'M14'],
   [u'C7', u'N6'],
   [u'C7', u'G6'],
   [u'C8', u'M15'],
   [u'C8', u'H6'],
   [u'C9', u'H7'],
   [u'C9', u'Y18'],
   [u'C11', u'T9'],
   [u'C11', u'M18'],
   [u'C11', u'Z8'],
   [u'C12', u'M19'],
   [u'C18', u'H21']],
  u'travel_times': [[u'C1', u'C2', 2],
   [u'C2', u'C3', 2],
   [u'C3', u'C4', 1],
   [u'C4', u'C5', 2],
   [u'C5', u'C6', 2],
   [u'C6', u'C7', 2],
   [u'C7', u'C8', 1],
   [u'C8', u'C9', 3],
   [u'C9', u'C10', 1],
   [u'C10', u'C11', 2],
   [u'C11', u'C12', 2],
   [u'C12', u'C13', 2],
   [u'C13', u'C14', 2],
   [u'C14', u'C15', 2],
   [u'C15', u'C16', 2],
   [u'C16', u'C17', 3],
   [u'C17', u'C18', 3],
   [u'C18', u'C19', 3]]},
 u'F': {u'color': u'#b96528',
  u'transfers': [[u'F1', u'Y1'],
   [u'F2', u'Y2'],
   [u'F3', u'Y3'],
   [u'F4', u'Y4'],
   [u'F5', u'Y5'],
   [u'F6', u'Y6'],
   [u'F7', u'Y7'],
   [u'F8', u'Y8'],
   [u'F9', u'Y9'],
   [u'F9', u'M25'],
   [u'F13', u'M9'],
   [u'F15', u'C3'],
   [u'F16', u'Z1'],
   [u'F16', u'G1']],
  u'travel_times': [[u'F1', u'F2', 3],
   [u'F2', u'F3', 2],
   [u'F3', u'F4', 3],
   [u'F4', u'F5', 2],
   [u'F5', u'F6', 2],
   [u'F6', u'F7', 2],
   [u'F7', u'F8', 2],
   [u'F8', u'F9', 2],
   [u'F9', u'F10', 3],
   [u'F10', u'F11', 2],
   [u'F11', u'F12', 2],
   [u'F12', u'F13', 2],
   [u'F13', u'F14', 3],
   [u'F14', u'F15', 2],
   [u'F15', u'F16', 2]]},
 u'G': {u'color': u'#f59230',
  u'transfers': [[u'G1', u'Z1'],
   [u'G1', u'F16'],
   [u'G2', u'Z2'],
   [u'G2', u'C4'],
   [u'G4', u'Z3'],
   [u'G5', u'M13'],
   [u'G5', u'Y16'],
   [u'G5', u'Z4'],
   [u'G5', u'N7'],
   [u'G6', u'N6'],
   [u'G6', u'M14'],
   [u'G6', u'C7'],
   [u'G9', u'M16'],
   [u'G9', u'H8'],
   [u'G11', u'T10'],
   [u'G12', u'Z9'],
   [u'G15', u'H16'],
   [u'G16', u'H17']],
  u'travel_times': [[u'G1', u'G2', 2],
   [u'G2', u'G3', 1],
   [u'G3', u'G4', 2],
   [u'G4', u'G5', 2],
   [u'G5', u'G6', 2],
   [u'G6', u'G7', 2],
   [u'G7', u'G8', 2],
   [u'G8', u'G9', 2],
   [u'G9', u'G10', 1],
   [u'G10', u'G11', 2],
   [u'G11', u'G12', 2],
   [u'G12', u'G13', 1],
   [u'G13', u'G14', 2],
   [u'G14', u'G15', 2],
   [u'G15', u'G16', 1],
   [u'G16', u'G17', 2],
   [u'G17', u'G18', 1],
   [u'G18', u'G19', 2]]},
 u'H': {u'color': u'#9cacb5',
  u'transfers': [[u'H6', u'M15'],
   [u'H6', u'C8'],
   [u'H7', u'Y18'],
   [u'H7', u'C9'],
   [u'H8', u'M16'],
   [u'H8', u'G9'],
   [u'H12', u'T11'],
   [u'H16', u'G15'],
   [u'H17', u'G16'],
   [u'H21', u'C18']],
  u'travel_times': [[u'H1', u'H2', 3],
   [u'H2', u'H3', 3],
   [u'H3', u'H4', 3],
   [u'H4', u'H5', 3],
   [u'H5', u'H6', 2],
   [u'H6', u'H7', 3],
   [u'H7', u'H8', 1],
   [u'H8', u'H9', 2],
   [u'H9', u'H10', 2],
   [u'H10', u'H11', 2],
   [u'H11', u'H12', 1],
   [u'H12', u'H13', 3],
   [u'H13', u'H14', 1],
   [u'H14', u'H15', 2],
   [u'H15', u'H16', 2],
   [u'H16', u'H17', 1],
   [u'H17', u'H18', 2],
   [u'H18', u'H19', 2],
   [u'H19', u'H20', 2],
   [u'H20', u'H21', 3]]},
 u'M': {u'color': u'#ff0000',
  u'transfers': [[u'M9', u'F13'],
   [u'M12', u'N8'],
   [u'M13', u'G5'],
   [u'M13', u'Y16'],
   [u'M13', u'Z4'],
   [u'M13', u'N7'],
   [u'M14', u'C7'],
   [u'M14', u'G6'],
   [u'M14', u'N6'],
   [u'M15', u'H6'],
   [u'M15', u'C8'],
   [u'M16', u'G9'],
   [u'M16', u'H8'],
   [u'M18', u'T9'],
   [u'M18', u'C11'],
   [u'M18', u'Z8'],
   [u'M19', u'C12'],
   [u'M22', u'N11'],
   [u'M25', u'Y9'],
   [u'M25', u'F9']],
  u'travel_times': [[u'M1', u'M2', 2],
   [u'M2', u'M3', 2],
   [u'M3', u'M4', 2],
   [u'M4', u'M5', 2],
   [u'M5', u'M6', 2],
   [u'M6', u'M7', 2],
   [u'M7', u'M8', 2],
   [u'M8', u'M9', 2],
   [u'M9', u'M10', 1],
   [u'M10', u'M11', 2],
   [u'M11', u'M12', 2],
   [u'M12', u'M13', 3],
   [u'M13', u'M14', 2],
   [u'M14', u'M15', 1],
   [u'M15', u'M16', 3],
   [u'M16', u'M17', 2],
   [u'M17', u'M18', 2],
   [u'M18', u'M19', 2],
   [u'M19', u'M20', 1],
   [u'M20', u'M21', 2],
   [u'M21', u'M22', 2],
   [u'M22', u'M23', 3],
   [u'M23', u'M24', 2],
   [u'M24', u'M25', 3],
   [u'm3', u'm4', 2],
   [u'm4', u'm5', 2],
   [u'm5', u'M6', 2]]},
 u'N': {u'color': u'#1aaca9',
  u'transfers': [[u'N1', u'T1'],
   [u'N2', u'T2'],
   [u'N3', u'T3'],
   [u'N6', u'G6'],
   [u'N6', u'M14'],
   [u'N6', u'C7'],
   [u'N7', u'Y16'],
   [u'N7', u'Z4'],
   [u'N7', u'G5'],
   [u'N7', u'M13'],
   [u'N8', u'M12'],
   [u'N9', u'Y14'],
   [u'N10', u'Y13'],
   [u'N10', u'T6'],
   [u'N11', u'M22']],
  u'travel_times': [[u'N1', u'N2', 2],
   [u'N2', u'N3', 2],
   [u'N3', u'N4', 2],
   [u'N4', u'N5', 2],
   [u'N5', u'N6', 2],
   [u'N6', u'N7', 2],
   [u'N7', u'N8', 2],
   [u'N8', u'N9', 2],
   [u'N9', u'N10', 2],
   [u'N10', u'N11', 2],
   [u'N11', u'N12', 3],
   [u'N12', u'N13', 2],
   [u'N13', u'N14', 2],
   [u'N14', u'N15', 3],
   [u'N15', u'N16', 1],
   [u'N16', u'N17', 3],
   [u'N17', u'N18', 2],
   [u'N18', u'N19', 2]]},
 u'T': {u'color': u'#1aa7d8',
  u'transfers': [[u'T6', u'N10'],
   [u'T6', u'Y13'],
   [u'T7', u'Z6'],
   [u'T9', u'M18'],
   [u'T9', u'C11'],
   [u'T9', u'Z8'],
   [u'T10', u'G11'],
   [u'T11', u'H12']],
  u'travel_times': [[u'T1', u'T2', 0],
   [u'T2', u'T3', 3],
   [u'T3', u'T4', 6],
   [u'T4', u'T5', 9],
   [u'T5', u'T6', 11],
   [u'T6', u'T7', 13],
   [u'T7', u'T8', 14],
   [u'T8', u'T9', 16],
   [u'T9', u'T10', 18],
   [u'T10', u'T11', 20],
   [u'T11', u'T12', 21],
   [u'T12', u'T13', 24],
   [u'T13', u'T14', 26],
   [u'T14', u'T15', 27],
   [u'T15', u'T16', 30],
   [u'T16', u'T17', 33],
   [u'T17', u'T18', 35],
   [u'T18', u'T19', 37],
   [u'T19', u'T20', 39],
   [u'T20', u'T21', 41],
   [u'T21', u'T22', 43],
   [u'T22', u'T23', 46],
   [u'T23', u'T24', 49]]},
 u'Y': {u'color': u'#ede7c3',
  u'transfers': [[u'Y1', u'F1'],
   [u'Y2', u'F2'],
   [u'Y3', u'F3'],
   [u'Y4', u'F4'],
   [u'Y5', u'F5'],
   [u'Y6', u'F6'],
   [u'Y7', u'F7'],
   [u'Y8', u'F8'],
   [u'Y9', u'F9'],
   [u'Y9', u'M25'],
   [u'Y13', u'T6'],
   [u'Y13', u'N10'],
   [u'Y14', u'N9'],
   [u'Y16', u'Z4'],
   [u'Y16', u'N7'],
   [u'Y16', u'G5'],
   [u'Y16', u'M13'],
   [u'Y18', u'H7'],
   [u'Y18', u'C9']],
  u'travel_times': [[u'Y1', u'Y2', 4],
   [u'Y2', u'Y3', 2],
   [u'Y3', u'Y4', 3],
   [u'Y4', u'Y5', 2],
   [u'Y5', u'Y6', 2],
   [u'Y6', u'Y7', 2],
   [u'Y7', u'Y8', 2],
   [u'Y8', u'Y9', 3],
   [u'Y9', u'Y10', 2],
   [u'Y10', u'Y11', 2],
   [u'Y11', u'Y12', 2],
   [u'Y12', u'Y13', 3],
   [u'Y13', u'Y14', 2],
   [u'Y14', u'Y15', 2],
   [u'Y15', u'Y16', 1],
   [u'Y16', u'Y17', 2],
   [u'Y17', u'Y18', 2],
   [u'Y18', u'Y19', 2],
   [u'Y19', u'Y20', 2],
   [u'Y20', u'Y21', 2],
   [u'Y21', u'Y22', 2],
   [u'Y22', u'Y23', 3],
   [u'Y23', u'Y24', 2]]},
 u'Z': {u'color': u'#a384bf',
  u'transfers': [[u'Z1', u'F16'],
   [u'Z1', u'G1'],
   [u'Z2', u'C4'],
   [u'Z2', u'G2'],
   [u'Z3', u'G4'],
   [u'Z4', u'Y16'],
   [u'Z4', u'N7'],
   [u'Z4', u'M13'],
   [u'Z4', u'G5'],
   [u'Z6', u'T7'],
   [u'Z8', u'M18'],
   [u'Z8', u'C11'],
   [u'Z8', u'T9'],
   [u'Z9', u'G12']],
  u'travel_times': [[u'Z1', u'Z2', 3],
   [u'Z2', u'Z3', 2],
   [u'Z3', u'Z4', 2],
   [u'Z4', u'Z5', 2],
   [u'Z5', u'Z6', 2],
   [u'Z6', u'Z7', 2],
   [u'Z7', u'Z8', 2],
   [u'Z8', u'Z9', 2],
   [u'Z9', u'Z10', 3],
   [u'Z10', u'Z11', 3],
   [u'Z11', u'Z12', 3],
   [u'Z12', u'Z13', 2],
   [u'Z13', u'Z14', 2]]}}
In [195]:
# Check the on-disk size of the JSON file for comparison with msgpack/pickle below.
!ls -lh tokyo-metro.json
-rw-r--r--@ 1 rob  staff    27K Jul 20 00:01 tokyo-metro.json
In [196]:
# Serialize the dataset dict to a compact msgpack byte string.
data_pack = msgpack.packb(data)
In [197]:
# Drop the in-memory dict so the later unpack demonstrates a true round-trip.
del data
In [198]:
# Inspect the packed payload's type (Python 2: msgpack returns `str`, i.e. bytes).
type(data_pack)
Out[198]:
str
In [199]:
# Packed size in bytes — roughly a tenth of the 27K JSON representation above.
len(data_pack)
Out[199]:
3021
In [200]:
# Persist the msgpack bytes; binary mode is required since the payload is raw bytes.
with open("tokyo-metro.msgpack", "wb") as f:
    f.write(data_pack)
In [201]:
# Confirm the msgpack file size on disk (~3K vs 27K for the JSON version).
!ls -lh tokyo-metro.msgpack
-rw-r--r--@ 1 rob  staff   3.0K Aug  3 23:46 tokyo-metro.msgpack
In [202]:
# Read the packed bytes back and deserialize, recovering the original dict.
# NOTE(review): newer msgpack-python releases (>= 1.0) changed the default
# handling of string keys in unpackb — the recorded output here reflects an
# older version; confirm behavior against the installed release.
with open("tokyo-metro.msgpack", "rb") as f:
    data_msgpack = f.read()
    data = msgpack.unpackb(data_msgpack)
In [203]:
# Verify the round-trip: the nine metro-line keys are back.
list(data.keys())
Out[203]:
['C', 'G', 'F', 'H', 'M', 'N', 'T', 'Y', 'Z']
In [204]:
# Serialize with cPickle (the C-accelerated pickler; Python 2 only — in
# Python 3 the `pickle` module uses the C implementation automatically).
# The file is later read back with plain `pickle.load`; the two modules
# produce interchangeable output.
with open("tokyo-metro.pickle", "wb") as f:
    cPickle.dump(data, f)
In [205]:
# Again drop the in-memory dict so the pickle load below is a genuine round-trip.
del data
In [206]:
# Pickle file size on disk — larger than msgpack (3.0K) but smaller than JSON (27K).
!ls -lh tokyo-metro.pickle
-rw-r--r--@ 1 rob  staff    11K Aug  3 23:46 tokyo-metro.pickle
In [207]:
# Deserialize with the plain `pickle` module (compatible with cPickle output).
# NOTE(review): pickle.load can execute arbitrary code — only safe here
# because the file was written by this very notebook; never unpickle
# untrusted data.
with open("tokyo-metro.pickle", "rb") as f:
    data = pickle.load(f)
In [208]:
# Verify the pickle round-trip (Python 2: dict.keys() returns a list directly).
data.keys()
Out[208]:
['C', 'G', 'F', 'H', 'M', 'N', 'T', 'Y', 'Z']

Versions

In [209]:
# Load (or reload) the third-party version_information IPython extension.
%reload_ext version_information
In [210]:
# Record the library versions used in this chapter, for reproducibility.
%version_information numpy, pandas, csv, json, tables, h5py, msgpack
Out[210]:
SoftwareVersion
Python2.7.10 64bit [GCC 4.2.1 (Apple Inc. build 5577)]
IPython3.2.1
OSDarwin 14.1.0 x86_64 i386 64bit
numpy1.9.2
pandas0.16.2
csv1.0
json2.0.9
tables3.2.0
h5py2.5.0
msgpackThe 'msgpack' distribution was not found and is required by the application