#!/usr/bin/env python
# coding: utf-8

# > This is one of the 100 recipes of the [IPython Cookbook](http://ipython-books.github.io/), the definitive guide to high-performance scientific computing and data science in Python.
#

# # 4.10. Manipulating large arrays with HDF5 and PyTables

# In[ ]:

import os

import numpy as np
import tables as tb


# ## Creating an HDF5 file

# Let's create a new empty HDF5 file. The file is opened in a `with` block
# so that it is closed, and the changes are committed on disk, even if one
# of the steps below raises an exception.

# In[ ]:

with tb.open_file('myfile.h5', 'w') as f:
    # We create a new top-level group named "experiment1".
    f.create_group('/', 'experiment1')

    # Let's also add some metadata to this group.
    f.set_node_attr('/experiment1', 'date', '2014-09-01')

    # In this group, we create a 1000*1000 array named "array1".
    x = np.random.rand(1000, 1000)
    f.create_array('/experiment1', 'array1', x)

# Leaving the `with` block closes the file, which commits the changes on disk.


# ## Reading an HDF5 file

# In[ ]:

with tb.open_file('myfile.h5', 'r') as f:
    # We can retrieve an attribute by giving the group path and the
    # attribute name.
    f.get_node_attr('/experiment1', 'date')

    # We can access any item in the file using attributes. IPython's tab
    # completion is incredibly useful in this respect when exploring a file
    # interactively.
    y = f.root.experiment1.array1
    type(y)

    # The array can be used as a NumPy array, but an important distinction
    # is that it is stored on disk instead of system memory. Performing a
    # computation on this array triggers a preliminary loading of the array
    # in memory, so that it is more efficient to only access views on this
    # array. The comparison must happen while the file is still open,
    # because `y` refers to a node of the open file.
    np.array_equal(x[0, :], y[0, :])

    # It is also possible to get a node from its absolute path, which is
    # useful when this path is only known at runtime.
    f.get_node('/experiment1/array1')


# Clean-up.

# In[ ]:

os.remove('myfile.h5')


# > You'll find all the explanations, figures, references, and much more in the book (to be released later this summer).
#
# > [IPython Cookbook](http://ipython-books.github.io/), by [Cyrille Rossant](http://cyrille.rossant.net), Packt Publishing, 2014 (500 pages).