from mpes import fprocessing as fp
# from imp import reload
# reload(fp)
# Path to the single-event HDF5 data file used throughout this tutorial.
fpath = r'../data/data_20180605_131.h5'
HDF5 files can be read using a few different classes operating at different levels. The hierarchy meaningful to the end user is as follows (from low to high):
The hierarchy goes File $\in$ hdf5Reader $\in$ (hdf5Splitter, hdf5Processor)
# Lowest level: open the file as a plain File handle (h5py-style, mode r+ per the repr below).
hdff = fp.File(fpath)
hdff
<HDF5 file "data_20180605_131.h5" (mode r+)>
# Mid level: hdf5Reader wraps the File handle and adds group/attribute reading
# and conversion utilities (see the attribute diff printed further down).
hdfr = fp.hdf5Reader(fpath)
hdfr
<HDF5 file "data_20180605_131.h5" (mode r+)>
New attributes and methods in the hdf5Reader() class
# List the attributes/methods that hdf5Reader adds on top of the base File class.
reader_extras = set(dir(hdfr)) - set(dir(hdff))
print(list(reader_extras))
['groupAliases', 'readGroup', 'nameLookupDict', 'CHUNK_SIZE', 'attributeNames', 'convert', '_assembleGroups', 'summarize', 'name2alias', 'faddress', 'readAttribute', 'getAttributeNames', 'getGroupNames', 'ncores', 'nEvents', 'groupNames']
# Highest level: hdf5Processor adds binning, histogramming and saving methods
# on top of hdf5Reader (see the attribute diff printed further down).
hdfp = fp.hdf5Processor(fpath)
hdfp
<HDF5 file "data_20180605_131.h5" (mode r+)>
New attributes and methods in the hdf5Processor() class
# List the attributes/methods that hdf5Processor adds on top of hdf5Reader.
processor_extras = set(dir(hdfp)) - set(dir(hdfr))
print(list(processor_extras))
['saveHistogram', 'toSplitter', 'toBandStructure', 'histdict', 'localBinning', '_addBinners', 'viewEventHistogram', 'loadMapping', 'hdfdict', 'distributedBinning', 'saveParameters', '_delayedBinning', 'ua', 'axesdict', 'updateHistogram', 'distributedProcessBinning']
Reading components can also be done at different levels; the level of hdf5Reader() or above is recommended.
# Print an overview of the file: its address, instrument attributes (lens voltage
# settings, per the output below) and the event-stream groups with their aliases.
hdfp.summarize()
*** HDF5 file info *** File address = /scratch/metis_storage/data_20180605_131.h5 >>> Attributes <<< CAClientMajorVersion = 0 CAClientMinorVersion = 1 CompileTimeStamp = Wed Jun 13 15:31:04 2018 KTOF:Lens:A:VSet = 514.63 KTOF:Lens:B:VSet = 2199.8 KTOF:Lens:C:VSet = 76.402 KTOF:Lens:D:VSet = 261.24 KTOF:Lens:E:VSet = 558.98 KTOF:Lens:Extr:VSet = 6000.0 KTOF:Lens:F:VSet = 48.904 KTOF:Lens:Foc:VSet = 167.0 KTOF:Lens:G:VSet = 20.1 KTOF:Lens:H:VSet = 35.0 KTOF:Lens:I:VSet = 42.25 KTOF:Lens:MCPback:VSet = 1825.0 KTOF:Lens:MCPfront:VSet = 20.0 KTOF:Lens:TOF:VSet = 20.0 KTOF:Lens:UCA:VSet = 1200.0 KTOF:Lens:UFA:VSet = 600.0 KTOF:Lens:Z1:VSet = 2452.9 KTOF:Lens:Z2:VSet = 1489.9 >>> Groups <<< EventFormat, Shape = (64,), Alias = None Stream_0, Shape = (27296214,), Alias = X Stream_1, Shape = (27296214,), Alias = Y Stream_2, Shape = (27296214,), Alias = t Stream_3, Shape = (27296214,), Alias = MasterRstCtr Stream_4, Shape = (27296214,), Alias = ADC Stream_5, Shape = (27296214,), Alias = State Input
# Read the 'EventFormat' group. NOTE(review): readGroup appears to take the file
# object as an explicit first argument, hence hdfr is passed twice (once as the
# bound instance, once as the file to read from) — confirm against the API.
print(list(hdfr.readGroup(hdfr, 'EventFormat')))
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 0, 64, 0, 64, 0, 64, 0, 53, 11, 42, 11, 16, 26, 15, 1, 15, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Conversion of hdf5 to Matlab (mat) format (no data processing).
# Export the raw streams to a MATLAB .mat file (no data processing involved).
hdfr.convert('mat', save_addr='../data/data_131')
Conversion to parquet format
# Export to parquet, overwriting any previous output (pq_append=False),
# processing 1e7 events per chunk and compressing with gzip.
hdfr.convert('parquet', save_addr='../data/data_131_parquet',
             pq_append=False, chunksz=1e7, compression='gzip')
# hdf5Splitter breaks a large file into smaller HDF5 files.
hdfs = fp.hdf5Splitter(fpath)
# Split into 50 parts, showing a progress bar.
# NOTE(review): save_addr references data_114 while fpath points at
# data_20180605_131.h5 — confirm the output prefix is intended.
hdfs.split(nsplit=50, save_addr=r'../data/data_114_parts/data_114_', pbar=True)
Read binned data over 3 axes
# Load a file containing data binned over 3 axes.
fpath_binned = r'../data/binres_114.h5'
# combined=True merges the per-part results into single arrays per axis.
bindict = fp.readBinnedhdf5(fpath_binned, combined=True)
bindict.keys()
Read binned data over 4 axes
# Load a file containing data binned over 4 axes.
fpath_binned = r'../data/data_114_4axis_binned.h5'
# combined=True yields one merged 'V' entry (see the keys printed below).
bindict = fp.readBinnedhdf5(fpath_binned, combined=True)
bindict.keys()
dict_keys(['ADC', 'X', 'Y', 't', 'V'])
# combined=False keeps each binned slice separate: keys 'V0'..'V49' instead of
# a single merged 'V' (compare the two key listings in the outputs).
bindict = fp.readBinnedhdf5(fpath_binned, combined=False)
bindict.keys()
dict_keys(['ADC', 'X', 'Y', 't', 'V0', 'V1', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V2', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'V29', 'V3', 'V30', 'V31', 'V32', 'V33', 'V34', 'V35', 'V36', 'V37', 'V38', 'V39', 'V4', 'V40', 'V41', 'V42', 'V43', 'V44', 'V45', 'V46', 'V47', 'V48', 'V49', 'V5', 'V6', 'V7', 'V8', 'V9'])