Higher Dimensional Data

In [ ]:
import pathlib 
from collections import defaultdict

import h5py
import pandas as pd
import numpy as np
import xarray as xr

import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm,Normalize
import matplotlib.cm as cm

from bluesky_tutorial_utils import nexus, fetch
In [ ]:
# Un-comment this for better-looking plots on high-resolution screens.
# %config InlineBackend.figure_format = 'retina'
In [ ]:
# NOTE(review): presumably fetches the simulation files that are later read
# from './rsoxs_simulation_data' — confirm in bluesky_tutorial_utils.fetch.
# The trailing semicolon suppresses the cell's output.
fetch.rsoxs_simulation_data();

Working with Multiple xArrays: Gathering Data

Build Index Table (Table of Contents)

In [ ]:
def build_pandas_index(nxs_path):
    """Build an index table with one row per NeXus file in a directory.

    Each ``*.nxs`` file directly inside ``nxs_path`` is opened read-only and
    every entry of its ``entry/instrument/simulation_engine/notes`` group is
    read into a dict. The path of the file is stored under the extra key
    ``'nxs'`` so the matching data can be loaded later.

    Parameters
    ----------
    nxs_path : str or pathlib.Path
        Directory to search for ``.nxs`` files (not searched recursively).

    Returns
    -------
    pandas.DataFrame
        One row per file, ordered by sorted file path. The frame is empty
        when the directory contains no ``.nxs`` files.
    """
    nxs_path = pathlib.Path(nxs_path)
    # Match on the actual '.nxs' extension and sort so that the row order is
    # reproducible; plain glob() order depends on the file system.
    nxs_files = sorted(nxs_path.glob('*.nxs'))

    index_table = []
    for nxs_file in nxs_files:
        with h5py.File(nxs_file, 'r') as nxs:
            notes = nxs[u'entry/instrument/simulation_engine/notes']
            # v[()] reads each dataset's value while the file is still open.
            config = {k: v[()] for k, v in notes.items()}
        config['nxs'] = nxs_file
        index_table.append(config)
    return pd.DataFrame(index_table)
In [ ]:
# Build the table of contents: one row per file found in the data directory.
toc = build_pandas_index('./rsoxs_simulation_data//')
In [ ]:
# Display the index table returned by build_pandas_index.
toc
In [ ]:
# Keep only the count/min/max rows of the describe() summary.
toc.describe().loc[['count','min','max']]

Select a Subset of Data from the Index

In [ ]:
# Filter the index on fixed parameter values, then order the remaining rows
# by energy.
sdf = (
    toc
    .query('Radius==40.0 & EndAngle==360.0 & PhysSize==5 & NumX==512')
    .sort_values('Energy')
)
sdf.describe().loc[['count','min','max']]

Gather Data

In [ ]:
def gather(df):
    """Load the image for every row of ``df`` and collect the row parameters.

    Parameters
    ----------
    df : pandas.DataFrame
        Index table with an ``'nxs'`` column holding the file to load; every
        other column is treated as a parameter of that file.

    Returns
    -------
    data_arrays : list
        One object per row, as returned by ``nexus.read_singleimg_nxs``, in
        row order.
    coords : collections.defaultdict of list
        Maps each column name other than ``'nxs'`` to the list of that
        column's values, in the same row order as ``data_arrays``.
    """
    coords = defaultdict(list)
    data_arrays = []
    for _, row in df.iterrows():
        data_arrays.append(nexus.read_singleimg_nxs(row['nxs']))

        # Series.iteritems() was removed in pandas 2.0; items() yields the
        # same (label, value) pairs and exists in older versions too.
        for col_index, value in row.items():
            if col_index == 'nxs':
                continue
            coords[col_index].append(value)
    return data_arrays, coords
In [ ]:
# gather() returns the loaded images plus, for every column except 'nxs',
# the list of that column's values in row order.
data_arrays,coords = gather(sdf)
In [ ]:
# Display the list of loaded images (one entry per selected row).
data_arrays
In [ ]:
# Display the per-column value lists collected by gather().
coords
In [ ]:
# Plot the second gathered image with a logarithmic colour scale.
data_arrays[1].plot(
    norm=LogNorm(vmin=1e-9, vmax=1),
    aspect=1.2,
    size=5,
)

Multiple xArrays: simple xr.concat

In [ ]:
# Same selection as in the "Gather Data" section: fixed parameters, rows
# ordered by energy.
sdf = (
    toc
    .query('Radius==40.0 & EndAngle==360.0 & PhysSize==5 & NumX==512')
    .sort_values('Energy')
)
display(sdf.describe().loc[['count','min','max']])

data_arrays,coords = gather(sdf)

# Stack the individual images along a new dimension named 'Energy'.
da = xr.concat(data_arrays, dim='Energy')
da
In [ ]:
# Label the positions along 'Energy' with the energies of the selected rows
# (sdf is sorted by Energy, matching the order the images were gathered in).
da = da.assign_coords({'Energy': sdf['Energy'].values})
da
In [ ]:
# Take the slice whose Qy coordinate is nearest to 0 and plot it with a
# logarithmic colour scale and a logarithmic y axis.
da.sel(Qy=0, method='nearest').plot(
    norm=LogNorm(vmin=1e-9, vmax=1),
    yscale='log',
)
In [ ]:
# da.plot(col='Energy',col_wrap=3,norm=LogNorm(1e-9,1))

Building xArrays: Multi-Index

In [ ]:
# Radius is no longer fixed in the query, so rows are ordered by energy
# first and radius second.
sdf = (
    toc
    .query('EndAngle==360.0 & PhysSize==5 & NumX==512')
    .sort_values(['Energy', 'Radius'])
)
display(sdf.describe().loc[['count','min','max']])
In [ ]:
data_arrays,coords = gather(sdf)

# Deliberate failure: a list of two names is not a valid `dim` for xr.concat.
# The error is caught and shown so the demonstration stays visible while
# "Restart & Run All" can still continue past this cell.
# NOTE(review): the exact exception type depends on the xarray version, hence
# the broad `except Exception`.
try:
    da = xr.concat(data_arrays, dim=['Energy','Radius'])
except Exception as err:
    print(f'xr.concat failed: {err!r}')

Hmm... that didn't work: `xr.concat` takes a single dimension for `dim`, not a list of dimension names.

Let's try a multi-index

In [ ]:
# Build one (Energy, Radius) tuple per selected row and use the resulting
# MultiIndex, named 'system', as the dimension to concatenate along.
keys = ['Energy', 'Radius']
tuples = list(map(tuple, sdf[keys].values))
index = pd.MultiIndex.from_tuples(tuples, names=keys)
index.name = 'system'
da = xr.concat(data_arrays, dim=index)
da
In [ ]:
# Select on the 'Energy' level of the multi-index. Level selection is an
# exact match: `method='nearest'` is not supported for multi-indexes (newer
# xarray releases raise ValueError if it is passed), so it is omitted here.
da.sel(Energy=285.)
In [ ]:
# Two-step selection: an exact match on the 'Energy' multi-index level
# (multi-indexes do not support `method=`), then nearest-neighbour lookup
# on the ordinary Qy coordinate.
(
    da.sel(Energy=285.)
    .sel(Qy=0, method='nearest')
    .plot.line(x='Qx', yscale='log', xscale='log')
)
In [ ]:
# Exact match on the 'Energy' multi-index level (`method='nearest'` is not
# supported for multi-index selection), then one facet per Radius.
da.sel(Energy=285.).plot(col='Radius', col_wrap=3, norm=LogNorm(1e-9, 1))