Li Li, Qiang Zheng, Qiang Zou, Shivani Rajput, Anota Ijaduola, Zhiming Wu, Xiaoping Wang, Huibo Cao, Suhas Somnath, Stephen Jesse, Miaofang Chi, Zheng Gai, David Parker, and Athena Sefat
Scientific Reports
Notebook written by: Suhas Somnath
4/9/2017
This Jupyter notebook uses pycroscopy to analyze Band Excitation data. We request you to reference the following papers if you use this notebook for your research:
This is a Jupyter Notebook - it contains text and executable code cells
. To learn more about how to use it, please see this video. Please see the image below for some basic tips on using this notebook.
Image courtesy of Jean Bilheux from the neutron imaging GitHub repository.
Note: This notebook was written for the pycroscopy version listed below and is not guaranteed to work on past or future versions of the package
pycroscopy
version: 0.0a36
If you have a different version of pycroscopy
installed, you may consider using the notebook as is and accept the possibility of errors. The cell below will attempt to install the correct versions of the packages. However, if you experience trouble, uninstall the existing version of pycroscopy and install the required version above by executing the following commands in a terminal (Linux / MacOS) / Anaconda prompt (Windows):
pip uninstall pycroscopy
pip install -I pycroscopy==0.0a36
import sys
!conda install --yes --prefix {sys.prefix} numpy scipy matplotlib scikit-learn Ipython ipywidgets h5py
!pip install pycroscopy==0.0a36
# Ensure python 3 compatibility:
from __future__ import division, print_function, absolute_import
from os import path
import numpy as np
import h5py
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.signal import medfilt
from scipy.ndimage.filters import gaussian_filter
from sklearn.utils.extmath import randomized_svd
from sklearn.cluster import KMeans
import pycroscopy as px
from pycroscopy.io.translators.omicron_asc import AscTranslator
# set up notebook to show plots within the notebook
% matplotlib inline
#%% Load file
file_path = r"E:\Shivani\STS_Au-Ba122 crystal\I(V) TraceUp Tue Sep 20 09_17_08 2016 [14-1] STM_Spectroscopy STM.asc"
folder_path, file_name = path.split(file_path)
file_name = file_name[:-4] + '_'
tran = AscTranslator()
h5_path = tran.translate(file_path)
hdf = px.ioHDF5(h5_path)
h5_main = px.hdf_utils.getDataSet(hdf.file, 'Raw_Data')[-1]
x_label = 'Bias (V)'
y_label = 'Current (a.u.)'
print('Datasets and datagroups within the file:\n------------------------------------')
px.hdf_utils.print_tree(h5_main.file)
print('\nThe main dataset:\n------------------------------------')
print(h5_main)
print('\nThe ancillary datasets:\n------------------------------------')
print(hdf.file['/Measurement_000/Channel_000/Position_Indices'])
print(hdf.file['/Measurement_000/Channel_000/Position_Values'])
print(hdf.file['/Measurement_000/Channel_000/Spectroscopic_Indices'])
print(hdf.file['/Measurement_000/Channel_000/Spectroscopic_Values'])
print('\nMetadata or attributes in a datagroup\n------------------------------------')
for key in hdf.file['/Measurement_000'].attrs:
print('{} : {}'.format(key, hdf.file['/Measurement_000'].attrs[key]))
# Read some basic parameters
h5_meas_grp = hdf.file['/Measurement_000']
num_rows = int(h5_meas_grp.attrs['y-pixels'])
num_cols = int(h5_meas_grp.attrs['x-pixels'])
num_pos = num_rows * num_cols
spectra_length = int(h5_meas_grp.attrs['z-points'])
volt_vec = px.hdf_utils.getAuxData(h5_main, auxDataName='Spectroscopic_Values')[-1][0]
raw_data_3d = np.reshape(h5_main[()], (num_rows, num_cols, spectra_length))
fig, axes = plt.subplots(ncols=2, figsize=(8, 4))
px.plot_utils.plot_map(axes[0], raw_data_3d[:, :, 0], cmap=px.plot_utils.cmap_jet_white_center())
axes[0].set_aspect(1)
axes[0].invert_yaxis()
axes[0].set_title('slice of data - use this for cropping the data')
axes[1].plot(volt_vec, raw_data_3d[10, 10, :])
axes[1].set_xlabel('Bias (V)')
axes[1].set_ylabel('Current (nA)')
axes[1].set_title('Response at a single pixel')
fig.tight_layout()
#%% User supplied information for cropping data:
start_row = 30 # 0 is the first row
end_row = num_rows # num_rows is the last row
start_col = 0
end_col = num_cols # num_cols is the last column
volt_chop = 0.3
# now crop the data according the specifications above:
volt_index = np.where(np.logical_and(volt_vec >= -volt_chop, volt_vec <= volt_chop))[0]
volt_vec = volt_vec[volt_index]
raw_data_3d = raw_data_3d[start_row:end_row, start_col:end_col, volt_index]
num_rows, num_cols, spectra_length = raw_data_3d.shape
raw_data_2d = raw_data_3d.reshape(-1, spectra_length)
fig, axes = plt.subplots(ncols=2, figsize=(8, 4))
px.plot_utils.plot_map(axes[0], raw_data_3d[:, :, 0], cmap=px.plot_utils.cmap_jet_white_center())
axes[0].set_aspect(1)
axes[0].invert_yaxis()
axes[0].set_title('Cropped data')
axes[1].plot(volt_vec, raw_data_3d[10, 10, :])
axes[1].set_xlabel('Bias (V)')
axes[1].set_ylabel('Current (nA)')
axes[1].set_title('Cropped response at a single pixel')
fig.tight_layout()
#%% Do k-means to identify the principal response types and where (spatially) the occur in the data
num_clusters = 16 # change the number of clusters here. Something ~ 16 is easy to understand and visualize
estimators = KMeans(num_clusters)
results = estimators.fit(raw_data_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (raw_data_3d.shape[0], raw_data_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'K_means_V_' + str(num_clusters) + '_clusters.png'),
format='png', dpi=300)
# throw out bad points using a threshold. Works better than finding outliers using K-means
current_threshold = 0.2
bad_pixels = np.where(np.max(np.abs(raw_data_2d), axis=1) > current_threshold)[0]
# find the pixels that are good:
good_pixels = [item for item in np.arange(raw_data_2d.shape[0]) if item not in bad_pixels]
# find the average of all the good pixels
good_resp = np.mean(raw_data_2d[good_pixels], axis=0)
fig, axis = plt.subplots(ncols=2, figsize=(10, 5))
axis[0].set_title('Bad pixels')
px.plot_utils.plot_line_family(axis[0], volt_vec, raw_data_2d[bad_pixels], label_prefix='Cluster')
axis[1].set_title('Mean of all other pixels')
axis[1].plot(volt_vec, good_resp)
fig.savefig(path.join(folder_path, file_name + '_outliers_removal.png'), format='png', dpi=300)
# now replace bad pixels by the mean of the good pixels
outlier_free_2d = np.copy(raw_data_2d)
outlier_free_2d[bad_pixels] = good_resp
outlier_free_3d = np.reshape(outlier_free_2d, raw_data_3d.shape)
estimators = KMeans(num_clusters)
results = estimators.fit(outlier_free_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (outlier_free_3d.shape[0], outlier_free_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'After_removing_outliers_K_means_V_' + str(num_clusters) + '_clusters.png'),
format='png', dpi=300)
num_comps = 256
U, S, V = randomized_svd(outlier_free_2d, num_comps, n_iter=3)
fig_V, axes_V = px.plot_utils.plot_loops(volt_vec, V, title='SVD Eigenvectors',
evenly_spaced=False, plots_on_side=4, use_rainbow_plots=False,
x_label=x_label, y_label=y_label, subtitles='Eigenvector')
fig_V.savefig(path.join(folder_path, file_name + 'SVD_Eigenvectors.png'), format='png', dpi=200)
loadings = np.reshape(U, (raw_data_3d.shape[0], raw_data_3d.shape[1], -1))
fig_U, axes_U = px.plot_utils.plot_map_stack(loadings, num_comps=16, cmap=px.plot_utils.cmap_jet_white_center())
fig_U.savefig(path.join(folder_path, file_name + 'SVD_Eigenvalues.png'), format='png', dpi=150)
fig_S, axes_S = px.plot_utils.plotScree(S)
fig_S.savefig(path.join(folder_path, file_name + 'SVD_S.png'), format='png', dpi=300)
#%% SVD filtering - Reconstructing the original data with a subset of the SVD components to remove noise present in the senior components
# Note that this method of filtering does NOT downsample the data
filter_comps = 32
filtered_2d = np.dot(np.dot(U[:, :filter_comps], np.eye(filter_comps)*S[:filter_comps]), V[:filter_comps, :])
filtered_3d = np.reshape(filtered_2d, (raw_data_3d.shape[0], raw_data_3d.shape[1], -1))
# Compare the raw and filtered data
fig_filt, axes = px.plot_utils.plot_loops(volt_vec, [outlier_free_2d, filtered_2d], line_colors=['k', 'r'],
dataset_names=['Raw','SVD Filtered']
y_label='Current (a.u.)', title='SVD filtering')
fig_filt.savefig(path.join(folder_path, file_name + '_SVD_Filtered_data_I_' + str(filter_comps) + '_comps.png'),
format='png', dpi=300)
# Try a Gaussian filter on the stack:
gaus_sigma = 0.35
gaus_cycles = 1
source_dset = filtered_3d.copy()
for cyc_ind in range(gaus_cycles):
sink_dset = np.zeros(raw_data_3d.shape)
for volt_ind in range(spectra_length):
volt_slice = source_dset[:, :, volt_ind]
sink_dset[:, :, volt_ind] = gaussian_filter(volt_slice, gaus_sigma)
source_dset = sink_dset.copy()
gaus_filt_3d = sink_dset
# See some example loops:
gaus_filt_2d = np.reshape(gaus_filt_3d, raw_data_2d.shape)
fig, axes = px.viz.plot_utils.plot_loops(volt_vec, [filtered_2d, gaus_filt_2d], line_colors=['k', 'r'],
dataset_names=['Raw', 'Filtered'], x_label=x_label, y_label=y_label,
title='Gaussian Filter')
fig.savefig(path.join(folder_path, file_name + 'Gaussian_Filter_sigma_' + str(gaus_sigma) + '_cycles_' +
str(gaus_cycles) + '.png'), format='png', dpi=300)
#%% Offset the data in the current axis by setting the current at 0V to 0
mid_pt = int(volt_vec.size * 0.5)
offsets = np.mean(gaus_filt_2d[:, mid_pt-1:mid_pt+1], axis=1)
offseted_data_2d = gaus_filt_2d - np.repeat(np.atleast_2d(offsets).transpose(), spectra_length, axis=1)
offseted_data_3d = np.reshape(offseted_data_2d, (raw_data_3d.shape[0], raw_data_3d.shape[1], -1))
#%% Plotting an example IV to check correction
row_ind = 15
col_ind = 12
fig, ax = plt.subplots()
ax.plot(volt_vec, filtered_3d[row_ind, col_ind], 'bo-', label='Raw')
ax.plot(volt_vec, offseted_data_3d[row_ind, col_ind], 'r*-', label='Offsetted')
ax.plot([volt_vec[0], volt_vec[-1]], [0, 0], 'k')
ax.plot([0, 0], [np.min(filtered_3d[row_ind, col_ind]), np.max(filtered_3d[row_ind, col_ind])], 'k')
# ax.set_xlim([volt_vec[mid_pt-5], volt_vec[mid_pt + 5]])
ax.set_xlabel(x_label)
ax.set_ylabel(y_label)
extents = 5
ax.set_xlim([volt_vec[mid_pt - extents], volt_vec[mid_pt + extents]])
chosen_sections_1 = filtered_3d[row_ind, col_ind, mid_pt - extents:mid_pt + extents]
chosen_sections_2 = offseted_data_3d[row_ind, col_ind, mid_pt - extents:mid_pt + extents]
ax.set_ylim([np.min(np.hstack([chosen_sections_1, chosen_sections_2])),
np.max(np.hstack([chosen_sections_1, chosen_sections_2]))])
ax.set_title('Ofsetting the current')
ax.legend(loc='best')
fig.savefig(path.join(folder_path, file_name + '_offsetting_current.png'), format='png', dpi=300)
#%% Calculating dI/dV now:
dIdV_3d = np.diff(offseted_data_3d, axis=2)
# Adding 0 to spectral axis since we lose one index due to differential
dIdV_3d = np.dstack((dIdV_3d, np.zeros(shape=(offseted_data_3d.shape[0], offseted_data_3d.shape[1]))))
dIdV_2d = np.reshape(dIdV_3d, offseted_data_2d.shape)
# Apply some median filters:
med_filt_size = 3
dIdV_filt_3d = medfilt(dIdV_3d, [1, 1, med_filt_size])
dIdV_filt_2d = np.reshape(dIdV_filt_3d, raw_data_2d.shape)
fig, axes = px.viz.plot_utils.plot_loops(volt_vec, [dIdV_2d, dIdV_filt_2d], line_colors=['k', 'r'],
dataset_names=['Raw', 'Filtered'], x_label=x_label, y_label=y_label,
title='Median Filter')
fig.savefig(path.join(folder_path, file_name + 'Median_Filter_size_' + str(med_filt_size) + '.png'), format='png', dpi=300)
num_clusters = 8 # change the number of clusters here. Something ~ 16 is easy to understand and visualize
estimators = KMeans(num_clusters)
results = estimators.fit(dIdV_filt_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (dIdV_filt_3d.shape[0], dIdV_filt_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'K_means_filtered_dIdV_' +
str(num_clusters) + '_clusters.png'), format='png', dpi=300)
# Downsample and average
box_size = 3
down_samp_didv_3d = np.zeros((dIdV_filt_3d.shape[0]-box_size + 1,
dIdV_filt_3d.shape[1] - box_size + 1,
dIdV_filt_3d.shape[2]))
down_samp_iv_3d = np.zeros(down_samp_didv_3d.shape)
for row_ind in range(down_samp_didv_3d.shape[0]):
row_start = row_ind
row_end = row_ind + box_size
for col_ind in range(down_samp_didv_3d.shape[1]):
col_start = col_ind
col_end = col_ind + box_size
box_didv_data = np.reshape(dIdV_filt_3d[row_start: row_end, col_start: col_end], (-1, spectra_length))
mean_di_dv = np.mean(box_didv_data, axis=0)
box_iv_data = np.reshape(offseted_data_3d[row_start: row_end, col_start: col_end], (-1, spectra_length))
mean_iv = np.mean(box_iv_data, axis=0)
offset_iv = mean_iv - np.mean(mean_iv[mid_pt - 1:mid_pt + 1])
#shifted_di_dv = mean_di_dv - np.min(mean_di_dv[int(0.5*spectra_length) - 2: int(0.5*spectra_length) + 2])
ldos = volt_vec * mean_di_dv / offset_iv
down_samp_iv_3d[row_ind, col_ind] = offset_iv
down_samp_didv_3d[row_ind, col_ind] = mean_di_dv
down_samp_didv_2d = np.reshape(down_samp_didv_3d, (-1, spectra_length))
down_samp_iv_2d = np.reshape(down_samp_didv_3d, (-1, spectra_length))
num_clusters = 8 # change the number of clusters here. Something ~ 16 is easy to understand and visualize
estimators = KMeans(num_clusters)
results = estimators.fit(down_samp_didv_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (down_samp_didv_3d.shape[0], down_samp_didv_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'K_means_downsampled_dIdV_' + str(box_size) + '_box_' +
str(num_clusters) + '_clusters.png'), format='png', dpi=300)
y = centroids[-1]
x = volt_vec
from scipy.interpolate import interp1d
f2 = interp1d(x, y, kind='cubic')
xnew = np.linspace(volt_vec[0], volt_vec[-1], num=250, endpoint=True)
plt.plot(x, y, 'k', xnew, f2(xnew), 'r')