Notebook

Improving superconductivity in BaFe2As2-based crystals by cobalt clustering and electronic uniformity¶

Li Li, Qiang Zheng, Qiang Zou, Shivani Rajput, Anota Ijaduola, Zhiming Wu, Xiaoping Wang, Huibo Cao, Suhas Somnath, Stephen Jesse, Miaofang Chi, Zheng Gai, David Parker, and Athena Sefat
Scientific Reports

Notebook written by: Suhas Somnath
4/9/2017

References:¶

This Jupyter notebook uses pycroscopy to analyze Band Excitation data. We request you to reference the following papers if you use this notebook for your research:

Arxiv paper titled "USID and Pycroscopy - Open frameworks for storing and analyzing spectroscopic and imaging data"
Nature Communications paper titled "Improving superconductivity in BaFe2As2-based crystals by cobalt clustering and electronic uniformity"

About the notebook¶

This is a Jupyter Notebook - it contains text and executable code cells. To learn more about how to use it, please see this video. Please see the image below for some basic tips on using this notebook.

Image courtesy of Jean Bilheux from the neutron imaging GitHub repository.

A note about software versions:¶

Note: This notebook was written for the pycroscopy version listed below and is not guaranteed to work on past or future versions of the package

pycroscopy version: 0.0a36

If you have a different version of pycroscopy installed, you may consider using the notebook as is and accept the possibility of errors. The cell below will attempt to install the correct versions of the packages. However, if you experience trouble, uninstall the existing version of pycroscopy and install the required version above by executing the following commands in a terminal (Linux / MacOS) / Anaconda prompt (Windows):

pip uninstall pycroscopy
pip install -I pycroscopy==0.0a36

In [ ]:

import sys
!conda install --yes --prefix {sys.prefix} numpy scipy matplotlib scikit-learn Ipython ipywidgets h5py
!pip install pycroscopy==0.0a36

In [ ]:

# Ensure python 3 compatibility:
from __future__ import division, print_function, absolute_import

from os import path
import numpy as np
import h5py

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from mpl_toolkits.axes_grid1 import make_axes_locatable

from scipy.signal import medfilt
from scipy.ndimage.filters import gaussian_filter
from sklearn.utils.extmath import randomized_svd
from sklearn.cluster import KMeans

import pycroscopy as px
from pycroscopy.io.translators.omicron_asc import AscTranslator

# set up notebook to show plots within the notebook
% matplotlib inline

In [ ]:

#%% Load file
file_path = r"E:\Shivani\STS_Au-Ba122 crystal\I(V) TraceUp Tue Sep 20 09_17_08 2016 [14-1]  STM_Spectroscopy STM.asc"
folder_path, file_name = path.split(file_path)
file_name = file_name[:-4] + '_'

In [ ]:

tran = AscTranslator()
h5_path = tran.translate(file_path)

hdf = px.ioHDF5(h5_path)
h5_main = px.hdf_utils.getDataSet(hdf.file, 'Raw_Data')[-1]

In [ ]:

x_label = 'Bias (V)'
y_label = 'Current (a.u.)'

In [ ]:

print('Datasets and datagroups within the file:\n------------------------------------')
px.hdf_utils.print_tree(h5_main.file)
 
print('\nThe main dataset:\n------------------------------------')
print(h5_main)
print('\nThe ancillary datasets:\n------------------------------------')
print(hdf.file['/Measurement_000/Channel_000/Position_Indices'])
print(hdf.file['/Measurement_000/Channel_000/Position_Values'])
print(hdf.file['/Measurement_000/Channel_000/Spectroscopic_Indices'])
print(hdf.file['/Measurement_000/Channel_000/Spectroscopic_Values'])

print('\nMetadata or attributes in a datagroup\n------------------------------------')
for key in hdf.file['/Measurement_000'].attrs:
    print('{} : {}'.format(key, hdf.file['/Measurement_000'].attrs[key]))

In [ ]:

# Read some basic parameters
h5_meas_grp = hdf.file['/Measurement_000']
num_rows = int(h5_meas_grp.attrs['y-pixels'])
num_cols = int(h5_meas_grp.attrs['x-pixels'])
num_pos = num_rows * num_cols
spectra_length = int(h5_meas_grp.attrs['z-points'])
volt_vec = px.hdf_utils.getAuxData(h5_main, auxDataName='Spectroscopic_Values')[-1][0]

In [ ]:

raw_data_3d = np.reshape(h5_main[()], (num_rows, num_cols, spectra_length))
fig, axes = plt.subplots(ncols=2, figsize=(8, 4))
px.plot_utils.plot_map(axes[0], raw_data_3d[:, :, 0], cmap=px.plot_utils.cmap_jet_white_center())
axes[0].set_aspect(1)
axes[0].invert_yaxis()
axes[0].set_title('slice of data - use this for cropping the data')
axes[1].plot(volt_vec, raw_data_3d[10, 10, :])
axes[1].set_xlabel('Bias (V)')
axes[1].set_ylabel('Current (nA)')
axes[1].set_title('Response at a single pixel')
fig.tight_layout()

In [ ]:

#%% User supplied information for cropping data:
start_row = 30  # 0 is the first row
end_row = num_rows  # num_rows is the last row
start_col = 0
end_col = num_cols  # num_cols is the last column
volt_chop = 0.3

In [ ]:

# now crop the data according the specifications above:
volt_index = np.where(np.logical_and(volt_vec >= -volt_chop, volt_vec <= volt_chop))[0]
volt_vec = volt_vec[volt_index]

raw_data_3d = raw_data_3d[start_row:end_row, start_col:end_col, volt_index]
num_rows, num_cols, spectra_length = raw_data_3d.shape
raw_data_2d = raw_data_3d.reshape(-1, spectra_length)

fig, axes = plt.subplots(ncols=2, figsize=(8, 4))
px.plot_utils.plot_map(axes[0], raw_data_3d[:, :, 0], cmap=px.plot_utils.cmap_jet_white_center())
axes[0].set_aspect(1)
axes[0].invert_yaxis()
axes[0].set_title('Cropped data')
axes[1].plot(volt_vec, raw_data_3d[10, 10, :])
axes[1].set_xlabel('Bias (V)')
axes[1].set_ylabel('Current (nA)')
axes[1].set_title('Cropped response at a single pixel')
fig.tight_layout()

In [ ]:

#%% Do k-means to identify the principal response types and where (spatially) the occur in the data
num_clusters = 16  # change the number of clusters here. Something ~ 16 is easy to understand and visualize
estimators = KMeans(num_clusters)
results = estimators.fit(raw_data_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (raw_data_3d.shape[0], raw_data_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
                                                                spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'K_means_V_' + str(num_clusters) + '_clusters.png'),
                format='png', dpi=300)

In [ ]:

# throw out bad points using a threshold. Works better than finding outliers using K-means
current_threshold = 0.2
bad_pixels = np.where(np.max(np.abs(raw_data_2d), axis=1) > current_threshold)[0]

# find the pixels that are good:
good_pixels = [item for item in np.arange(raw_data_2d.shape[0]) if item not in bad_pixels]

# find the average of all the good pixels
good_resp = np.mean(raw_data_2d[good_pixels], axis=0)

fig, axis = plt.subplots(ncols=2, figsize=(10, 5))
axis[0].set_title('Bad pixels')
px.plot_utils.plot_line_family(axis[0], volt_vec, raw_data_2d[bad_pixels], label_prefix='Cluster')
axis[1].set_title('Mean of all other pixels')
axis[1].plot(volt_vec, good_resp)
fig.savefig(path.join(folder_path, file_name + '_outliers_removal.png'), format='png', dpi=300)

# now replace bad pixels by the mean of the good pixels
outlier_free_2d = np.copy(raw_data_2d)
outlier_free_2d[bad_pixels] = good_resp
outlier_free_3d = np.reshape(outlier_free_2d, raw_data_3d.shape)

In [ ]:

estimators = KMeans(num_clusters)
results = estimators.fit(outlier_free_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (outlier_free_3d.shape[0], outlier_free_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
                                                                spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'After_removing_outliers_K_means_V_' + str(num_clusters) + '_clusters.png'),
                format='png', dpi=300)

In [ ]:

num_comps = 256
U, S, V = randomized_svd(outlier_free_2d, num_comps, n_iter=3)

fig_V, axes_V = px.plot_utils.plot_loops(volt_vec, V, title='SVD Eigenvectors',
                                         evenly_spaced=False, plots_on_side=4, use_rainbow_plots=False,
                                         x_label=x_label, y_label=y_label, subtitles='Eigenvector')
fig_V.savefig(path.join(folder_path, file_name + 'SVD_Eigenvectors.png'), format='png', dpi=200)

loadings = np.reshape(U, (raw_data_3d.shape[0], raw_data_3d.shape[1], -1))
fig_U, axes_U = px.plot_utils.plot_map_stack(loadings, num_comps=16, cmap=px.plot_utils.cmap_jet_white_center())
fig_U.savefig(path.join(folder_path, file_name + 'SVD_Eigenvalues.png'), format='png', dpi=150)

fig_S, axes_S = px.plot_utils.plotScree(S)
fig_S.savefig(path.join(folder_path, file_name + 'SVD_S.png'), format='png', dpi=300)

In [ ]:

#%% SVD filtering - Reconstructing the original data with a subset of the SVD components to remove noise present in the senior components
# Note that this method of filtering does NOT downsample the data
filter_comps = 32
filtered_2d = np.dot(np.dot(U[:, :filter_comps], np.eye(filter_comps)*S[:filter_comps]), V[:filter_comps, :])
filtered_3d = np.reshape(filtered_2d, (raw_data_3d.shape[0], raw_data_3d.shape[1], -1))

# Compare the raw and filtered data
fig_filt, axes = px.plot_utils.plot_loops(volt_vec, [outlier_free_2d, filtered_2d], line_colors=['k', 'r'],
                                          dataset_names=['Raw','SVD Filtered']
                                          y_label='Current (a.u.)', title='SVD filtering')
fig_filt.savefig(path.join(folder_path, file_name + '_SVD_Filtered_data_I_' + str(filter_comps) + '_comps.png'),
                 format='png', dpi=300)

In [ ]:

# Try a Gaussian filter on the stack:
gaus_sigma = 0.35
gaus_cycles = 1
source_dset = filtered_3d.copy()
for cyc_ind in range(gaus_cycles):
    sink_dset = np.zeros(raw_data_3d.shape)
    for volt_ind in range(spectra_length):
        volt_slice = source_dset[:, :, volt_ind]
        sink_dset[:, :, volt_ind] = gaussian_filter(volt_slice, gaus_sigma)
    source_dset = sink_dset.copy()
gaus_filt_3d = sink_dset
# See some example loops:
gaus_filt_2d = np.reshape(gaus_filt_3d, raw_data_2d.shape)
fig, axes = px.viz.plot_utils.plot_loops(volt_vec, [filtered_2d, gaus_filt_2d], line_colors=['k', 'r'],
                                         dataset_names=['Raw', 'Filtered'], x_label=x_label, y_label=y_label,
                                         title='Gaussian Filter')
fig.savefig(path.join(folder_path, file_name + 'Gaussian_Filter_sigma_' + str(gaus_sigma) + '_cycles_' +
                      str(gaus_cycles) + '.png'), format='png', dpi=300)

In [ ]:

#%% Offset the data in the current axis by setting the current at 0V to 0

mid_pt = int(volt_vec.size * 0.5)
offsets = np.mean(gaus_filt_2d[:, mid_pt-1:mid_pt+1], axis=1)
offseted_data_2d = gaus_filt_2d - np.repeat(np.atleast_2d(offsets).transpose(), spectra_length, axis=1)
offseted_data_3d = np.reshape(offseted_data_2d, (raw_data_3d.shape[0], raw_data_3d.shape[1], -1))

#%% Plotting an example IV to check correction
row_ind = 15
col_ind = 12

fig, ax = plt.subplots()
ax.plot(volt_vec, filtered_3d[row_ind, col_ind], 'bo-', label='Raw')
ax.plot(volt_vec, offseted_data_3d[row_ind, col_ind], 'r*-', label='Offsetted')
ax.plot([volt_vec[0], volt_vec[-1]], [0, 0], 'k')
ax.plot([0, 0], [np.min(filtered_3d[row_ind, col_ind]), np.max(filtered_3d[row_ind, col_ind])], 'k')
# ax.set_xlim([volt_vec[mid_pt-5], volt_vec[mid_pt + 5]])
ax.set_xlabel(x_label)
ax.set_ylabel(y_label)
extents = 5
ax.set_xlim([volt_vec[mid_pt - extents], volt_vec[mid_pt + extents]])
chosen_sections_1 = filtered_3d[row_ind, col_ind, mid_pt - extents:mid_pt + extents]
chosen_sections_2 = offseted_data_3d[row_ind, col_ind, mid_pt - extents:mid_pt + extents]
ax.set_ylim([np.min(np.hstack([chosen_sections_1, chosen_sections_2])),
             np.max(np.hstack([chosen_sections_1, chosen_sections_2]))])
ax.set_title('Ofsetting the current')
ax.legend(loc='best')
fig.savefig(path.join(folder_path, file_name + '_offsetting_current.png'), format='png', dpi=300)

In [ ]:

#%% Calculating dI/dV now:
dIdV_3d = np.diff(offseted_data_3d, axis=2)
# Adding 0 to spectral axis since we lose one index due to differential
dIdV_3d = np.dstack((dIdV_3d, np.zeros(shape=(offseted_data_3d.shape[0], offseted_data_3d.shape[1]))))
dIdV_2d = np.reshape(dIdV_3d, offseted_data_2d.shape)

In [ ]:

# Apply some median filters:
med_filt_size = 3
dIdV_filt_3d = medfilt(dIdV_3d, [1, 1, med_filt_size])
dIdV_filt_2d = np.reshape(dIdV_filt_3d, raw_data_2d.shape)
fig, axes = px.viz.plot_utils.plot_loops(volt_vec, [dIdV_2d, dIdV_filt_2d], line_colors=['k', 'r'],
                                         dataset_names=['Raw', 'Filtered'], x_label=x_label, y_label=y_label,
                                         title='Median Filter')
fig.savefig(path.join(folder_path, file_name + 'Median_Filter_size_' + str(med_filt_size) + '.png'), format='png', dpi=300)

In [ ]:

num_clusters = 8  # change the number of clusters here. Something ~ 16 is easy to understand and visualize
estimators = KMeans(num_clusters)
results = estimators.fit(dIdV_filt_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (dIdV_filt_3d.shape[0], dIdV_filt_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
                                                                spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'K_means_filtered_dIdV_' +
                          str(num_clusters) + '_clusters.png'), format='png', dpi=300)

In [ ]:

# Downsample and average
box_size = 3
down_samp_didv_3d = np.zeros((dIdV_filt_3d.shape[0]-box_size + 1,
                              dIdV_filt_3d.shape[1] - box_size + 1,
                              dIdV_filt_3d.shape[2]))
down_samp_iv_3d = np.zeros(down_samp_didv_3d.shape)
for row_ind in range(down_samp_didv_3d.shape[0]):
    row_start = row_ind
    row_end = row_ind + box_size
    for col_ind in range(down_samp_didv_3d.shape[1]):
        col_start = col_ind
        col_end = col_ind + box_size

        box_didv_data = np.reshape(dIdV_filt_3d[row_start: row_end, col_start: col_end], (-1, spectra_length))
        mean_di_dv = np.mean(box_didv_data, axis=0)
        box_iv_data = np.reshape(offseted_data_3d[row_start: row_end, col_start: col_end], (-1, spectra_length))
        mean_iv = np.mean(box_iv_data, axis=0)
        offset_iv = mean_iv - np.mean(mean_iv[mid_pt - 1:mid_pt + 1])
        #shifted_di_dv = mean_di_dv - np.min(mean_di_dv[int(0.5*spectra_length) - 2: int(0.5*spectra_length) + 2])
        ldos = volt_vec * mean_di_dv / offset_iv
        down_samp_iv_3d[row_ind, col_ind] = offset_iv
        down_samp_didv_3d[row_ind, col_ind] = mean_di_dv

down_samp_didv_2d = np.reshape(down_samp_didv_3d, (-1, spectra_length))
down_samp_iv_2d = np.reshape(down_samp_didv_3d, (-1, spectra_length))

In [ ]:

num_clusters = 8  # change the number of clusters here. Something ~ 16 is easy to understand and visualize
estimators = KMeans(num_clusters)
results = estimators.fit(down_samp_didv_2d)
labels, centroids = px.processing.cluster.reorder_clusters(results.labels_, results.cluster_centers_)
labels_mat = np.reshape(labels, (down_samp_didv_3d.shape[0], down_samp_didv_3d.shape[1]))
fig_KM2, axes_KM2 = px.plot_utils.plot_cluster_results_together(labels_mat, centroids, spec_val=volt_vec,
                                                                spec_label=x_label, resp_label=y_label)
axes_KM2[0].invert_yaxis()
axes_KM2[0].set_aspect(1)
fig_KM2.savefig(path.join(folder_path, file_name + 'K_means_downsampled_dIdV_' + str(box_size) + '_box_' +
                          str(num_clusters) + '_clusters.png'), format='png', dpi=300)

In [ ]:

y = centroids[-1]
x = volt_vec
from scipy.interpolate import interp1d
f2 = interp1d(x, y, kind='cubic')
xnew = np.linspace(volt_vec[0], volt_vec[-1], num=250, endpoint=True)
plt.plot(x, y, 'k', xnew, f2(xnew), 'r')

In [ ]: