# Connect to the IPython.parallel cluster (legacy pre-4.0 API; later ipyparallel).
from IPython.parallel import Client
cli = Client()
print("Total workers: {}".format(len(cli.ids)))
# (captured notebook output of the cell above:)
Total workers: 12
# dview: direct view over all engines; lbview: load-balanced task scheduler.
dview = cli[:]
lbview = cli.load_balanced_view()
%%px --local
# Imports executed on every engine AND in the local kernel (--local).
from collections import Counter, namedtuple
import itertools
from pathlib import Path
import pickle
from IPython.html import widgets # Widget definitions
from IPython.display import display # Used to display widgets in the notebook
from PIL import Image
import numpy as np
import pandas as pd
from sklearn import svm
# NOTE(review): sklearn.cross_validation is the legacy module (removed in
# scikit-learn 0.20); model_selection is its modern replacement.
from sklearn import cross_validation
# Resolve the notebook dir and shared src/ tree as POSIX strings, then push
# them to every engine so the next %%px cell can extend sys.path with them.
current_dir = Path('.').resolve().as_posix()
src_dir = Path('../../src').resolve().as_posix()
dview['current_dir'] = current_dir
dview['src_dir'] = src_dir
%%px --local
# Make the project code importable on each engine and locally.
import sys
sys.path.append(current_dir)
sys.path.append(src_dir)
from exp_utils import (
    Connection, Dataset, SingleLabelDataset,
    PathConnection, DatasetConnection,
    TrainTestConnection, TrainTestDatsetConnection,
    Pred, get_mismatch_pred, make_mismatch_df, confmat_to_df, read_patches_from_batch
)
%%px --local
# Load the (Train, Test) label-mapping spreadsheets into a paired connection.
with pd.ExcelFile(
    str(Path(current_dir).parent / 'exp06_both' / 'label_mapping_modified.xlsx')
) as xlsx:
    label_stat_df = TrainTestConnection(xlsx.parse('Train'), xlsx.parse('Test'))
# Labels excluded when building the file-suffix -> label mapping.
IGNORE_LABELS = [-1, 6, 7]


def make_label_map(df):
    """Map each file suffix in *df* to its label, skipping ignored labels.

    Expects *df* to expose ``file_suffix`` and ``label`` columns.
    """
    mapping = {}
    for suffix, label in zip(df.file_suffix, df.label):
        if label not in IGNORE_LABELS:
            mapping[suffix] = label
    return mapping
# Apply the mapping builder to both the Train and Test sheets.
anno_label_map = label_stat_df.apply(make_label_map)
# Chinese display name for each label id.
LABEL_NAME_MAP = {
    1: '腺癌', 2: '正常', 3: '粘液癌', 4: '锯齿状癌',
    5: '乳头状癌', 6: '绒毛状腺瘤', 7: '锯齿状腺瘤', 8: '绸带粉刺状癌',
}
# English abbreviation for each label id (used for output folder names).
LABEL_NAME_MAP_EN= {
    1: 'AC', 2: 'N', 3: 'MC', 4: 'SC',
    5: 'PC', 6: 'VA', 7: 'SA', 8: 'CCTA'
}
%%px --local
# Dataset objects for the three splits; paths resolve against the sibling
# experiment directories.
train = Dataset(
    dir_name='exp04_run', img_desc_pth=str(Path(current_dir).parent / 'exp04_run/remote_train_list.txt'),
    anno_label_map=anno_label_map.train
)
test = Dataset(
    dir_name='exp05_run', img_desc_pth=str(Path(current_dir).parent / 'exp05_run/remote_test_list.txt'),
    anno_label_map=anno_label_map.test
)
# Extra normal images; every image carries label 2 ('N' in LABEL_NAME_MAP_EN).
addnorm = SingleLabelDataset(
    label=2,
    dir_name='exp08_addnorm', img_desc_pth=str(Path(current_dir).parent /'exp08_addnorm/normal_list_0.txt')
)
all_datasets = TrainTestDatsetConnection(train=train, test=test, addnorm=addnorm)
# Per-image features: concatenate components f[0] and f[2] of each feature
# row (presumably the avg-pool and softmax3 features, per the variable name —
# TODO confirm), yielding 8192-dim vectors (see captured shape output below).
feat_df_avg_softmax3 = all_datasets.used_features.apply(
    lambda feat_mat: pd.DataFrame(np.r_[f[0], f[2]] for f in feat_mat))
feat_df_avg_softmax3.shape
# (captured notebook output of the line above:)
Connection(train=(273, 8192), test=(120, 8192), addnorm=(300, 8192))
# Stack every split into one feature matrix / label vector for the SVM.
full_X = np.concatenate(list(feat_df_avg_softmax3))
full_Y = np.concatenate(list(all_datasets.used_labels))
# Single stratified 50/50 split (legacy sklearn.cross_validation API).
ss_splitter = cross_validation.StratifiedShuffleSplit(
    full_Y, n_iter=1, test_size=0.5, random_state=9527
)
train_ix, test_ix = next(iter(ss_splitter))
train_X, train_Y = full_X[train_ix], full_Y[train_ix]
test_X, test_Y = full_X[test_ix], full_Y[test_ix]
# One-vs-rest linear SVM; coef_ holds one 8192-dim weight row per class.
classifier = svm.LinearSVC(C=0.5, random_state=5566)
classifier.fit(train_X, train_Y)
weight_mat = classifier.coef_
weight_mat.shape
# (captured notebook output: 6 classes x 8192 features)
(6, 8192)
# Class abbreviations in the same order as the classifier's weight rows.
LABEL_NAME = [LABEL_NAME_MAP_EN[cls] for cls in classifier.classes_]
%%px --local
# Plotting stack; Agg backend because the engines run headless.
import matplotlib as mpl
mpl.use("Agg")
from matplotlib import pyplot as plt
from matplotlib import gridspec
from matplotlib import font_manager
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from mpl_toolkits.axes_grid1 import make_axes_locatable
%%px --local
class MidpointNormalize(Normalize):
    """Colour normalisation that pins *midpoint* to the centre (0.5) of the map.

    Values are piecewise-linearly interpolated from [vmin, midpoint, vmax]
    onto [0, 0.5, 1]; NaN results are masked so the colormap treats them as
    "bad" (e.g. transparent) pixels.
    """

    def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
        self.midpoint = midpoint
        Normalize.__init__(self, vmin, vmax, clip)

    def __call__(self, value, clip=None):
        # Edge cases (masked inputs, etc.) are deliberately not handled here.
        anchors = [self.vmin, self.midpoint, self.vmax]
        targets = [0, 0.5, 1]
        interpolated = np.interp(value, anchors, targets)
        # Mask NaNs (cells never covered by any patch) out of the heatmap.
        return np.ma.masked_array(interpolated, mask=np.isnan(interpolated))
# Figure output root on the network share; raw strings keep the backslashes.
out_root = Path(
    r"\\msralab\ProjectData\eHealth\v-lianwa\CRC_pathology_result\exp10_refinefig"
    r"\heatmap_multilabel_separate_jet"
)
# mkdir(..., exist_ok=True) replaces the racy "check exists then create"
# pattern (a concurrent engine could create the dir between the two calls).
out_root.mkdir(parents=True, exist_ok=True)
out_root_test = out_root / 'test'
out_root_train = out_root / 'train'
out_root_addnorm = out_root / 'addnorm'
# One directory per split, each holding one sub-folder per class label.
for out_dir in [out_root_test, out_root_train, out_root_addnorm]:
    out_dir.mkdir(exist_ok=True)
    for class_label in LABEL_NAME:
        (out_dir / class_label).mkdir(parents=True, exist_ok=True)
%%px --local
def read_batch(p):
    """Unpickle and return the image-id -> batch mapping stored at path *p*."""
    with p.open('rb') as fh:
        return pickle.load(fh)
# Pre-load the per-image batch maps for train/test; addnorm has no pickled
# map and is filled in by the next cell.
img_batch_map = TrainTestConnection(
    train=read_batch(all_datasets.train.img_batch_pickle),
    test=read_batch(all_datasets.test.img_batch_pickle),
    addnorm=None
)
### img_batch_map for addnorm
# Rebuild the map from raw batch files: sort data_batch_* by numeric suffix,
# then group consecutive suffixes by suffix // 1000 (presumably one group per
# image id — TODO confirm against the batch-generation code).
batch_dir_addnorm = all_datasets.addnorm.result_root / 'batch'
batch_list_addnorm = sorted(
    (p for p in batch_dir_addnorm.iterdir() if p.name.startswith('data_batch')),
    key=lambda p: int(p.name.rsplit('_', 1)[-1])
)
# groupby requires the pre-sort above (same key) to form contiguous groups.
img_batch_map.addnorm = dict(
    (k, list(v))
    for k, v in itertools.groupby(
        batch_list_addnorm,
        lambda p: int(p.name.rsplit('_', 1)[-1]) // 1000
    ))
# Broadcast the output dirs, label maps and SVM weights to every engine;
# wait() blocks until all engines have received the payload.
ar = dview.push(dict(
    out_root_test=out_root_test, out_root_train=out_root_train, out_root_addnorm=out_root_addnorm,
    LABEL_NAME=LABEL_NAME, LABEL_NAME_MAP=LABEL_NAME_MAP, LABEL_NAME_MAP_EN=LABEL_NAME_MAP_EN,
    weight_mat=weight_mat
))
ar.wait()
%%px --local
def read_patches_from_batch_addnorm(img_id, img_batch_map):
    """Collect patch pixel data and fc2 features for one addnorm image.

    Reads every batch file listed under *img_id* in *img_batch_map* and keeps
    only entries whose batch label equals 0 (presumably the marker for usable
    patches — TODO confirm). Returns two parallel lists (img_data, feat_vec).
    """
    img_data, feat_vec = [], []
    for batch_path in img_batch_map[img_id]:
        with batch_path.open('rb') as fh:
            # latin1 lets Python 3 unpickle the Python 2-era batch files
            raw = pickle.load(fh, encoding='latin1')
        for data_vec, lab in zip(raw['data'], raw['labels']):
            if lab == 0:
                img_data.append(data_vec)
        for fc2_vec, lab in zip(raw['fc2'], raw['labels']):
            if lab == 0:
                feat_vec.append(fc2_vec)
    return img_data, feat_vec
%%px --local
def plot_class_confidence_separate(
    img_id, img_patches, img_pth, label_name, dist_mat, fig_out_p
):
    """Save the instance image overlaid with the confidence heatmap for one class.

    Parameters
    ----------
    img_id : image identifier (only used by the caller for naming).
    img_patches : patch records; ``patch.window.to_margin()`` yields the pixel
        margins of each patch in the original image.
    img_pth : path object of the instance image file.
    label_name : class whose confidence row of *dist_mat* is drawn.
    dist_mat : per-class confidence rows, aligned with LABEL_NAME.
    fig_out_p : output figure path; returned unchanged.
    """
    # open instance image
    instance_img = Image.open(img_pth.open('rb'))
    orig_size = instance_img.size
    # resize (NOTE: ANTIALIAS == LANCZOS; the old alias is removed in Pillow 10)
    instance_img.thumbnail((4096, 4096), Image.ANTIALIAS)
    current_size = instance_img.size
    scale_x = current_size[0] / orig_size[0]
    scale_y = current_size[1] / orig_size[1]
    ## Draw heatmap
    fig = plt.figure(None, (8, 8), frameon=False)
    ax = fig.add_subplot(1, 1, 1)
    # accumulate confidence per pixel, then average by patch coverage count
    confid_mat = np.zeros(current_size, np.float32)
    confid_count = np.zeros(current_size, int)  # np.int was removed in NumPy 1.24
    confid_vec = next(
        confid_vec for class_label, confid_vec in zip(LABEL_NAME, dist_mat)
        if class_label == label_name
    )
    for confid, patch in zip(confid_vec, img_patches):
        margin = patch.window.to_margin()
        # slice bounds must be ints: float indices raise in modern NumPy
        slice_x = slice(int(round(margin.x1 * scale_x)), int(round(margin.x2 * scale_x)))
        slice_y = slice(int(round(margin.y1 * scale_y)), int(round(margin.y2 * scale_y)))
        confid_mat[slice_x, slice_y] += confid
        confid_count[slice_x, slice_y] += 1
    # uncovered cells become NaN -> masked away by MidpointNormalize
    confid_count = np.where(confid_count == 0, np.nan, confid_count)
    confid_mat /= confid_count
    # plot heatmap over the image (transposed: confid_mat is indexed x-then-y)
    norm = MidpointNormalize(midpoint=0)
    cmap = plt.cm.jet  # plt.cm.RdYlBu_r
    ax.imshow(instance_img, interpolation='nearest', aspect='equal')
    im_confid = ax.imshow(
        confid_mat.T, interpolation='nearest', norm=norm, origin='upper',
        alpha=0.4, cmap=cmap, aspect='equal'
    )
    # turn off borders
    ax.set_axis_off()
    plt.axis('off')
    plt.tight_layout(pad=0, h_pad=0, w_pad=0)
    fig.savefig(str(fig_out_p), transparent=True, bbox_inches='tight', pad_inches=0, dpi=600)
    plt.close(fig)
    return fig_out_p
%%px --local
def plot_class_confidence(
    img_id, img_patches, img_pth, label_name, dist_mat, fig_out_p
):
    """Save a two-panel figure: original image (left), confidence overlay (right).

    Same inputs as plot_class_confidence_separate; only the row of *dist_mat*
    matching *label_name* is drawn. Returns *fig_out_p* unchanged.
    """
    # open instance image
    instance_img = Image.open(img_pth.open('rb'))
    orig_size = instance_img.size
    # resize in two steps (ANTIALIAS == LANCZOS; alias removed in Pillow 10)
    instance_img.thumbnail((4096, 4096), Image.ANTIALIAS)
    instance_img.thumbnail((2048, 2048), Image.ANTIALIAS)
    current_size = instance_img.size
    scale_x = current_size[0] / orig_size[0]
    scale_y = current_size[1] / orig_size[1]
    # Begin drawing
    fig = plt.figure(None, (16, 8))
    gs_all = gridspec.GridSpec(1, 2)
    # left panel: the plain image
    ax_orig = plt.subplot(gs_all[:2, 0])
    ax_orig.imshow(instance_img)
    ax_orig.axis('off')
    ax1 = plt.subplot(gs_all[0, 1])
    for class_label, confid_vec in zip(LABEL_NAME, dist_mat):
        if class_label != label_name:
            continue
        # accumulate confidence per pixel, then average by coverage count
        confid_mat = np.zeros(current_size)
        confid_count = np.zeros(current_size, int)  # np.int removed in NumPy 1.24
        for confid, patch in zip(confid_vec, img_patches):
            margin = patch.window.to_margin()
            # integer slice bounds (float indices raise in modern NumPy)
            slice_x = slice(int(round(margin.x1 * scale_x)), int(round(margin.x2 * scale_x)))
            slice_y = slice(int(round(margin.y1 * scale_y)), int(round(margin.y2 * scale_y)))
            confid_mat[slice_x, slice_y] += confid
            confid_count[slice_x, slice_y] += 1
        # uncovered cells -> NaN so MidpointNormalize masks them out
        confid_count = np.where(confid_count == 0, np.nan, confid_count)
        confid_mat /= confid_count
        # plot orig image under the semi-transparent heatmap
        ax1.imshow(instance_img)
        norm = MidpointNormalize(midpoint=0)
        im_confid = ax1.imshow(confid_mat.T, norm=norm, alpha=0.6, origin='upper', cmap=plt.cm.RdYlBu_r)
        ax1.axis('off')
    # save figure
    fig.savefig(str(fig_out_p), transparent=True, bbox_inches='tight', pad_inches=0.1, dpi=300)
    plt.close(fig)
    return fig_out_p
@lbview.parallel(block=False)
def plot_test_class_confidence(img_id):
    """Render and save the confidence heatmap for one test image."""
    img_data, img_patch_feats = read_patches_from_batch(img_id, img_batch_map.test)
    # NOTE(review): tiling duplicates each feature vector side by side so its
    # width matches weight_mat's 8192 columns — confirm this is consistent
    # with how the training features (np.r_[f[0], f[2]]) were built.
    img_patch_feat_mat = np.tile(img_patch_feats, [1, 2])
    dist_mat = weight_mat.dot(img_patch_feat_mat.T)  # compute the distance (confidence) value
    # read patch records, dropping patches that fall outside the image margin
    patch_pickle_p = Path(all_datasets.test.result_root, 'patch', str(img_id), 'patch_records.pickle3')
    with patch_pickle_p.open('rb') as f:
        img_patches = [p for p in pickle.load(f) if not any(p.window.outmargin)]
    img_pth = all_datasets.test.img_paths[img_id]
    label_name = LABEL_NAME_MAP_EN[all_datasets.test.labels[img_id]]
    fig_out_p = Path(out_root_test, label_name, "Image_{:d}.png".format(img_id))
    return plot_class_confidence_separate(
        img_id, img_patches, img_pth, label_name, dist_mat, fig_out_p
    )
@lbview.parallel(block=False)
def plot_train_class_confidence(img_id):
    """Render and save the confidence heatmap for one training image."""
    img_data, img_patch_feats = read_patches_from_batch(img_id, img_batch_map.train)
    # NOTE(review): tiling duplicates each feature vector so its width matches
    # weight_mat's 8192 columns — confirm against the training feature layout.
    img_patch_feat_mat = np.tile(img_patch_feats, [1, 2])
    dist_mat = weight_mat.dot(img_patch_feat_mat.T)  # compute the distance (confidence) value
    # read patch records, dropping patches that fall outside the image margin
    patch_pickle_p = Path(all_datasets.train.result_root, 'patch', str(img_id), 'patch_records.pickle3')
    with patch_pickle_p.open('rb') as f:
        img_patches = [p for p in pickle.load(f) if not any(p.window.outmargin)]
    img_pth = all_datasets.train.img_paths[img_id]
    label_name = LABEL_NAME_MAP_EN[all_datasets.train.labels[img_id]]
    fig_out_p = Path(out_root_train, label_name, "Image_{:d}.png".format(img_id))
    return plot_class_confidence_separate(
        img_id, img_patches, img_pth, label_name, dist_mat, fig_out_p
    )
@lbview.parallel(block=False)
def plot_addnorm_class_confidence(img_id):
    """Render and save the confidence heatmap for one addnorm (normal) image."""
    img_data, img_patch_feats = read_patches_from_batch_addnorm(img_id, img_batch_map.addnorm)
    # NOTE(review): tiling duplicates each feature vector so its width matches
    # weight_mat's 8192 columns — confirm against the training feature layout.
    img_patch_feat_mat = np.tile(img_patch_feats, [1, 2])
    dist_mat = weight_mat.dot(img_patch_feat_mat.T)  # compute the distance (confidence) value
    # read patch records, dropping patches that fall outside the image margin
    patch_pickle_p = Path(all_datasets.addnorm.result_root, 'patch', str(img_id), 'patch_records.pickle3')
    with patch_pickle_p.open('rb') as f:
        img_patches = [p for p in pickle.load(f) if not any(p.window.outmargin)]
    img_pth = all_datasets.addnorm.img_paths[img_id]
    # every addnorm image is labelled 2 ('N' in LABEL_NAME_MAP_EN)
    label_name = LABEL_NAME_MAP_EN[2]
    fig_out_p = Path(out_root_addnorm, label_name, "Image_{:d}.png".format(img_id))
    return plot_class_confidence_separate(
        img_id, img_patches, img_pth, label_name, dist_mat, fig_out_p
    )
# from IPython import display as ipydisplay
%%time
# Smoke test: render one figure per split locally; .func is the original,
# undecorated function behind each @lbview.parallel wrapper.
fig_p = PathConnection(train=None, test=None, addnorm=None)
fig_p.train = plot_train_class_confidence.func(13)
fig_p.addnorm = plot_addnorm_class_confidence.func(10)
fig_p.test = plot_test_class_confidence.func(15)
WARNING:py.warnings:C:\Miniconda\envs\crc34\lib\site-packages\PIL\Image.py:2192: DecompressionBombWarning: Image size (205051824 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. DecompressionBombWarning)
Wall time: 3min 12s
# ipydisplay.Image(str(fig_p.train))
def save_async_meta(async_result, out_pth):
    """Persist the scheduling metadata of *async_result* as a pickled DataFrame.

    Only the engine/timing columns useful for profiling task dispatch are kept.
    """
    keep_cols = ['engine_id', 'engine_uuid', 'msg_id', 'started', 'completed', 'received']
    meta = pd.DataFrame(async_result.metadata)[keep_cols]
    meta.to_pickle(str(out_pth))
# Fan the figure jobs out over the load-balanced view, one split at a time,
# and archive each run's scheduling metadata next to the figures.
ar = plot_train_class_confidence.map(all_datasets.train.used_img_ids)
ar.wait_interactive()
# (captured notebook output:)
273/273 tasks finished after 1104 s done
save_async_meta(ar, out_root / 'train_ar_meta.pickle3')
ar = plot_test_class_confidence.map(all_datasets.test.used_img_ids)
ar.wait_interactive()
# (captured notebook output:)
120/120 tasks finished after 1250 s done
save_async_meta(ar, out_root / 'test_ar_meta.pickle3')
ar = plot_addnorm_class_confidence.map(all_datasets.addnorm.used_img_ids)
ar.wait_interactive()
# (captured notebook output:)
300/300 tasks finished after 1835 s done
save_async_meta(ar, out_root / 'addnorm_ar_meta.pickle3')