| | horse | dog | airplane | automobile | bird | cat | deer | frog | ship | truck |
---|---|---|---|---|---|---|---|---|---|---|
label | 5000 | 4950 | 5000 | 5000 | 5000 | 5000 | 5000 | 5000 | 5000 | 5000 |
mis-label | 50 | |||||||||
total | 5050 | 4950 | 5000 | 5000 | 5000 | 5000 | 5000 | 5000 | 5000 | 5000 |
Apache License 2.0
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Analyse the self-influence scores written by the scoring cell of this
# notebook ('mislabel-result.npy') and visualise how well a high score
# separates mis-labelled samples from correctly labelled ones.
scores = np.load('mislabel-result.npy')
print('num tests: {}'.format(len(scores)))

# The scores are saved in the order the targets were built: the 5000
# correctly labelled horses first, followed by the mis-labelled dogs.
# Every position >= begin_mislabel_idx therefore belongs to a mis-label.
begin_mislabel_idx = 5000

# Ascending sort: the last entries are the samples with the largest scores.
sorted_indices = np.argsort(scores)
print('dogs in helpful: {} / 100'.format(np.sum(sorted_indices[-100:] >= begin_mislabel_idx)))
print('mean for all: {}'.format(np.mean(scores)))
print('mean for horse: {}'.format(np.mean(scores[:begin_mislabel_idx])))
print('mean for dogs: {}'.format(np.mean(scores[begin_mislabel_idx:])))

# Rank (position in the ascending ordering) of every mis-labelled sample.
# np.where returns these ranks in increasing order.
mis_label_ranking = np.where(sorted_indices >= begin_mislabel_idx)[0]
print('all of mis-labels: {}'.format(mis_label_ranking))

total = scores.size
total_pos = mis_label_ranking.size
total_neg = total - total_pos

# ROC points, one per mis-labelled sample. Placing the threshold at the
# rank of the k-th mis-label flags every sample at or above that rank:
#   true positives  = mis-labels at or above it  = total_pos - k
#   false positives = flagged samples - true positives
true_positives = total_pos - np.arange(total_pos)
false_positives = total - mis_label_ranking - true_positives
# float() keeps this a true division under Python 2 as well.
tpr = true_positives / float(total_pos)
fpr = false_positives / float(total_neg)

# Count the mis-labels inside each of 10 consecutive rank buckets.
# np.array_split tolerates a sample count that is not a multiple of 10
# (reshape([10, -1]) would raise); for an exact multiple the buckets are
# identical to the reshape-based ones.
histogram = sorted_indices >= begin_mislabel_idx
histogram = np.array([np.sum(bucket) for bucket in np.array_split(histogram, 10)])
# Accumulate starting from the highest-scoring bucket.
acc = np.cumsum(histogram[::-1])

fig, ax = plt.subplots(1, 2, figsize=(20, 10))

# Left panel: ROC curve.
ax[0].set_ylabel('true positive rate')
ax[0].set_xlabel('false positive rate')
ax[0].set_ylim(0.0, 1.0)
ax[0].set_xlim(0.0, 1.0)
ax[0].grid(True)
ax[0].plot(fpr, tpr)

# Right panel: cumulative number of mis-labels found per bucket.
ax[1].set_ylabel('num of mis-label')
ax[1].set_xlabel('threshold')
ax[1].grid(True)
ax[1].bar(range(10), acc)
plt.sca(ax[1])
plt.xticks(range(10), ['{}~{}%'.format(p, p + 10) for p in range(0, 100, 10)])
num tests: 5050 dogs in helpful: 9 / 100 mean for all: 6.6571919258e-07 mean for horse: 6.65163479762e-07 mean for dogs: 7.21290474424e-07 all of mis-labels: [1141 1211 1757 1837 1841 1924 2031 2173 2646 3288 3349 3482 3542 3640 3661 3880 3938 4125 4159 4179 4186 4224 4262 4288 4296 4538 4549 4551 4609 4613 4665 4735 4775 4780 4796 4803 4834 4862 4867 4884 4913 4972 4986 5005 5007 5012 5013 5032 5041 5049]
([<matplotlib.axis.XTick at 0x106b48b90>, <matplotlib.axis.XTick at 0x106b489d0>, <matplotlib.axis.XTick at 0x106bbd750>, <matplotlib.axis.XTick at 0x106bd8d10>, <matplotlib.axis.XTick at 0x106bef490>, <matplotlib.axis.XTick at 0x106befbd0>, <matplotlib.axis.XTick at 0x106bf9350>, <matplotlib.axis.XTick at 0x106bf9a90>, <matplotlib.axis.XTick at 0x106d05210>, <matplotlib.axis.XTick at 0x106d05950>], <a list of 10 Text xticklabel objects>)
# Plot the raw scores in storage order on a single wide axes.
fig = plt.figure(figsize=(20, 5))
ax = fig.add_subplot(1, 1, 1)
ax.grid(True)
ax.plot(scores)
[<matplotlib.lines.Line2D at 0x7fa076780a90>]
# resnet: implemented by wenxinxu
from cifar10_input import *
from cifar10_train import Train
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import darkon
# to enable specific GPU
%set_env CUDA_VISIBLE_DEVICES=0
# cifar-10 classes
# The position of a name in this tuple is used as its numeric label id:
# MyFeeder.make_mislabel compares labels against 7 and prints _classes[7]
# ('horse') as the target class, and treats label 5 ('dog') as the source
# of the mis-labels.
_classes = (
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck'
)
env: CUDA_VISIBLE_DEVICES=0
# Helper that is not defined in this notebook (presumably provided by the
# star import from cifar10_input). NOTE(review): by its name it fetches and
# unpacks the CIFAR-10 archive when it is not already on disk - confirm.
maybe_download_and_extract()
class MyFeeder(darkon.InfluenceFeeder):
    """Feed CIFAR-10 training samples to darkon with some dogs relabelled.

    On construction the training set is loaded and 1% of the samples
    labelled as dog (class 5) are relabelled as horse (class 7).

    Attributes set by this class:
        train_origin_data: loaded images divided by 256.
        train_data: loaded images passed through ``whitening_image``.
        train_label: labels after the relabelling step.
        train_batch_offset: start index of the next ``train_batch`` slice.
        correct_indices: indices labelled as horse before relabelling.
        mislabel_indices: indices of the dogs that were relabelled.
    """

    def __init__(self):
        # Training data; used for the inverse-HVP estimation.
        images, labels = prepare_train_data(padding_size=0)

        # Corrupt a few labels before anything keeps a reference to them.
        labels = self.make_mislabel(labels)

        self.train_origin_data = images / 256.
        self.train_label = labels
        self.train_data = whitening_image(images)
        self.train_batch_offset = 0

    def make_mislabel(self, label):
        """Relabel 1% of the dogs as horses, in place, and return ``label``.

        Records the originally correct horse indices in
        ``self.correct_indices`` and the relabelled dog indices in
        ``self.mislabel_indices``. The selection uses the global NumPy
        RNG, so it is only reproducible under a fixed seed.
        """
        target_class_idx = 7    # horse
        source_class_idx = 5    # dog
        mislabel_ratio = 0.01   # 1% dogs to horses

        # Must be collected before the relabelling below so that it holds
        # only the genuinely correct horse samples.
        horse_indices = np.where(label == target_class_idx)[0]
        self.correct_indices = horse_indices[:]

        # In the mis-label model training, this same script was used to
        # choose the random dogs, so shuffle-then-slice must stay as is.
        dog_indices = np.where(label == source_class_idx)[0]
        np.random.shuffle(dog_indices)
        num_mislabel = int(dog_indices.shape[0] * mislabel_ratio)
        chosen_dogs = dog_indices[:num_mislabel]

        label[chosen_dogs] = float(target_class_idx)
        self.mislabel_indices = chosen_dogs

        print('target class: {}'.format(_classes[target_class_idx]))
        print(self.mislabel_indices)
        return label

    def test_indices(self, indices):
        """Return the (whitened data, label) pair selected by ``indices``."""
        selected_data = self.train_data[indices]
        selected_label = self.train_label[indices]
        return selected_data, selected_label

    def train_batch(self, batch_size):
        """Return the next ``batch_size`` training samples and advance.

        Used for the recursion part. The slice starts at the current
        offset; the offset is not wrapped around at the end of the data.
        """
        begin = self.train_batch_offset
        finish = begin + batch_size
        self.train_batch_offset = finish
        batch_data = self.train_data[begin:finish, ...]
        batch_label = self.train_label[begin:finish, ...]
        return batch_data, batch_label

    def train_one(self, idx):
        """Return the single training sample (data, label) at ``idx``."""
        sample_data = self.train_data[idx, ...]
        sample_label = self.train_label[idx, ...]
        return sample_data, sample_label

    def reset(self):
        """Rewind ``train_batch`` to the start of the training data."""
        self.train_batch_offset = 0
# to fix shuffled data
# MyFeeder.make_mislabel picks the mis-labelled dogs with np.random.shuffle,
# so the seed must be set immediately before the feeder is constructed to
# select the same samples on every run.
np.random.seed(75)
feeder = MyFeeder()
Reading images from cifar10_data/cifar-10-batches-py/data_batch_1 Reading images from cifar10_data/cifar-10-batches-py/data_batch_2 Reading images from cifar10_data/cifar-10-batches-py/data_batch_3 Reading images from cifar10_data/cifar-10-batches-py/data_batch_4 Reading images from cifar10_data/cifar-10-batches-py/data_batch_5 target class: horse [21961 21986 27093 41046 11712 16494 24378 42274 24006 43962 35684 36899 28777 37099 14932 6202 18096 5135 33765 44823 6358 42089 19335 11610 10737 38555 43315 19835 9665 25727 13960 13911 42538 29577 42578 324 42384 27401 1647 34188 17670 32919 45007 29459 4203 25826 22079 31240 13067 17121]
# tf model checkpoint
# NOTE(review): judging by the path, these are weights trained on the
# mis-labelled data set - confirm.
check_point = 'pre-trained-mislabel/model.ckpt-79999'
# The graph is built before the Saver is created, because the Saver is
# given the global variables that exist at that point.
net = Train()
net.build_train_validation_graph()
saver = tf.train.Saver(tf.global_variables())
# The session is reused by the influence computation further down.
sess = tf.InteractiveSession()
saver.restore(sess, check_point)
INFO:tensorflow:Restoring parameters from pre-trained-mislabel/model.ckpt-79999
# Settings passed unchanged to inspector.upweighting_influence below.
# NOTE(review): presumably these control darkon's approximation of the
# inverse Hessian-vector product - confirm key meanings in the darkon docs.
approx_params = {
    'scale': 200,
    'num_repeats': 3,
    'recursion_depth': 50,
    'recursion_batch_size': 100
}
# targets
# Correctly labelled horses first, mis-labelled dogs after them. The analysis
# cell depends on this order: it treats every score at position >= 5000 as
# belonging to a mis-labelled sample.
test_indices = list(feeder.correct_indices) + list(feeder.mislabel_indices)
print('num test targets: {}'.format(len(test_indices)))
num test targets: 5050
# Restrict the influence computation to a single variable: the
# second-to-last trainable one, which is expected to be the 'fc' weight.
all_trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
trainable_variables = all_trainables[-2:-1]

# Show which variable was actually picked.
for variable in trainable_variables:
    print(variable.name)
# initialize Influence function
# Wires the restored network into darkon: the feeder supplies the samples,
# net.full_loss / net.loss_op are the train / test loss ops, and only the
# variable(s) selected in trainable_variables are considered.
# NOTE(review): './influence-workspace' is presumably where darkon keeps
# intermediate results - confirm.
inspector = darkon.Influence(
    workspace='./influence-workspace',
    feeder=feeder,
    loss_op_train=net.full_loss,
    loss_op_test=net.loss_op,
    x_placeholder=net.image_placeholder,
    y_placeholder=net.label_placeholder,
    trainable_variables=trainable_variables)
# Score every target against itself: the same index is passed in both
# index-list arguments, one target per call.
# NOTE(review): the positional arguments are assumed to be
# (sess, test_indices, test_batch_size, approx_params, train_indices,
#  num_total_train_example) - confirm against the darkon API.
scores = []
for position, target in enumerate(test_indices):
    influence = inspector.upweighting_influence(
        sess,
        [target],
        1,
        approx_params,
        [target],
        10000000,
        force_refresh=True
    )
    scores.extend(influence)
    print('done: [{}] - {}'.format(position, influence))

# Dump everything and persist it for the analysis cell, which loads
# 'mislabel-result.npy'.
print(scores)
np.save('mislabel-result.npy', scores)
Copyright 2017 Neosapience, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.