#!/usr/bin/env python
# coding: utf-8

# # Influence function example for CIFAR-10, ResNet

# ### References
# ***
# - Darkon Documentation:
# - Darkon Github:
# - Resnet code:
# - More examples:
#
# ### Prepare
# ***
# ```
# # cpu
# pip install -r requirements.txt
#
# # gpu
# pip install -r requirements.txt
# pip install tensorflow-gpu
# ```
#
# ### Import packages

# In[1]:

# resnet: implemented by wenxinxu
from cifar10_train import Train
from cifar10_input import *
import tensorflow as tf
import numpy as np

import matplotlib.pyplot as plt
import darkon

# to enable a specific GPU
get_ipython().run_line_magic('set_env', 'CUDA_VISIBLE_DEVICES=0')
get_ipython().run_line_magic('matplotlib', 'inline')

# CIFAR-10 class names
_classes = (
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck'
)

# ### Download/Extract CIFAR-10 dataset

# In[2]:

maybe_download_and_extract()

# ### Implement dataset feeder

# In[3]:

class MyFeeder(darkon.InfluenceFeeder):
    def __init__(self):
        # load train data
        data, label = prepare_train_data(padding_size=0)
        self.train_origin_data = data / 256.
        self.train_label = label
        self.train_data = whitening_image(data)

        # load test data
        data, label = read_validation_data_wo_whitening()
        self.test_origin_data = data / 256.
        self.test_label = label
        self.test_data = whitening_image(data)

        self.train_batch_offset = 0

    def test_indices(self, indices):
        return self.test_data[indices], self.test_label[indices]

    def train_batch(self, batch_size):
        # calculate offset
        start = self.train_batch_offset
        end = start + batch_size
        self.train_batch_offset += batch_size
        return self.train_data[start:end, ...], self.train_label[start:end, ...]

    def train_one(self, idx):
        return self.train_data[idx, ...], self.train_label[idx, ...]

    def reset(self):
        self.train_batch_offset = 0


feeder = MyFeeder()

# ### Restore pre-trained model

# In[4]:

# tf model checkpoint
check_point = 'pre-trained/model.ckpt-79999'

net = Train()
net.build_train_validation_graph()

saver = tf.train.Saver(tf.global_variables())
sess = tf.InteractiveSession()
saver.restore(sess, check_point)

# ### Initialize influence module

# In[5]:

inspector = darkon.Influence(
    workspace='./influence-workspace',
    feeder=feeder,
    loss_op_train=net.full_loss,
    loss_op_test=net.loss_op,
    x_placeholder=net.image_placeholder,
    y_placeholder=net.label_placeholder)

# ### Upweighting influence options

# In[6]:

influence_target = 99

# display the test sample under inspection
print(_classes[int(feeder.test_label[influence_target])])
plt.imshow(feeder.test_origin_data[influence_target])

test_indices = [influence_target]
testset_batch_size = 100

train_batch_size = 100
train_iterations = 500

# a quicker alternative that scores fewer training examples:
# train_batch_size = 100
# train_iterations = 50

# approximation parameters for the inverse Hessian-vector product estimation
approx_params = {
    'scale': 200,
    'num_repeats': 5,
    'recursion_depth': 100,
    'recursion_batch_size': 100
}

# ### Run upweighting influence function

# In[7]:

scores = inspector.upweighting_influence_batch(
    sess,
    test_indices,
    testset_batch_size,
    approx_params,
    train_batch_size,
    train_iterations)

# ### Check helpful/harmful indices and scores

# In[9]:

sorted_indices = np.argsort(scores)
harmful = sorted_indices[:10]
helpful = sorted_indices[-10:][::-1]

print('\nHarmful:')
for idx in harmful:
    print('[{}] {}'.format(idx, scores[idx]))

print('\nHelpful:')
for idx in helpful:
    print('[{}] {}'.format(idx, scores[idx]))
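# ### (Optional) Per-class score summary
#
# Not part of the original notebook: a minimal sketch that aggregates the upweighting
# influence scores by training-class label. It assumes `scores` is index-aligned with
# the first `train_batch_size * train_iterations` examples yielded by the feeder,
# which is the order the batches above were drawn in.

# In[ ]:

scores_arr = np.asarray(scores)
train_labels = feeder.train_label[:len(scores_arr)].astype(int)
for c, name in enumerate(_classes):
    mask = train_labels == c
    if np.any(mask):
        # mean/max influence of this class's training images on the chosen test sample
        print('{:>10s}  mean: {:+.3e}  max: {:+.3e}'.format(
            name, scores_arr[mask].mean(), scores_arr[mask].max()))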
# ### Display helpful train data

# In[10]:

fig, axes1 = plt.subplots(2, 5, figsize=(15, 5))
target_idx = 0
for j in range(2):
    for k in range(5):
        idx = helpful[target_idx]
        axes1[j][k].set_axis_off()
        axes1[j][k].imshow(feeder.train_origin_data[idx])
        label_str = _classes[int(feeder.train_label[idx])]
        axes1[j][k].set_title('[{}]: {}'.format(idx, label_str))
        target_idx += 1

# ### Display harmful train data

# In[11]:

fig, axes1 = plt.subplots(2, 5, figsize=(15, 5))
target_idx = 0
for j in range(2):
    for k in range(5):
        idx = harmful[target_idx]
        axes1[j][k].set_axis_off()
        axes1[j][k].imshow(feeder.train_origin_data[idx])
        label_str = _classes[int(feeder.train_label[idx])]
        axes1[j][k].set_title('[{}]: {}'.format(idx, label_str))
        target_idx += 1

# ***
# >***[Copyright 2017 Neosapience, Inc.](http://www.neosapience.com)***
# >
# >Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# >    http://www.apache.org/licenses/LICENSE-2.0
#
# >Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ***