# Install all necessary packages
!pip install tensorflow tqdm requests --upgrade

# The dataset comes from: http://mmlab.ie.cuhk.edu.hk/projects/TCDCN.html
# The download snippet is adapted from this StackOverflow thread: https://stackoverflow.com/questions/22676/how-do-i-download-a-file-over-http-using-python
# The download can take a while!

from tqdm import tqdm
import requests

# Stream the MTFL archive from the project site into the local file "MTFL".
url = "http://mmlab.ie.cuhk.edu.hk/projects/TCDCN/data/MTFL.zip"
response = requests.get(url, stream=True)
# Fail here on an HTTP error instead of saving an error page as the archive.
response.raise_for_status()

with open("MTFL", "wb") as handle:
    # iter_content() defaults to chunk_size=1 (one byte per iteration). Asking
    # for 1024-byte chunks makes each progress tick match the ' KB' unit and
    # avoids one Python-level write per byte.
    for data in tqdm(response.iter_content(chunk_size=1024), unit=' KB'):
        handle.write(data)

# Unzip all files
import zipfile
# Extract every member of the downloaded archive into the current directory.
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile('MTFL', 'r') as zip_ref:
    zip_ref.extractall()

!ls

# Import dataset in Pandas
import pandas as pd
# Both annotation files are space-separated with no header row; only the first
# `nrows` lines of each are read.
train_data = pd.read_csv(
    'training.txt',
    sep=' ',
    header=None,
    skipinitialspace=True,
    nrows=10000,
)
test_data = pd.read_csv(
    'testing.txt',
    sep=' ',
    header=None,
    skipinitialspace=True,
    nrows=2995,
)

# Notebook-style peek at the first training row.
train_data.iloc[0]

# Column 0 holds the image path; swap backslashes for forward slashes so the
# paths follow the filename convention used when the files are read later.
train_data.iloc[:, 0] = train_data.iloc[:, 0].map(lambda path: path.replace('\\', '/'))
test_data.iloc[:, 0] = test_data.iloc[:, 0].map(lambda path: path.replace('\\', '/'))

from sklearn import preprocessing
# Rescale columns 1..10 (presumably the landmark coordinates -- confirm against
# the dataset README) with a min-max mapping.
# The scaler is fitted on the training split only and then re-used for the test
# split, so both splits share one mapping; fitting a second scaler on the test
# data would put the evaluation targets on a different scale than the targets
# the model was trained on. Test values may therefore fall slightly outside
# [0, 1].
label_scaler = preprocessing.MinMaxScaler()
train_data.iloc[:, 1:11] = label_scaler.fit_transform(train_data.iloc[:, 1:11])
test_data.iloc[:, 1:11] = label_scaler.transform(test_data.iloc[:, 1:11])

import numpy as np
import tensorflow as tf

# Example: wrap (file path, label row) pairs in a tf.data.Dataset and pull one
# batch of 64 elements from it.

filenames = tf.constant(train_data.iloc[:, 0].tolist())
labels = tf.constant(train_data.iloc[:, 1:].values)

dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))

it = (
    dataset
    .batch(64)
    .make_one_shot_iterator()
    .get_next()
)

with tf.Session() as sess:
  imgs, labels = sess.run(it)
  # No parsing function is mapped onto this dataset, so the first component of
  # the batch is the file path itself rather than decoded pixels.
  print(imgs[0])

# Dataset.map callback: load one image file, decode it as a 3-channel JPEG and
# resize it to a fixed 40x40 shape.
def _parse_function(filename, label):
  """Turn a (filename, label) pair into ({"x": image}, label).

  Args:
    filename: path of the image file to read.
    label: passed through unchanged.

  Returns:
    A tuple of a feature dict with the resized image under key "x" and the
    untouched label.
  """
  raw_bytes = tf.read_file(filename)
  # channels=3 is required because some test images are black and white.
  decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
  features = {"x": tf.image.resize_images(decoded, [40, 40])}
  return features, label

# This snippet is adapted from here: https://www.tensorflow.org/programmers_guide/datasets

def input_fn(data, is_eval=False):
  """Build the (features, labels) batch tensors for training or evaluation.

  Args:
    data: DataFrame whose column 0 holds image paths and whose remaining
      columns hold the labels (cast to float32 here).
    is_eval: when True the data is batched once, in order; otherwise it is
      repeated indefinitely and shuffled with a 1000-element buffer first.

  Returns:
    The `get_next()` output of a one-shot iterator over batches of 64:
    a feature dict (see `_parse_function`) and the label batch.
  """
  # Image paths and their label rows.
  paths = tf.constant(data.iloc[:, 0].tolist())
  targets = tf.constant(data.iloc[:, 1:].values.astype(np.float32))

  # Pair them up and decode each image.
  dataset = tf.data.Dataset.from_tensor_slices((paths, targets)).map(_parse_function)

  # Training repeats and shuffles; evaluation just batches.
  if not is_eval:
    dataset = dataset.repeat().shuffle(1000)
  dataset = dataset.batch(64)

  return dataset.make_one_shot_iterator().get_next()

import matplotlib.pyplot as plt
# Sanity check: fetch the first evaluation-order batch of the training data,
# show its first image and print the matching label row.
with tf.Session() as sess:
  imgs, labels = sess.run(input_fn(train_data, is_eval=True))
  # Divide by 255 to bring pixel values into the [0, 1] range imshow expects
  # for float images.
  plt.imshow(imgs["x"][0] / 255)
  print(labels[0])

def extract_features(features):
  """Shared convolutional trunk: map a batch of 40x40x3 images to 100 features.

  Args:
    features: dict with the image batch under key "x".

  Returns:
    The output of the final 100-unit ReLU dense layer.
  """
  conv_opts = dict(padding="same", activation=tf.nn.relu)
  pool_opts = dict(pool_size=[2, 2], strides=2)

  # Input layer
  net = tf.reshape(features["x"], [-1, 40, 40, 3])

  # First three convolutional layers, each followed by 2x2 max pooling.
  # Layers are created in the same order as before so the automatically
  # assigned variable names do not change.
  for n_filters, kernel in ((16, [5, 5]), (48, [3, 3]), (64, [3, 3])):
    net = tf.layers.conv2d(inputs=net, filters=n_filters, kernel_size=kernel, **conv_opts)
    net = tf.layers.max_pooling2d(inputs=net, **pool_opts)

  # Fourth convolutional layer (no pooling afterwards)
  net = tf.layers.conv2d(inputs=net, filters=64, kernel_size=[2, 2], **conv_opts)

  # Dense layer: three stride-2 poolings take 40 -> 20 -> 10 -> 5, hence 5*5*64.
  flat = tf.reshape(net, [-1, 5 * 5 * 64])
  return tf.layers.dense(inputs=flat, units=100, activation=tf.nn.relu)

# Adapted from here: https://www.tensorflow.org/tutorials/layers

def single_task_cnn_model_fn(features, labels, mode):
  """Estimator model_fn regressing two values on top of `extract_features`.

  The regression targets are label columns 2 and 7 (the slice `2:8:5`);
  presumably one landmark's x/y pair -- confirm against the dataset layout.

  Args:
    features: feature dict forwarded to `extract_features`.
    labels: label batch; only read outside PREDICT mode.
    mode: a `tf.estimator.ModeKeys` value.

  Returns:
    A `tf.estimator.EstimatorSpec` for the requested mode.
  """
  shared = extract_features(features)

  # Predictions
  predictions = tf.layers.dense(inputs=shared, units=2)
  outputs = {"predictions": predictions}

  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=outputs)

  # Cost function (mean squared error)
  targets = labels[:, 2:8:5]
  loss = tf.losses.mean_squared_error(labels=targets, predictions=predictions)

  # Optimisation
  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op = tf.train.AdamOptimizer().minimize(
        loss=loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

  # Model evaluation
  rmse = tf.metrics.root_mean_squared_error(
      labels=targets, predictions=outputs["predictions"])
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=loss, eval_metric_ops={"rmse": rmse})

# Build the single-task Estimator; checkpoints are kept under /tmp/cnn_nose.
single_task_classifier = tf.estimator.Estimator(
    model_fn=single_task_cnn_model_fn,
    model_dir="/tmp/cnn_nose",
)

# Train for 1000 steps on the (repeated, shuffled) training data.
single_task_classifier.train(
    input_fn=lambda: input_fn(train_data, is_eval=False),
    steps=1000,
)

# Evaluate on a single ordered pass over the test data.
single_task_classifier.evaluate(
    input_fn=lambda: input_fn(test_data, is_eval=True),
)

# Input function for predictions: yields the image features only, no labels.
def input_fn_predict(data):
  """Return the feature batch for `data`, in file order, without labels.

  Delegates to `input_fn(data, is_eval=True)` so the prediction pipeline
  (decode, resize, batch of 64, no shuffling or repetition) cannot drift from
  the evaluation pipeline, and discards the label half of the result.

  Args:
    data: DataFrame in the same layout `input_fn` expects.

  Returns:
    The feature dict produced by `input_fn` (image batch under key "x").
  """
  features, _ = input_fn(data, is_eval=True)
  return features

# Evaluate a single prediction: run the single-task model over the test data,
# then overlay one predicted point on its image.
p = list(single_task_classifier.predict(lambda: input_fn_predict(test_data)))

with tf.Session() as sess:
  imgs = sess.run(input_fn_predict(test_data))
  img_idx = 2
  plt.imshow(imgs["x"][img_idx] / 255)
  # The two predicted values are multiplied by 40 (the resized image side) to
  # place the marker in pixel coordinates.
  plt.scatter(
      p[img_idx]['predictions'][0] * 40,
      p[img_idx]['predictions'][1] * 40,
      s=500,
      marker='x',
      color='red',
      linewidth=5,
  )

# Check the code here: https://www.tensorflow.org/api_docs/python/tf/contrib/estimator/regression_head

def single_head_cnn_model_fn(features, labels, mode):
  """Estimator model_fn: same two-value regression, built with a regression head.

  Args:
    features: feature dict forwarded to `extract_features`.
    labels: label batch, or None (the Estimator passes None in PREDICT mode).
    mode: a `tf.estimator.ModeKeys` value.

  Returns:
    The `EstimatorSpec` produced by the regression head.
  """
  dense = extract_features(features)

  # Predictions
  predictions = tf.layers.dense(inputs=dense, units=2)

  # Optimizer
  optimizer = tf.train.AdamOptimizer()

  # Label columns 2 and 7 are the regression targets. Guard the slice: labels
  # is None in PREDICT mode and slicing None would raise a TypeError.
  target_labels = None if labels is None else labels[:, 2:8:5]

  # Final model. Arguments are passed by keyword: in TF releases whose head
  # signature includes `optimizer`, it sits before `train_op_fn`, so a
  # positional fifth argument would be taken as the optimizer.
  regression_head = tf.contrib.estimator.regression_head(label_dimension=2)
  return regression_head.create_estimator_spec(
      features=features,
      mode=mode,
      logits=predictions,
      labels=target_labels,
      train_op_fn=lambda loss: optimizer.minimize(
          loss, global_step=tf.train.get_or_create_global_step()))

# Build the single-head Estimator; checkpoints are kept under
# /tmp/cnn_single_head.
cnn_classifier = tf.estimator.Estimator(
    model_fn=single_head_cnn_model_fn,
    model_dir="/tmp/cnn_single_head",
)

# Train for 1000 steps on the (repeated, shuffled) training data.
cnn_classifier.train(
    input_fn=lambda: input_fn(train_data, is_eval=False),
    steps=1000,
)

def multihead_input_fn(data, is_eval=False):
  """Wrap `input_fn` so the labels come back as one entry per head.

  Args:
    data: DataFrame forwarded to `input_fn`.
    is_eval: forwarded to `input_fn`.

  Returns:
    (features, labels) where labels is a dict: 'head_nose' holds label columns
    2 and 7, and 'head_pose' holds the last label column shifted down by one
    and cast to int32.
  """
  features, labels = input_fn(data, is_eval=is_eval)
  nose_targets = labels[:, 2:8:5]
  # Subtract 1 before the cast; presumably the stored classes are 1-based
  # while the 5-class head expects ids starting at 0 -- confirm.
  pose_classes = tf.cast(labels[:, -1] - 1.0, tf.int32)
  return features, {'head_nose': nose_targets, 'head_pose': pose_classes}

def multi_head_cnn_model_fn(features, labels, mode):
  """Estimator model_fn with two heads sharing the `extract_features` trunk.

  'head_nose' is a 2-value regression head and 'head_pose' is a 5-class
  classification head; both are combined with `multi_head`.

  Args:
    features: feature dict forwarded to `extract_features`.
    labels: dict keyed by head name (see `multihead_input_fn`), or None in
      PREDICT mode; passed through to the head unchanged.
    mode: a `tf.estimator.ModeKeys` value.

  Returns:
    The `EstimatorSpec` produced by the combined head.
  """
  dense = extract_features(features)

  # Network outputs (one per task)
  predictions_nose = tf.layers.dense(inputs=dense, units=2)
  predictions_pose = tf.layers.dense(inputs=dense, units=5)
  logits = {'head_nose': predictions_nose, 'head_pose': predictions_pose}

  # Optimizer
  optimizer = tf.train.AdamOptimizer()

  # Define the two heads
  regression_head = tf.contrib.estimator.regression_head(name='head_nose', label_dimension=2)
  classification_head = tf.contrib.estimator.multi_class_head(name='head_pose', n_classes=5)

  multi_head = tf.contrib.estimator.multi_head([regression_head, classification_head])

  # Arguments are passed by keyword: in TF releases whose head signature
  # includes `optimizer`, it sits before `train_op_fn`, so a positional fifth
  # argument would be taken as the optimizer.
  return multi_head.create_estimator_spec(
      features=features,
      mode=mode,
      logits=logits,
      labels=labels,
      train_op_fn=lambda loss: optimizer.minimize(
          loss, global_step=tf.train.get_or_create_global_step()))

# Create the multi-task Estimator; checkpoints are kept under /tmp/cnn_tmp.
multitask_classifier = tf.estimator.Estimator(
    model_fn=multi_head_cnn_model_fn, model_dir="/tmp/cnn_tmp")

# Train the model
multitask_classifier.train(input_fn=lambda: multihead_input_fn(train_data), steps=1000)

# Evaluate on the test data. This statement must sit at column 0: a stray
# leading indent at top level is an IndentationError when run as a script.
multitask_classifier.evaluate(input_fn=lambda: multihead_input_fn(test_data, is_eval=True))

# Collect the predictions for the whole test set and show the first one.
p = list(multitask_classifier.predict(lambda: input_fn_predict(test_data)))
print(p[0])

# Overlay one multi-task prediction (regressed point plus predicted pose class)
# on the corresponding test image.
with tf.Session() as sess:
  imgs = sess.run(input_fn_predict(test_data))

  font = dict(family='serif', color='white', weight='bold', size=16)

  img_idx = 8

  # Prediction dict keys are (head name, output name) tuples. Despite its
  # name, `prediction_eye` holds the 'head_nose' regression output.
  prediction_eye = p[img_idx][('head_nose', 'predictions')]
  prediction_pose = p[img_idx][('head_pose', 'class_ids')]

  plt.imshow(imgs["x"][img_idx] / 255)
  # Multiply by 40 (the resized image side) to place the marker in pixels.
  plt.scatter(
      prediction_eye[0] * 40,
      prediction_eye[1] * 40,
      s=500,
      marker='x',
      color='red',
      linewidth=5,
  )
  plt.text(5, 3, 'Predicted pose: {}'.format(prediction_pose), fontdict=font)