# Install all necessary packages. This is a notebook shell command; run it in
# a notebook cell or a terminal before executing the rest of the script:
#   !pip install tensorflow tqdm requests --upgrade

# Dataset is taken from here: http://mmlab.ie.cuhk.edu.hk/projects/TCDCN.html
# Small code is taken from this StackOverflow thread:
# https://stackoverflow.com/questions/22676/how-do-i-download-a-file-over-http-using-python
# This could take a while!
from tqdm import tqdm
import requests

url = "http://mmlab.ie.cuhk.edu.hk/projects/TCDCN/data/MTFL.zip"
response = requests.get(url, stream=True)
# Fail here on an HTTP error instead of saving an error page to disk and
# hitting an opaque BadZipFile further down.
response.raise_for_status()
with open("MTFL", "wb") as handle:
    # iter_content() defaults to 1-byte chunks; ask for 1 KiB chunks so the
    # download is not dominated by per-byte overhead and so the progress
    # bar's ' KB' unit matches what is actually being counted.
    for data in tqdm(response.iter_content(chunk_size=1024), unit=' KB'):
        handle.write(data)

# Unzip all files (the context manager closes the archive even on error)
import zipfile

with zipfile.ZipFile('MTFL', 'r') as zip_ref:
    zip_ref.extractall()

# Notebook shell command used to inspect the extracted files:
#   !ls

# Import dataset in Pandas
import pandas as pd

train_data = pd.read_csv('training.txt', sep=' ', header=None,
                         skipinitialspace=True, nrows=10000)
test_data = pd.read_csv('testing.txt', sep=' ', header=None,
                        skipinitialspace=True, nrows=2995)

# Notebook-style peek at the first row (no effect when run as a script).
train_data.iloc[0]

# Needed for filename convention: the annotation files use backslashes.
train_data.iloc[:, 0] = train_data.iloc[:, 0].apply(lambda s: s.replace('\\', '/'))
test_data.iloc[:, 0] = test_data.iloc[:, 0].apply(lambda s: s.replace('\\', '/'))

from sklearn import preprocessing

# Rescale columns 1..10 (presumably the landmark coordinates -- confirm
# against the dataset README) to [0, 1]. The scaler is fitted on the training
# split only and then re-used for the test split, so both splits share the
# same scale; fitting a second scaler on the test data would make the test
# labels incomparable with what the model was trained to predict.
landmark_scaler = preprocessing.MinMaxScaler()
train_data.iloc[:, 1:11] = landmark_scaler.fit_transform(train_data.iloc[:, 1:11])
test_data.iloc[:, 1:11] = landmark_scaler.transform(test_data.iloc[:, 1:11])

import numpy as np
import tensorflow as tf

# Example code for handling datasets: slice (filename, label-row) pairs,
# batch them, and pull one batch through a session.
filenames = tf.constant(train_data.iloc[:, 0].tolist())
labels = tf.constant(train_data.iloc[:, 1:].values)
dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
it = dataset.batch(64).make_one_shot_iterator().get_next()
with tf.Session() as sess:
    # Note: at this stage `imgs` holds file names, not decoded pixels.
    (imgs, labels) = sess.run(it)
    print(imgs[0])

# Reads an image from a file, decodes it into a dense tensor, and resizes it
# to a fixed shape.
def _parse_function(filename, label):
    """Turn a (file name, label) pair into a (features dict, label) pair.

    The file is read, decoded as a JPEG and resized to 40x40. The label is
    passed through untouched.
    """
    raw_bytes = tf.read_file(filename)
    # Force three channels because some test images are black and white.
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize_images(decoded, [40, 40])
    return {"x": resized}, label


# This snippet is adapted from here: https://www.tensorflow.org/programmers_guide/datasets
def input_fn(data, is_eval=False):
    """Build the (features, labels) tensors that feed an Estimator.

    `data` is a DataFrame whose first column holds image paths and whose
    remaining columns hold the labels (cast to float32). In evaluation mode
    the data is batched once in order; otherwise it is repeated indefinitely
    and shuffled with a buffer of 1000 before batching. The batch size is 64
    in both cases.
    """
    # Image paths
    image_paths = tf.constant(data.iloc[:, 0].tolist())
    # Image labels
    targets = tf.constant(data.iloc[:, 1:].values.astype(np.float32))

    # Build the dataset
    ds = tf.data.Dataset.from_tensor_slices((image_paths, targets))
    ds = ds.map(_parse_function)

    # Training / testing logic
    if not is_eval:
        ds = ds.repeat().shuffle(1000)
    ds = ds.batch(64)

    # Build the iterator
    return ds.make_one_shot_iterator().get_next()

# Eager-mode alternative kept for reference:
# for (filename, label) in tfe.Iterator(dataset):
#     d = _parse_function(filename, label)


import matplotlib.pyplot as plt

# Pull one evaluation batch and show its first image with its label row.
with tf.Session() as sess:
    imgs, labels = sess.run(input_fn(train_data, True))
plt.imshow(imgs["x"][0] / 255)
print(labels[0])


def extract_features(features):
    """Shared convolutional trunk: features["x"] -> 100-unit dense tensor.

    Three conv + 2x2 max-pool stages (16, 48 and 64 filters) are followed by
    a fourth 64-filter convolution with no pooling, a flatten and a ReLU
    dense layer.
    """
    # Input layer
    net = tf.reshape(features["x"], [-1, 40, 40, 3])

    # First, second and third convolutional stages (conv followed by pool).
    for n_filters, kernel in ((16, [5, 5]), (48, [3, 3]), (64, [3, 3])):
        net = tf.layers.conv2d(inputs=net, filters=n_filters,
                               kernel_size=kernel, padding="same",
                               activation=tf.nn.relu)
        net = tf.layers.max_pooling2d(inputs=net, pool_size=[2, 2], strides=2)

    # Fourth convolutional layer (no pooling afterwards).
    net = tf.layers.conv2d(inputs=net, filters=64, kernel_size=[2, 2],
                           padding="same", activation=tf.nn.relu)

    # Dense layer: three stride-2 pools take 40 -> 20 -> 10 -> 5.
    net = tf.reshape(net, [-1, 5 * 5 * 64])
    return tf.layers.dense(inputs=net, units=100, activation=tf.nn.relu)


# Adapted from here: https://www.tensorflow.org/tutorials/layers
def single_task_cnn_model_fn(features, labels, mode):
    """Estimator model_fn that regresses two values with an MSE loss.

    The targets are label columns 2 and 7 (the slice 2:8:5), presumably the
    x and y of one landmark -- the model directory name suggests the nose;
    confirm against the dataset column layout.
    """
    shared = extract_features(features)

    # Predictions
    point = tf.layers.dense(inputs=shared, units=2)
    outputs = {
        "predictions": point
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=outputs)

    # Only label columns 2 and 7 take part in the loss and the metric.
    target = labels[:, 2:8:5]

    # Cost function (mean squared error)
    loss = tf.losses.mean_squared_error(labels=target, predictions=point)

    # Optimisation
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.train.AdamOptimizer().minimize(
            loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          train_op=train_op)

    # Model evaluation
    rmse = tf.metrics.root_mean_squared_error(
        labels=target, predictions=outputs["predictions"])
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops={"rmse": rmse})


# Create the Estimator
single_task_classifier = tf.estimator.Estimator(
    model_fn=single_task_cnn_model_fn, model_dir="/tmp/cnn_nose")

# Train the model
single_task_classifier.train(input_fn=lambda: input_fn(train_data), steps=1000)

# Evaluate on the test split
single_task_classifier.evaluate(
    input_fn=lambda: input_fn(test_data, is_eval=True))

# Input function for predictions (a hack, could be improved!)
def input_fn_predict(data):
    """Return only the feature tensors for `data`, batched by 64.

    Same pipeline as the training input function but without repeat/shuffle;
    the labels are built only so `_parse_function` can be re-used and are
    dropped before returning.
    """
    # Image paths
    filenames = tf.constant(data.iloc[:, 0].tolist())
    # Image labels (discarded below)
    labels = tf.constant(data.iloc[:, 1:].values)

    # Build the dataset
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    dataset = dataset.map(_parse_function)
    dataset = dataset.batch(64)

    # Build the iterator and keep the features only
    f, _ = dataset.make_one_shot_iterator().get_next()
    return f

# Eager-mode alternative kept for reference:
# for (filename, label) in tfe.Iterator(dataset):
#     d = _parse_function(filename, label)


# Evaluate a single prediction
p = list(single_task_classifier.predict(lambda: input_fn_predict(test_data)))

# Fetch the first batch of test images so one of them can be displayed.
with tf.Session() as sess:
    imgs = sess.run(input_fn_predict(test_data))

img_idx = 2
plt.imshow(imgs["x"][img_idx] / 255)
# Predictions are multiplied by 40 to map them onto the 40x40 resized image.
plt.scatter(p[img_idx]['predictions'][0] * 40,
            p[img_idx]['predictions'][1] * 40,
            500, marker='x', color='red', linewidth=5)


# Check the code here: https://www.tensorflow.org/api_docs/python/tf/contrib/estimator/regression_head
def single_head_cnn_model_fn(features, labels, mode):
    """Estimator model_fn for the two-value regression, built on a Head.

    The regression head creates the loss, metrics and EstimatorSpec; this
    function supplies the logits and the training op.
    """
    dense = extract_features(features)

    # Predictions
    predictions = tf.layers.dense(inputs=dense, units=2)

    # Optimiser
    optimizer = tf.train.AdamOptimizer()

    # Final model
    regression_head = tf.contrib.estimator.regression_head(label_dimension=2)

    # The Estimator passes labels=None in PREDICT mode, so the slice must be
    # guarded; slicing None would raise a TypeError. Columns 2 and 7 are the
    # regression targets (slice 2:8:5).
    target = None if labels is None else labels[:, 2:8:5]

    # `labels` and `train_op_fn` go by keyword so the call does not depend on
    # their position in the signature.
    # NOTE(review): later TF 1.x releases reportedly insert an `optimizer`
    # parameter ahead of `train_op_fn` -- confirm for the installed version.
    return regression_head.create_estimator_spec(
        features, mode, predictions,
        labels=target,
        train_op_fn=lambda x: optimizer.minimize(
            x, global_step=tf.train.get_or_create_global_step()))


# Create the Estimator
cnn_classifier = tf.estimator.Estimator(
    model_fn=single_head_cnn_model_fn, model_dir="/tmp/cnn_single_head")

# Train the model
cnn_classifier.train(input_fn=lambda: input_fn(train_data), steps=1000)


def multihead_input_fn(data, is_eval=False):
    """Wrap `input_fn` so the labels become a dict keyed by head name.

    'head_nose' receives label columns 2 and 7; 'head_pose' receives the last
    label column shifted down by one and cast to int32 (presumably turning
    1-based class ids into 0-based ones -- confirm against the dataset).
    """
    features, labels = input_fn(data, is_eval=is_eval)
    return features, {'head_nose': labels[:, 2:8:5],
                      'head_pose': tf.cast(labels[:, -1] - 1.0, tf.int32)}


def multi_head_cnn_model_fn(features, labels, mode):
    """Estimator model_fn with a regression head and a 5-class head.

    Both heads sit on the shared feature extractor and are combined through
    `multi_head`.
    """
    dense = extract_features(features)

    # Network predictions (one set of logits per task)
    predictions_nose = tf.layers.dense(inputs=dense, units=2)
    predictions_pose = tf.layers.dense(inputs=dense, units=5)
    logits = {'head_nose': predictions_nose, 'head_pose': predictions_pose}

    # Optimiser
    optimizer = tf.train.AdamOptimizer()

    # Define the two heads
    regression_head = tf.contrib.estimator.regression_head(
        name='head_nose', label_dimension=2)
    classification_head = tf.contrib.estimator.multi_class_head(
        name='head_pose', n_classes=5)

    multi_head = tf.contrib.estimator.multi_head(
        [regression_head, classification_head])

    # Keyword arguments for the same reason as in single_head_cnn_model_fn.
    return multi_head.create_estimator_spec(
        features, mode, logits,
        labels=labels,
        train_op_fn=lambda x: optimizer.minimize(
            x, global_step=tf.train.get_or_create_global_step()))


# Create the Estimator
multitask_classifier = tf.estimator.Estimator(
    model_fn=multi_head_cnn_model_fn, model_dir="/tmp/cnn_tmp")

# Train the model
multitask_classifier.train(
    input_fn=lambda: multihead_input_fn(train_data), steps=1000)

# Evaluate on the test split
multitask_classifier.evaluate(
    input_fn=lambda: multihead_input_fn(test_data, is_eval=True))

p = list(multitask_classifier.predict(lambda: input_fn_predict(test_data)))
print(p[0])

# Fetch the first batch of test images so one of them can be displayed.
with tf.Session() as sess:
    imgs = sess.run(input_fn_predict(test_data))

font = {
    'family': 'serif',
    'color': 'white',
    'weight': 'bold',
    'size': 16,
}

img_idx = 8
# Multi-head predictions are keyed by (head name, output name) tuples.
prediction_nose = p[img_idx][('head_nose', 'predictions')]
prediction_pose = p[img_idx][('head_pose', 'class_ids')]
plt.imshow(imgs["x"][img_idx] / 255)
plt.scatter(prediction_nose[0] * 40, prediction_nose[1] * 40,
            500, marker='x', color='red', linewidth=5)
plt.text(5, 3, 'Predicted pose: {}'.format(prediction_pose), fontdict=font)