#!/usr/bin/env python
# coding: utf-8

# # Basic Multi GPU Computation in TensorFlow
#
# Credits: Forked from [TensorFlow-Examples](https://github.com/aymericdamien/TensorFlow-Examples) by Aymeric Damien
#
# ## Setup
#
# Refer to the [setup instructions](http://nbviewer.ipython.org/github/donnemartin/data-science-ipython-notebooks/blob/master/deep-learning/tensor-flow-examples/Setup_TensorFlow.md)

# This tutorial requires your machine to have 2 GPUs.
# * "/cpu:0": The CPU of your machine.
# * "/gpu:0": The first GPU of your machine.
# * "/gpu:1": The second GPU of your machine.
# * For this example, we are using two GTX 980 GPUs.
#
# (See the sketches at the end of this notebook for a device-listing sanity check and a fallback for machines with fewer than 2 GPUs.)

# In[2]:

import numpy as np
import tensorflow as tf
import datetime


# In[3]:

# Log device placement so TensorFlow reports which device runs each op
log_device_placement = True

# Number of matrix multiplications to perform
n = 10


# In[ ]:

# Example: compute A^n + B^n on 2 GPUs

# Create large random matrices
A = np.random.rand(10000, 10000).astype('float32')
B = np.random.rand(10000, 10000).astype('float32')

# Lists to collect the results of each graph
c1 = []
c2 = []

# Define matrix power via repeated matrix multiplication
def matpow(M, n):
    if n <= 1:
        # Base case: M^1 is M itself
        return M
    else:
        return tf.matmul(M, matpow(M, n - 1))


# In[6]:

# Single GPU computing

with tf.device('/gpu:0'):
    a = tf.constant(A)
    b = tf.constant(B)
    # Compute A^n and B^n and store both results in c1
    c1.append(matpow(a, n))
    c1.append(matpow(b, n))

with tf.device('/cpu:0'):
    sum_op = tf.add_n(c1)  # Addition of all elements in c1, i.e. A^n + B^n

t1_1 = datetime.datetime.now()
with tf.Session(config=tf.ConfigProto(log_device_placement=log_device_placement)) as sess:
    # Runs the op.
    sess.run(sum_op)
t2_1 = datetime.datetime.now()


# In[7]:

# Multi GPU computing

# GPU:0 computes A^n
with tf.device('/gpu:0'):
    a = tf.constant(A)
    c2.append(matpow(a, n))

# GPU:1 computes B^n
with tf.device('/gpu:1'):
    b = tf.constant(B)
    c2.append(matpow(b, n))

with tf.device('/cpu:0'):
    sum_op = tf.add_n(c2)  # Addition of all elements in c2, i.e. A^n + B^n

t1_2 = datetime.datetime.now()
with tf.Session(config=tf.ConfigProto(log_device_placement=log_device_placement)) as sess:
    # Runs the op.
    sess.run(sum_op)
t2_2 = datetime.datetime.now()


# In[8]:

print("Single GPU computation time: " + str(t2_1 - t1_1))
print("Multi GPU computation time: " + str(t2_2 - t1_2))
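

# ## Checking available devices
#
# As a sanity check before running the benchmark, you can ask TensorFlow which
# devices it can actually see. This is a minimal sketch, assuming TensorFlow 1.x
# (where `tf.Session` and `device_lib` are available); the device names it
# reports may differ between versions (e.g. "/gpu:0" vs. "/device:GPU:0").

# In[ ]:

from tensorflow.python.client import device_lib

# Print the name and type of every device TensorFlow detects on this machine
for device in device_lib.list_local_devices():
    print(device.name, device.device_type)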
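

# ## Fallback for machines with fewer than 2 GPUs
#
# Pinning an op to "/gpu:1" on a machine without a second GPU makes the session
# fail at graph-placement time. A hedged workaround, assuming TensorFlow 1.x:
# set `allow_soft_placement=True` in the ConfigProto so TensorFlow falls back
# to an available device instead of raising an error. Note that the timings
# above then no longer measure true multi-GPU execution.

# In[ ]:

config = tf.ConfigProto(
    allow_soft_placement=True,  # fall back to an available device if the pinned one is missing
    log_device_placement=log_device_placement,
)
with tf.Session(config=config) as sess:
    # Re-run the multi-GPU graph; ops pinned to missing devices are re-placed
    sess.run(sum_op)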