In [1]:
import numpy as np
import matplotlib.pyplot as plt
In [2]:
import torch
import time
In [3]:
import subprocess

def use_gpu(use=True):
    if use:
        subprocess.run(['system76-power', 'graphics', 'power', 'on'])
        subprocess.run(['sudo', 'modprobe', 'nvidia'])
    else:
        subprocess.run(['sudo', 'rmmod', 'nvidia'])
        subprocess.run(['system76-power', 'graphics', 'off'])
        
use_gpu()
torch.cuda.is_available()
Out[3]:
True
In [9]:
ntimes = 1

results = []
for n in [2, 10] + list(range(500, 8000, 500)):

    print(f'\n------------ n is {n}')
    
    a = np.random.uniform(size=(n, n)).astype(np.float32)
    b = np.random.uniform(size=(n, n)).astype(np.float32)
    print('np type', a.dtype)

    at = torch.from_numpy(a)
    bt = torch.from_numpy(b)
    print('torch type', at.dtype)

    atgpu = at.to('cuda')
    btgpu = bt.to('cuda')
    print('cuda type', atgpu.dtype)
    
    start_time = time.time()
    for i in range(ntimes):
        c = a @ b
    nptime = time.time() - start_time
    print(f'Numpy took {nptime:.3f} seconds. {c[0, 0:5]}')

    start_time = time.time()
    for i in range(ntimes):
        ct = at @ bt
    torchtime = time.time() - start_time
    print(f'Pytorch took {torchtime:.3f} seconds. {ct[0, 0:5]}')

    start_time = time.time()
    for i in range(ntimes):
        ctgpu = atgpu @ btgpu
    cudatime = time.time() - start_time
    print(f'Pytorch cuda took {cudatime:.3f} seconds. {ctgpu[0, 0:5]}')
    print(f'Cuda is {nptime / cudatime :.2f} times faster.')

    results.append([n, nptime, torchtime, cudatime])
# system76.use_gpu(False)  # to turn off GPU power to conserve battery
------------ n is 2
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 0.001 seconds. [0.12070783 0.1623306 ]
Pytorch took 0.025 seconds. tensor([0.1207, 0.1623])
Pytorch cuda took 0.000 seconds. tensor([0.1207, 0.1623], device='cuda:0')
Cuda is 2.81 times faster.

------------ n is 10
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 0.000 seconds. [3.0962718 2.7059033 3.3572454 2.849105  3.50571  ]
Pytorch took 0.001 seconds. tensor([3.0963, 2.7059, 3.3572, 2.8491, 3.5057])
Pytorch cuda took 0.000 seconds. tensor([3.0963, 2.7059, 3.3572, 2.8491, 3.5057], device='cuda:0')
Cuda is 0.13 times faster.

------------ n is 500
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 0.004 seconds. [123.50014 115.61719 118.07661 119.9367  122.15569]
Pytorch took 0.004 seconds. tensor([123.5001, 115.6172, 118.0766, 119.9367, 122.1557])
Pytorch cuda took 0.000 seconds. tensor([123.5002, 115.6173, 118.0766, 119.9367, 122.1557], device='cuda:0')
Cuda is 9.05 times faster.

------------ n is 1000
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 0.024 seconds. [260.6872  263.69745 259.3229  254.5637  265.81363]
Pytorch took 0.025 seconds. tensor([260.6872, 263.6974, 259.3229, 254.5637, 265.8136])
Pytorch cuda took 0.000 seconds. tensor([260.6873, 263.6973, 259.3232, 254.5636, 265.8137], device='cuda:0')
Cuda is 145.34 times faster.

------------ n is 1500
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 0.062 seconds. [379.54385 378.8832  383.67926 391.91995 370.53366]
Pytorch took 0.057 seconds. tensor([379.5439, 378.8832, 383.6793, 391.9200, 370.5337])
Pytorch cuda took 0.001 seconds. tensor([379.5436, 378.8835, 383.6790, 391.9198, 370.5336], device='cuda:0')
Cuda is 121.44 times faster.

------------ n is 2000
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 0.127 seconds. [489.46414 477.55127 489.4321  488.4458  498.1868 ]
Pytorch took 0.151 seconds. tensor([489.4641, 477.5513, 489.4321, 488.4458, 498.1868])
Pytorch cuda took 0.001 seconds. tensor([489.4642, 477.5511, 489.4320, 488.4460, 498.1868], device='cuda:0')
Cuda is 170.50 times faster.

------------ n is 2500
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 0.242 seconds. [606.3979  623.87836 616.6427  622.49854 614.7176 ]
Pytorch took 0.236 seconds. tensor([606.3979, 623.8784, 616.6427, 622.4985, 614.7176])
Pytorch cuda took 0.001 seconds. tensor([606.3976, 623.8778, 616.6428, 622.4987, 614.7166], device='cuda:0')
Cuda is 339.20 times faster.

------------ n is 3000
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 0.392 seconds. [735.50116 735.01306 740.9163  741.9557  737.6644 ]
Pytorch took 0.383 seconds. tensor([735.5012, 735.0131, 740.9163, 741.9557, 737.6644])
Pytorch cuda took 0.001 seconds. tensor([735.5003, 735.0129, 740.9155, 741.9562, 737.6645], device='cuda:0')
Cuda is 520.33 times faster.

------------ n is 3500
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 0.602 seconds. [891.23737 882.3337  872.65314 877.6168  863.9329 ]
Pytorch took 0.619 seconds. tensor([891.2374, 882.3337, 872.6531, 877.6168, 863.9329])
Pytorch cuda took 0.001 seconds. tensor([891.2368, 882.3333, 872.6531, 877.6169, 863.9331], device='cuda:0')
Cuda is 905.66 times faster.

------------ n is 4000
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 0.913 seconds. [981.5295  980.4023  976.8022  978.9583  978.53143]
Pytorch took 0.914 seconds. tensor([981.5295, 980.4023, 976.8022, 978.9583, 978.5314])
Pytorch cuda took 0.001 seconds. tensor([981.5276, 980.4008, 976.8013, 978.9589, 978.5306], device='cuda:0')
Cuda is 1142.03 times faster.

------------ n is 4500
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 1.265 seconds. [1135.2175 1136.5304 1138.8174 1135.0779 1128.369 ]
Pytorch took 1.287 seconds. tensor([1135.2175, 1136.5304, 1138.8174, 1135.0779, 1128.3690])
Pytorch cuda took 0.001 seconds. tensor([1135.2178, 1136.5297, 1138.8167, 1135.0791, 1128.3698],
       device='cuda:0')
Cuda is 1583.37 times faster.

------------ n is 5000
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 1.799 seconds. [1227.8173 1232.35   1233.9011 1241.7736 1244.5304]
Pytorch took 1.750 seconds. tensor([1227.8173, 1232.3500, 1233.9011, 1241.7736, 1244.5304])
Pytorch cuda took 0.001 seconds. tensor([1227.8167, 1232.3492, 1233.9010, 1241.7770, 1244.5303],
       device='cuda:0')
Cuda is 2245.40 times faster.

------------ n is 5500
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 2.301 seconds. [1371.9634 1373.3914 1359.0396 1355.5087 1331.4181]
Pytorch took 2.326 seconds. tensor([1371.9634, 1373.3914, 1359.0396, 1355.5087, 1331.4181])
Pytorch cuda took 0.000 seconds. tensor([1371.9640, 1373.3890, 1359.0403, 1355.5072, 1331.4175],
       device='cuda:0')
Cuda is 10928.63 times faster.

------------ n is 6000
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 3.007 seconds. [1510.5569 1503.6063 1519.8984 1518.0404 1515.924 ]
Pytorch took 2.982 seconds. tensor([1510.5569, 1503.6063, 1519.8984, 1518.0404, 1515.9240])
Pytorch cuda took 0.001 seconds. tensor([1510.5569, 1503.6063, 1519.8950, 1518.0405, 1515.9209],
       device='cuda:0')
Cuda is 3904.76 times faster.

------------ n is 6500
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 3.810 seconds. [1617.8059 1590.9952 1610.4662 1617.6915 1607.5289]
Pytorch took 3.918 seconds. tensor([1617.8059, 1590.9952, 1610.4662, 1617.6915, 1607.5289])
Pytorch cuda took 0.001 seconds. tensor([1617.8046, 1590.9948, 1610.4664, 1617.6897, 1607.5255],
       device='cuda:0')
Cuda is 5484.67 times faster.

------------ n is 7000
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 4.736 seconds. [1760.7155 1754.1416 1740.6127 1741.255  1725.3534]
Pytorch took 4.701 seconds. tensor([1760.7155, 1754.1416, 1740.6127, 1741.2550, 1725.3534])
Pytorch cuda took 0.000 seconds. tensor([1760.7168, 1754.1438, 1740.6135, 1741.2534, 1725.3501],
       device='cuda:0')
Cuda is 21042.99 times faster.

------------ n is 7500
np type float32
torch type torch.float32
cuda type torch.float32
Numpy took 6.138 seconds. [1868.2257 1859.2701 1866.1921 1873.8728 1901.446 ]
Pytorch took 5.919 seconds. tensor([1868.2257, 1859.2701, 1866.1921, 1873.8728, 1901.4460])
Pytorch cuda took 0.001 seconds. tensor([1868.2227, 1859.2667, 1866.1921, 1873.8728, 1901.4438],
       device='cuda:0')
Cuda is 9077.51 times faster.
In [10]:
results = np.array(results)

plt.figure(figsize=(12, 12))
plt.plot(results[:, 0], results[:, 1:])
plt.legend(('Numpy', 'Torch', 'Cuda'))
plt.xlabel('$n$')
plt.ylabel(r'Seconds to multiply two $n \times n$ matrices');
In [12]:
plt.plot(results[1:, -1])
plt.xlabel('$n$')
plt.ylabel('Seconds')
plt.title('Running on GPU');
In [ ]: