import numpy as np
data = np.array([[0.9,-0.2,-0.8],[0.5,0.2,0.9]])
data
array([[ 0.9, -0.2, -0.8], [ 0.5, 0.2, 0.9]])
data*10
array([[ 9., -2., -8.], [ 5., 2., 9.]])
data+data
array([[ 1.8, -0.4, -1.6], [ 1. , 0.4, 1.8]])
data.shape
(2, 3)
data.dtype
dtype('float64')
data1 = [6,7.5,8,0,1]
arr1 = np.array(data1)
arr1
array([ 6. , 7.5, 8. , 0. , 1. ])
data2 = [[1,2,3,4],[5,6,7,8]]
arr2 = np.array(data2)
arr2
array([[1, 2, 3, 4], [5, 6, 7, 8]])
arr2.ndim
2
arr2.shape
(2, 4)
arr1.dtype
dtype('float64')
arr2.dtype
dtype('int64')
np.zeros(10)
array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
np.zeros((3,6))
array([[ 0., 0., 0., 0., 0., 0.], [ 0., 0., 0., 0., 0., 0.], [ 0., 0., 0., 0., 0., 0.]])
np.empty((2,3,2))
array([[[ 0.00000000e+000, 1.28822983e-231], [ 2.16366771e-314, 2.16371523e-314], [ 2.16372047e-314, 2.16371173e-314]], [[ 2.14550304e-314, 0.00000000e+000], [ 2.15072965e-314, 2.14796467e-314], [ 0.00000000e+000, 1.28822983e-231]]])
np.arange(15)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
arr1 = np.array([1,2,3], dtype=np.float64)
arr2 = np.array([1,2,3], dtype=np.int32)
arr1.dtype
dtype('float64')
arr2.dtype
dtype('int32')
arr=np.array([1,2,3,4,5])
arr.dtype
dtype('int64')
float_arr = arr.astype(np.float64)
float_arr.dtype
dtype('float64')
# Build a fixed-width byte-string array. np.bytes_ is the portable spelling
# of this dtype; the old np.string_ alias was removed in NumPy 2.0.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
# Casting to float parses each byte string as a decimal number.
numeric_strings.astype(float)
array([ 1.25, -9.6 , 42. ])
int_array = np.arange(10)
caliber = np.array([.22,.270,.357, .380, .44,.50],dtype=np.float64)
int_array.astype(caliber.dtype)
array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
empty_uint32 = np.empty(8, dtype='u4')
empty_uint32
array([ 0, 1075314688, 0, 1075707904, 0, 1075838976, 0, 1072693248], dtype=uint32)
Arrays are important because they enable you to express batch operations on data without writing any for loops. This is usually called vectorization.
arr = np.array([[1.,2.,3.],[4.,5.,6.]])
arr
array([[ 1., 2., 3.], [ 4., 5., 6.]])
arr*arr
array([[ 1., 4., 9.], [ 16., 25., 36.]])
arr-arr
array([[ 0., 0., 0.], [ 0., 0., 0.]])
1/arr
array([[ 1. , 0.5 , 0.33333333], [ 0.25 , 0.2 , 0.16666667]])
arr**0.5
array([[ 1. , 1.41421356, 1.73205081], [ 2. , 2.23606798, 2.44948974]])
arr = np.arange(10)
arr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[5]
5
arr[5:8]
array([5, 6, 7])
arr[5:8]=12
arr
array([ 0, 1, 2, 3, 4, 12, 12, 12, 8, 9])
arr_slice = arr[5:8]
arr_slice[1]=12345
arr
array([ 0, 1, 2, 3, 4, 12, 12345, 12, 8, 9])
arr_slice[:]=64
arr
array([ 0, 1, 2, 3, 4, 64, 64, 64, 8, 9])
arr[1:6]
array([ 1, 2, 3, 4, 64])
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr2d
array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[:2]
array([[1, 2, 3], [4, 5, 6]])
arr2d[:2,1:]
array([[2, 3], [5, 6]])
arr2d[:,:1]=0
arr2d
array([[0, 2, 3], [0, 5, 6], [0, 8, 9]])
names = np.array(['Bob','Joe','Will', 'Bob','Will','Joe','Joe'])
data = np.random.randn(7,4)
names
array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='|S4')
data
array([[ 0.80157062, -1.11126999, -0.36717661, -0.10146253], [ 0.27996882, 0.1007806 , -1.57268242, 0.53328684], [-1.72991998, -1.68063347, -0.56110756, -0.98492423], [ 0.62189742, 1.16186301, 0.97171423, 0.25522433], [-0.7793186 , 0.94409302, -0.82486789, -0.3777059 ], [-0.12322917, -0.71568842, 1.47153722, 1.73777872], [-0.45996373, -1.41262271, 1.26718241, -0.22801257]])
names == 'Bob'
array([ True, False, False, True, False, False, False], dtype=bool)
data[names=='Bob']
array([[ 0.80157062, -1.11126999, -0.36717661, -0.10146253], [ 0.62189742, 1.16186301, 0.97171423, 0.25522433]])
data[names=='Bob', 2:]
array([[-0.36717661, -0.10146253], [ 0.97171423, 0.25522433]])
names != 'Bob'
array([False, True, True, False, True, True, True], dtype=bool)
# Select the rows whose name is not 'Bob'. A boolean mask is inverted with ~;
# unary minus on a boolean array is rejected by current NumPy releases.
data[~(names == 'Bob')]
array([[ 0.27996882, 0.1007806 , -1.57268242, 0.53328684], [-1.72991998, -1.68063347, -0.56110756, -0.98492423], [-0.7793186 , 0.94409302, -0.82486789, -0.3777059 ], [-0.12322917, -0.71568842, 1.47153722, 1.73777872], [-0.45996373, -1.41262271, 1.26718241, -0.22801257]])
mask = (names == 'Bob') | (names == 'Will')
mask
array([ True, False, True, True, True, False, False], dtype=bool)
data[mask]
array([[ 0.80157062, -1.11126999, -0.36717661, -0.10146253], [-1.72991998, -1.68063347, -0.56110756, -0.98492423], [ 0.62189742, 1.16186301, 0.97171423, 0.25522433], [-0.7793186 , 0.94409302, -0.82486789, -0.3777059 ]])
data[data<0]=0
data
array([[ 0.80157062, 0. , 0. , 0. ], [ 0.27996882, 0.1007806 , 0. , 0.53328684], [ 0. , 0. , 0. , 0. ], [ 0.62189742, 1.16186301, 0.97171423, 0.25522433], [ 0. , 0.94409302, 0. , 0. ], [ 0. , 0. , 1.47153722, 1.73777872], [ 0. , 0. , 1.26718241, 0. ]])
data[names != 'Joe'] = 7
data
array([[ 7. , 7. , 7. , 7. ], [ 0.27996882, 0.1007806 , 0. , 0.53328684], [ 7. , 7. , 7. , 7. ], [ 7. , 7. , 7. , 7. ], [ 7. , 7. , 7. , 7. ], [ 0. , 0. , 1.47153722, 1.73777872], [ 0. , 0. , 1.26718241, 0. ]])
# Allocate an uninitialised 8x4 array, then overwrite row i with the value i.
arr = np.empty((8, 4))
for i in range(8):
    # Assigning a scalar to a row broadcasts it across all four columns.
    arr[i] = i
arr
array([[ 0., 0., 0., 0.], [ 1., 1., 1., 1.], [ 2., 2., 2., 2.], [ 3., 3., 3., 3.], [ 4., 4., 4., 4.], [ 5., 5., 5., 5.], [ 6., 6., 6., 6.], [ 7., 7., 7., 7.]])
arr[[4,3,0,6]]
array([[ 4., 4., 4., 4.], [ 3., 3., 3., 3.], [ 0., 0., 0., 0.], [ 6., 6., 6., 6.]])
arr[[-3,-5,-7]]
array([[ 5., 5., 5., 5.], [ 3., 3., 3., 3.], [ 1., 1., 1., 1.]])
arr = np.arange(32).reshape((8,4))
arr
array([[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23], [24, 25, 26, 27], [28, 29, 30, 31]])
arr[[1,5,7,2],[0,3,1,2]]
array([ 4, 23, 29, 10])
arr = np.arange(15).reshape((3,5))
arr
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14]])
arr.T
array([[ 0, 5, 10], [ 1, 6, 11], [ 2, 7, 12], [ 3, 8, 13], [ 4, 9, 14]])
arr = np.random.randn(6,3)
np.dot(arr.T,arr)
array([[ 3.43085137, 1.45241985, -0.28497343], [ 1.45241985, 7.43054637, -2.33795759], [-0.28497343, -2.33795759, 6.11364269]])
arr = np.arange(16).reshape((2,2,4))
arr
array([[[ 0, 1, 2, 3], [ 4, 5, 6, 7]], [[ 8, 9, 10, 11], [12, 13, 14, 15]]])
arr.transpose((1,0,2))
array([[[ 0, 1, 2, 3], [ 8, 9, 10, 11]], [[ 4, 5, 6, 7], [12, 13, 14, 15]]])
arr.swapaxes(1,2)
array([[[ 0, 4], [ 1, 5], [ 2, 6], [ 3, 7]], [[ 8, 12], [ 9, 13], [10, 14], [11, 15]]])
arr = np.arange(10)
np.sqrt(arr)
array([ 0. , 1. , 1.41421356, 1.73205081, 2. , 2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. ])
np.exp(arr)
array([ 1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03, 2.98095799e+03, 8.10308393e+03])
x = np.random.randn(8)
y = np.random.randn(8)
x
array([ 0.99819174, -1.67071413, 0.69328763, 1.10724666, 0.06318684, 1.30097777, 2.39201052, 0.18678585])
y
array([ 1.86216662, -0.60762356, -0.49207699, -1.61177888, -0.77775008, -0.06480237, -0.48120421, 0.01314353])
np.maximum(x,y)
array([ 1.86216662, -0.60762356, 0.69328763, 1.10724666, 0.06318684, 1.30097777, 2.39201052, 0.18678585])
arr = np.random.randn(7)*5
arr
array([ 1.91395828, -0.42429758, -3.825587 , -2.62878641, 0.11969267, -5.36031755, 7.21784393])
np.modf(arr)
(array([ 0.91395828, -0.42429758, -0.825587 , -0.62878641, 0.11969267, -0.36031755, 0.21784393]), array([ 1., -0., -3., -2., 0., -5., 7.]))
points = np.arange(-5,5,0.01) # 1000 equally spaced points
xs, ys = np.meshgrid(points,points)
ys
array([[-5. , -5. , -5. , ..., -5. , -5. , -5. ], [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99], [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98], ..., [ 4.97, 4.97, 4.97, ..., 4.97, 4.97, 4.97], [ 4.98, 4.98, 4.98, ..., 4.98, 4.98, 4.98], [ 4.99, 4.99, 4.99, ..., 4.99, 4.99, 4.99]])
import matplotlib.pyplot as plt
z = np.sqrt(xs**2 + ys**2)
z
array([[ 7.07106781, 7.06400028, 7.05693985, ..., 7.04988652, 7.05693985, 7.06400028], [ 7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815, 7.05692568], [ 7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354, 7.04985815], ..., [ 7.04988652, 7.04279774, 7.03571603, ..., 7.0286414 , 7.03571603, 7.04279774], [ 7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354, 7.04985815], [ 7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815, 7.05692568]])
%pylab inline
# Render z as a grayscale image and attach a colour bar.
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string so the LaTeX backslash in \sqrt is not treated as an (invalid)
# Python escape sequence; the resulting title text is unchanged.
plt.title(r'Image plot of $\sqrt{x^2 + y^2}$ for a grid of values')
Populating the interactive namespace from numpy and matplotlib
<matplotlib.text.Text at 0x1114d3d10>
# Two value arrays and a boolean selector of the same length.
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
# Pure-Python selection: take the element from xarr where the matching flag
# in cond is True, otherwise take the element from yarr.
result = [x_val if take_x else y_val for x_val, y_val, take_x in zip(xarr, yarr, cond)]
result
[1.1000000000000001, 2.2000000000000002, 1.3, 1.3999999999999999, 2.5]
result = np.where(cond, xarr, yarr)
result
array([ 1.1, 2.2, 1.3, 1.4, 2.5])
arr = np.random.randn(4,4)
arr
array([[ 0.15860243, -0.19025505, -0.28670256, -0.78042955], [ 0.99593231, 0.67750065, 1.50543254, -0.54876993], [ 0.01450733, -0.97386191, -0.47407092, 0.47691936], [-0.90876212, 0.82059069, 0.7958378 , 1.45423948]])
np.where(arr>0,2,-2)
array([[ 2, -2, -2, -2], [ 2, 2, 2, -2], [ 2, -2, -2, 2], [-2, 2, 2, 2]])
arr = np.random.randn(5,4)
arr.mean()
0.01162071577157121
np.mean(arr)
0.01162071577157121
arr.sum()
0.23241431543142421
arr.mean(axis=1)
array([ 0.01078394, 0.56975368, 0.08557923, 0.25582785, -0.86384112])
arr.sum(0)
array([-0.12054343, -2.49026435, 1.17933985, 1.66388225])
arr = np.array([[0,1,2],[3,4,5],[6,7,8]])
arr.cumsum(0)
array([[ 0, 1, 2], [ 3, 5, 7], [ 9, 12, 15]])
arr.cumprod(1)
array([[ 0, 0, 0], [ 3, 12, 60], [ 6, 42, 336]])
# Draw 100 standard-normal samples. randn is reached through np.random
# explicitly rather than through a bare name injected by `%pylab inline`.
arr = np.random.randn(100)
(arr > 0).sum()  # number of positive values
51
bools = np.array([False, False, True, False])
bools.any()
True
bools.all()
False
arr = np.random.randn(8)
arr
array([-2.13432211, -0.17830939, -0.5780675 , 0.17706475, -0.99877497, -0.1909259 , 0.245384 , -0.90764502])
arr.sort()
arr
array([-2.13432211, -0.99877497, -0.90764502, -0.5780675 , -0.1909259 , -0.17830939, 0.17706475, 0.245384 ])
names = np.array(['Bob','Joe','Will','Bob','Will', 'Joe','Joe'])
np.unique(names)
array(['Bob', 'Joe', 'Will'], dtype='|S4')
sorted(set(names))
['Bob', 'Joe', 'Will']
values = np.array([6, 0, 0, 3, 2, 5, 6])
# Element-wise membership test: True where a value is one of 2, 3 or 6.
# np.isin supersedes np.in1d, which is deprecated as of NumPy 2.0.
np.isin(values, [2, 3, 6])
array([ True, False, False, True, True, False, True], dtype=bool)
arr = np.arange(10)
np.save('some_array',arr)
np.load('some_array.npy')
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
np.savez('array_archive.npz',a=arr,b=arr)
arch = np.load('array_archive.npz')
arch['a']
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arch['b']
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
!rm array_archive.npz
!rm some_array.npy
x = np.array([[1.,2.,3.],[4.,5.,6.]])
y = np.array([[6.,23.], [-1,7], [8,9]])
x
array([[ 1., 2., 3.], [ 4., 5., 6.]])
y
array([[ 6., 23.], [ -1., 7.], [ 8., 9.]])
x.dot(y) # equivalently np.dot(x,y)
array([[ 28., 64.], [ 67., 181.]])
np.dot(x, np.ones(3))
array([ 6., 15.])
np.ones(3).shape
(3,)
from numpy.linalg import inv,qr
X = np.random.randn(5,5)
mat = X.T.dot(X)
inv(mat)
array([[ 0.55656045, -0.1052293 , 0.54259798, 0.04927059, -0.20943461], [-0.1052293 , 0.7800284 , -0.51741589, -0.21122656, 0.40304701], [ 0.54259798, -0.51741589, 1.21249508, 0.28979103, -0.74981628], [ 0.04927059, -0.21122656, 0.28979103, 0.47800195, -0.49408226], [-0.20943461, 0.40304701, -0.74981628, -0.49408226, 0.87290569]])
mat.dot(inv(mat))
array([[ 1.00000000e+00, 0.00000000e+00, 2.22044605e-16, 0.00000000e+00, 0.00000000e+00], [ -4.85722573e-17, 1.00000000e+00, 1.38777878e-17, -1.04083409e-16, 2.22044605e-16], [ -3.33066907e-16, 2.22044605e-16, 1.00000000e+00, 0.00000000e+00, 4.44089210e-16], [ 0.00000000e+00, 0.00000000e+00, 4.44089210e-16, 1.00000000e+00, -4.44089210e-16], [ -2.22044605e-16, -4.44089210e-16, 8.88178420e-16, 4.44089210e-16, 1.00000000e+00]])
q,r = qr(mat)
r
array([[-5.16468799, 1.57980756, 5.97742386, 3.407923 , 5.41611016], [ 0. , -1.89127908, -0.6445985 , -0.31264466, 0.34849209], [ 0. , 0. , -3.01424877, -6.02413598, -6.77379008], [ 0. , 0. , 0. , -4.00915939, -2.21444492], [ 0. , 0. , 0. , 0. , 0.75066787]])
samples = np.random.normal(size = (4,4))
samples
array([[-1.40590462, -0.91612122, 0.72683934, 0.2305343 ], [ 0.49735553, -1.6994437 , -0.06710131, 1.9160297 ], [-0.44294493, 2.15673576, -0.46160566, -0.1098582 ], [ 0.9837433 , 0.57693797, -1.48162467, -1.88921783]])
N=1000000
from random import normalvariate
%timeit samples = [normalvariate(0,1) for _ in xrange(N)]
1 loops, best of 3: 1.21 s per loop
%timeit np.random.normal(size=N)
10 loops, best of 3: 41.1 ms per loop