To learn more about NumPy, check my blog.
NumPy arrays allow you to perform mathematical operations on whole blocks of data at once.
# The easiest way to create an array is with the np.array function.
import numpy as np  # NumPy is imported under its conventional alias, np
scores = [89, 56.34, 76, 89, 98]
first_arr = np.array(scores)  # mixing ints and floats yields a float array
print(first_arr)
print(first_arr.dtype)  # .dtype returns the data type of the array's elements
[ 89. 56.34 76. 89. 98. ] float64
# Nested lists of equal length are converted into a multidimensional array.
scores_1 = [[34, 56, 23, 89], [11, 45, 76, 34]]
second_arr = np.array(scores_1)
print(second_arr)
print(second_arr.ndim)  # .ndim gives the number of dimensions of the array
print(second_arr.shape)  # (number of rows, number of columns)
print(second_arr.dtype)
[[34 56 23 89] [11 45 76 34]] 2 (2L, 4L) int32
x = np.zeros(10) # returns an array of ten zeros; np.ones(10) does the same with ones
x
array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
np.zeros((4,3)) # pass a tuple to specify the shape of the array (here 4 rows x 3 columns)
array([[ 0., 0., 0.], [ 0., 0., 0.], [ 0., 0., 0.], [ 0., 0., 0.]])
np.arange(15) # integers from 0 up to, but not including, 15
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
np.eye(6) # creates a square N x N identity matrix (here 6 x 6): 1s on the diagonal and 0s elsewhere
array([[ 1., 0., 0., 0., 0., 0.], [ 0., 1., 0., 0., 0., 0.], [ 0., 0., 1., 0., 0., 0.], [ 0., 0., 0., 1., 0., 0.], [ 0., 0., 0., 0., 1., 0.], [ 0., 0., 0., 0., 0., 1.]])
# Batch operations on data can be performed without for loops; this is
# called vectorization. Each operator below is applied element by element.
scores = [89, 56.34, 76, 89, 98]
first_arr = np.array(scores)
print(first_arr)
print(first_arr * first_arr)  # element-wise square
print(first_arr - first_arr)  # element-wise difference (all zeros)
print(1 / first_arr)  # element-wise reciprocal
print(first_arr ** 0.5)  # element-wise square root
[ 89. 56.34 76. 89. 98. ] [ 7921. 3174.1956 5776. 7921. 9604. ] [ 0. 0. 0. 0. 0.] [ 0.01123596 0.01774938 0.01315789 0.01123596 0.01020408] [ 9.43398113 7.5059976 8.71779789 9.43398113 9.89949494]
# You may want to select a subset of your data; NumPy array indexing is
# really useful for that.
new_arr = np.arange(12)
print(new_arr)
print(new_arr[5])  # a single element
print(new_arr[4:9])  # a slice: indices 4 through 8 (the stop index is excluded)
new_arr[4:9] = 99  # assign 99 to every element at indices 4 through 8
print(new_arr)
[ 0 1 2 3 4 5 6 7 8 9 10 11] 5 [4 5 6 7 8] [ 0 1 2 3 99 99 99 99 99 9 10 11]
# A major difference between lists and arrays is that array slices are views
# on the original array: the data is not copied, so any modification made
# through the view is reflected in the source array.
modi_arr = new_arr[4:9]
modi_arr[1] = 123456
print(new_arr)  # the change made through the slice shows up in the main array
modi_arr[:]  # the sliced view itself
[ 0 1 2 3 99 123456 99 99 99 9 10 11]
array([ 99, 123456, 99, 99, 99])
# Two-dimensional arrays can be treated like matrices.
matrix_arr = np.array([[3, 4, 5], [6, 7, 8], [9, 5, 1]])
print(matrix_arr)
print(matrix_arr[1])  # a single index selects a whole row (here the second)
print(matrix_arr[0][2])  # first row and third column
print(matrix_arr[0, 2])  # same element as above, using one comma-separated index
# Display a local image file in the notebook output.
from IPython.display import Image # Image wraps an image file so the notebook can render it
i = Image(filename='Capture.png') # relative path, resolved against the current working directory
i # shows the picture: a blueprint of a matrix
[[3 4 5] [6 7 8] [9 5 1]] [6 7 8] 5 5
cd C:\Users\tk\Desktop\pics # changing my directory
C:\Users\tk\Desktop\pics
# 3-D arrays: this is a 2 x 2 x 3 array (two blocks, each 2 rows x 3 columns).
three_d_arr = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
print(three_d_arr)
# Index [0, 1] picks block 0, then row 1 of that block.
print("returns the second list inside first list {}".format(three_d_arr[0, 1]))
[[[ 1 2 3] [ 4 5 6]] [[ 7 8 9] [10 11 12]]] returns the second list inside first list [4 5 6]
three_d_arr = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
# If you omit the later indices, the returned object is a lower-dimensional
# ndarray consisting of all the data along the remaining dimensions; here
# three_d_arr[0] is the first 2 x 3 block.
print(three_d_arr[0])
[[1 2 3] [4 5 6]]
The cell below uses the string format method to build its printed messages.
# Save a copy of the first block, overwrite the block, then restore it.
copied_values = three_d_arr[0].copy()  # .copy() makes an independent array, not a view
three_d_arr[0] = 99  # change all values of three_d_arr[0] to 99
print("New value of three_d_arr: {}".format(three_d_arr))  # check the new value
three_d_arr[0] = copied_values  # assign the saved values back to three_d_arr[0]
print(" three_d_arr again: {}".format(three_d_arr))
New value of three_d_arr: [[[99 99 99] [99 99 99]] [[ 7 8 9] [10 11 12]]] three_d_arr again: [[[ 1 2 3] [ 4 5 6]] [[ 7 8 9] [10 11 12]]]
# Slicing a 2-D array: one slice per axis, separated by a comma.
matrix_arr = np.array([[3, 4, 5], [6, 7, 8], [9, 5, 1]])
print("The original matrix {}:".format(matrix_arr))
# Similar to list slicing: returns the first two rows of the array.
print("slices the first two rows:{}".format(matrix_arr[:2]))
# [:2, 1:] keeps the first two rows and the columns from index 1 onward,
# i.e. the LAST two columns of each of those rows.
print("Slices the first two rows and two columns:{}".format(matrix_arr[:2, 1:]))
# Row 1, columns 0 and 1.
print("returns 6 and 7: {}".format(matrix_arr[1, :2]))
# A colon by itself means "take the entire axis"; slicing with :1 (instead
# of indexing with 0) keeps the result two-dimensional, as a 3 x 1 column.
print("Returns first column: {}".format(matrix_arr[:, :1]))
The original matrix [[3 4 5] [6 7 8] [9 5 1]]: slices the first two rows:[[3 4 5] [6 7 8]] Slices the first two rows and two columns:[[4 5] [7 8]] returns 6 and 7: [6 7] Returns first column: [[3] [6] [9]]
# Display a local image file in the notebook output.
from IPython.display import Image # Image wraps an image file so the notebook can render it
j = Image(filename='Expre.png') # relative path, resolved against the current working directory
j # diagrammatic explanation of how matrix array slicing works
# Boolean indexing starts from a comparison: comparing an array with a
# scalar is done element by element and yields a boolean array.
personals = np.array(['Manu', 'Jeevan', 'Prakash', 'Manu', 'Prakash', 'Jeevan', 'Prakash'])
print(personals == 'Manu')  # True where the element equals 'Manu', otherwise False
[ True False False True False False False]
# Import the random module from NumPy and draw a 7 x 4 array of random
# samples, one row per entry in personals.
from numpy import random
random_no = random.randn(7, 4)
print(random_no)
# Indexing with the boolean mask keeps only the rows where the mask is True,
# i.e. the rows whose entry in personals is 'Manu'.
random_no[personals == 'Manu']
# Check the image displayed in the cell below.
[[-0.129557 0.3684001 -0.15747451 -0.1196816 ] [-0.35946571 -1.23477985 1.08186057 -0.61596683] [ 1.67096505 1.11183755 -0.39640455 0.22848279] [-0.27989438 -1.51275966 -0.48825407 1.32425359] [-0.04493194 -1.10371501 -0.52742166 -1.06265549] [ 1.16938298 -0.60478133 1.40615125 -1.35350336] [ 0.86325448 1.97577081 0.05339779 0.71515521]]
array([[-0.129557 , 0.3684001 , -0.15747451, -0.1196816 ], [-0.27989438, -1.51275966, -0.48825407, 1.32425359]])
cd C:\Users\Manu\Desktop
C:\Users\Manu\Desktop
# Display a local image file in the notebook output.
from IPython.display import Image
k = Image(filename='Matrix.png') # relative path, resolved against the current working directory
k
random_no[personals == 'Manu', 2:] # rows where personals is 'Manu', columns from index 2 onward (the last two of the four columns)
array([[-0.15747451, -0.1196816 ], [-0.48825407, 1.32425359]])
# To select everything except 'Manu', either compare with != or negate the
# condition with ~. (Unary minus on a boolean array is rejected by current
# NumPy releases, so ~ is the operator to use for negating a mask.)
print(personals != 'Manu')
random_no[~(personals == 'Manu')]  # get everything except the 1st and 4th rows
[False True True False True True True]
array([[-0.35946571, -1.23477985, 1.08186057, -0.61596683], [ 1.67096505, 1.11183755, -0.39640455, 0.22848279], [-0.04493194, -1.10371501, -0.52742166, -1.06265549], [ 1.16938298, -0.60478133, 1.40615125, -1.35350336], [ 0.86325448, 1.97577081, 0.05339779, 0.71515521]])
# Boolean masks can be combined with the operators & (and) and | (or).
new_variable = (personals == 'Manu') | (personals == 'Jeevan')
print(new_variable)
random_no[new_variable]  # rows whose name is 'Manu' or 'Jeevan'
[ True True False True False True False]
array([[-0.129557 , 0.3684001 , -0.15747451, -0.1196816 ], [-0.35946571, -1.23477985, 1.08186057, -0.61596683], [-0.27989438, -1.51275966, -0.48825407, 1.32425359], [ 1.16938298, -0.60478133, 1.40615125, -1.35350336]])
# Boolean-mask assignment: every element where the condition holds is overwritten in place.
random_no[random_no < 0] =0
random_no # display the array; all negative values are now zero
array([[ 0. , 0.3684001 , 0. , 0. ], [ 0. , 0. , 1.08186057, 0. ], [ 1.67096505, 1.11183755, 0. , 0.22848279], [ 0. , 0. , 0. , 1.32425359], [ 0. , 0. , 0. , 0. ], [ 1.16938298, 0. , 1.40615125, 0. ], [ 0.86325448, 1.97577081, 0.05339779, 0.71515521]])
random_no[ personals != 'Manu'] = 9 # sets every row whose name is not 'Manu' to 9 (all rows except the 1st and 4th)
random_no
array([[ 0. , 0.3684001 , 0. , 0. ], [ 9. , 9. , 9. , 9. ], [ 9. , 9. , 9. , 9. ], [ 0. , 0. , 0. , 1.32425359], [ 9. , 9. , 9. , 9. ], [ 9. , 9. , 9. , 9. ], [ 9. , 9. , 9. , 9. ]])
Fancy indexing copies data into a new array
from numpy import random
# np.empty allocates a 7 x 4 array without initialising its contents; that
# is fine here because every row is overwritten in the loop below.
algebra = np.empty((7, 4))
for j in range(7):
    algebra[j] = j  # fill row j with the value j (broadcast across the row)
algebra
array([[ 0., 0., 0., 0.], [ 1., 1., 1., 1.], [ 2., 2., 2., 2.], [ 3., 3., 3., 3.], [ 4., 4., 4., 4.], [ 5., 5., 5., 5.], [ 6., 6., 6., 6.]])
# To select a subset of rows in a particular order, simply pass a list of row indices.
algebra[[4,5,1]] # returns rows 4, 5 and 1, in that order
array([[ 4., 4., 4., 4.], [ 5., 5., 5., 5.], [ 1., 1., 1., 1.]])
fancy = np.arange(36).reshape(9, 4)  # reshape turns the 36 values into 9 rows x 4 columns
print(fancy)
# Passing two index lists pairs them up element by element, so the result is
# one-dimensional: the elements at positions (1,3), (4,2), (3,1) and (2,0).
fancy[[1, 4, 3, 2], [3, 2, 1, 0]]
[[ 0 1 2 3] [ 4 5 6 7] [ 8 9 10 11] [12 13 14 15] [16 17 18 19] [20 21 22 23] [24 25 26 27] [28 29 30 31] [32 33 34 35]]
array([ 7, 18, 13, 8])
fancy[[1, 4, 8, 2]][:, [0, 3, 1, 2]] # rows 1, 4, 8 and 2 are selected in full, then the columns of each row are reordered as 0, 3, 1, 2
array([[ 4, 7, 5, 6], [16, 19, 17, 18], [32, 35, 33, 34], [ 8, 11, 9, 10]])
# Another way to do the above operation is with the np.ix_ function, which
# combines the row list and the column list into one indexer for the
# rectangular region they span.
fancy[np.ix_([1,4,8,2],[0,3,1,2])]
array([[ 4, 7, 5, 6], [16, 19, 17, 18], [32, 35, 33, 34], [ 8, 11, 9, 10]])
transpose= np.arange(12).reshape(3,4) # a 3 x 4 array holding 0..11
transpose.T # .T is the transpose: the shape changes from (3, 4) to (4, 3)
array([[ 0, 4, 8], [ 1, 5, 9], [ 2, 6, 10], [ 3, 7, 11]])
# You can use the np.dot function to perform matrix computations. X transpose X is calculated as follows:
np.dot(transpose.T, transpose) # (4 x 3) times (3 x 4) gives a 4 x 4 result
array([[ 80, 92, 104, 116], [ 92, 107, 122, 137], [104, 122, 140, 158], [116, 137, 158, 179]])
Universal functions (ufuncs) perform element-wise operations on data in arrays.
# Unary universal functions take one array and work element by element.
funky = np.arange(8)
print(np.sqrt(funky))  # square root of each element
print(np.exp(funky))  # e raised to the power of each element
# These are called unary functions.
[ 0. 1. 1.41421356 1.73205081 2. 2.23606798 2.44948974 2.64575131] [ 1.00000000e+00 2.71828183e+00 7.38905610e+00 2.00855369e+01 5.45981500e+01 1.48413159e+02 4.03428793e+02 1.09663316e+03]
# Binary functions take two arrays and combine them element by element;
# examples are maximum and add.
x = random.randn(10)
y = random.randn(10)
print(x)
print(y)
print(np.maximum(x, y))  # element-wise maximum of the two arrays
# modf is a unary function that returns two arrays: the fractional parts
# and the integral parts of a floating-point array.
print(np.modf(x))
[-0.47538326 -0.32308133 1.45505923 -0.53196376 -1.34427866 -2.14409558 -0.96296558 0.14068437 -0.29208196 -1.17537313] [-1.68868842 -0.53788536 -1.01887225 -0.02972594 -1.04607062 -2.08636169 0.34398903 -0.64183089 1.55401001 0.73270627] [-0.47538326 -0.32308133 1.45505923 -0.02972594 -1.04607062 -2.08636169 0.34398903 0.14068437 1.55401001 0.73270627] (array([-0.47538326, -0.32308133, 0.45505923, -0.53196376, -0.34427866, -0.14409558, -0.96296558, 0.14068437, -0.29208196, -0.17537313]), array([-0., -0., 1., -0., -1., -2., -0., 0., -0., -1.]))
# List of unary functions available
from IPython.display import Image
l = Image(filename='unary functions.png') # relative path, resolved against the current working directory
l
# List of binary functions available
from IPython.display import Image # repeated import of a name that is already in scope
l = Image(filename='binary functions.png') # rebinds l to the second image
l
# Logical operators and the comparison functions greater, greater_equal,
# less, less_equal, equal and not_equal can also be applied element-wise.
mtrices = np.arange(-5, 5, 1)  # the integers -5 .. 4
# meshgrid takes two 1-D arrays and produces two 2-D arrays: x repeats the
# first input along every row, y repeats the second input down every column.
x, y = np.meshgrid(mtrices, mtrices)
print("Matrix values of y: {}".format(y))
print("Matrix values of x: {}".format(x))
Matrix values of y: [[-5 -5 -5 -5 -5 -5 -5 -5 -5 -5] [-4 -4 -4 -4 -4 -4 -4 -4 -4 -4] [-3 -3 -3 -3 -3 -3 -3 -3 -3 -3] [-2 -2 -2 -2 -2 -2 -2 -2 -2 -2] [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1] [ 0 0 0 0 0 0 0 0 0 0] [ 1 1 1 1 1 1 1 1 1 1] [ 2 2 2 2 2 2 2 2 2 2] [ 3 3 3 3 3 3 3 3 3 3] [ 4 4 4 4 4 4 4 4 4 4]] Matrix values of x: [[-5 -4 -3 -2 -1 0 1 2 3 4] [-5 -4 -3 -2 -1 0 1 2 3 4] [-5 -4 -3 -2 -1 0 1 2 3 4] [-5 -4 -3 -2 -1 0 1 2 3 4] [-5 -4 -3 -2 -1 0 1 2 3 4] [-5 -4 -3 -2 -1 0 1 2 3 4] [-5 -4 -3 -2 -1 0 1 2 3 4] [-5 -4 -3 -2 -1 0 1 2 3 4] [-5 -4 -3 -2 -1 0 1 2 3 4] [-5 -4 -3 -2 -1 0 1 2 3 4]]
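The zip function, used in the cell below, pairs up the corresponding elements of several sequences (the original post links to a clear explanation of it here).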
x1 = np.array([1, 2, 3, 4, 5])
y1 = np.array([6, 7, 8, 9, 10])
cond = [True, False, True, True, False]
# Goal: take the value from x1 whenever the corresponding value in cond is
# True, otherwise take the value from y1.
# zip is used only to illustrate the concept: it lines up the candidates
# and the condition as (x1 value, y1 value, condition) triples. Building the
# list straight from zip avoids loop variables that, on Python 2, would leak
# out of a list comprehension and overwrite the x and y arrays defined earlier.
z1 = list(zip(x1, y1, cond))
print(z1)
np.where(cond, x1, y1)  # the vectorized way to make that choice
[(1, 6, True), (2, 7, False), (3, 8, True), (4, 9, True), (5, 10, False)]
array([ 1, 7, 3, 4, 10])
ra = np.random.randn(5, 5)
# np.where can replace values based on a condition: here the negative values
# in ra become -1 and the positive values become 1.
print(ra)
print(np.where(ra > 0, 1, -1))  # 1 where the value is greater than zero, otherwise -1
# To replace only the positive values and keep everything else unchanged:
np.where(ra > 0, 1, ra)  # the same approach applies to negative values
[[-0.91593384 0.38253326 -0.13340929 -0.12353528 -0.90849552] [ 2.23109011 -0.7980066 0.13600282 -0.2447923 1.32865533] [-0.65568719 -1.48154609 0.8033841 -0.84157511 -0.19588005] [ 1.42527047 0.63082249 -0.80092209 -0.69935209 0.20470869] [ 0.18245815 -0.99953295 0.05586992 0.38031972 0.60522581]] [[-1 1 -1 -1 -1] [ 1 -1 1 -1 1] [-1 -1 1 -1 -1] [ 1 1 -1 -1 1] [ 1 -1 1 1 1]]
array([[-0.91593384, 1. , -0.13340929, -0.12353528, -0.90849552], [ 1. , -0.7980066 , 1. , -0.2447923 , 1. ], [-0.65568719, -1.48154609, 1. , -0.84157511, -0.19588005], [ 1. , 1. , -0.80092209, -0.69935209, 1. ], [ 1. , -0.99953295, 1. , 1. , 1. ]])
# Basic statistics are available both as array methods and as NumPy functions.
thie = np.random.randn(5, 5)
print(thie.mean())  # calculates the mean of all elements of thie
print(np.mean(thie))  # alternate way to calculate the same mean
print(thie.sum())  # sum of all elements
0.286291297223 0.286291297223 7.15728243058
# Sums along one axis of a 4 x 3 array.
jp = np.arange(12).reshape(4, 3)
print("The arrays are: {}".format(jp))
# axis=0 collapses the rows and gives one sum per column; axis=1 gives one
# sum per row. NOTE: the label printed below says "rows", but with axis=0
# the three values are the column sums.
print("The sum of rows are :{}".format(np.sum(jp, axis=0)))
# Remember: zero is for columns and one is for rows.
The arrays are: [[ 0 1 2] [ 3 4 5] [ 6 7 8] [ 9 10 11]] The sum of rows are :[18 22 26]
print(jp.sum(1))  # returns the sum of each row (axis 1)
[ 3 12 21 30]
jp.cumsum(0) # cumulative sum down each column (axis 0); try jp.cumprod(0) for the cumulative product
array([[ 0, 1, 2], [ 3, 5, 7], [ 9, 12, 15], [18, 22, 26]])
jp.cumsum(1)# cumulative sum along each row (axis 1)
array([[ 0, 1, 3], [ 3, 7, 12], [ 6, 13, 21], [ 9, 19, 30]])
# Boolean arrays count as 1 (True) and 0 (False) when summed, so summing a
# comparison counts how many elements satisfy it.
xp = np.random.randn(100)
print((xp > 0).sum())  # number of positive values
print((xp < 0).sum())  # number of negative values
tandf = np.array([True, False, True, False, True, False])
print(tandf.any())  # True if at least one of the values is True
print(tandf.all())  # False as soon as a single value is False
# These methods also work with non-boolean arrays, where non-zero elements
# evaluate to True.
45 55 True False
Other array functions are:
std, var -> standard deviation and variance
min, max -> Minimum and Maximum
argmin, argmax -> Indices of minimum and maximum elements
# The sort method sorts an array in place, in ascending order.
lp = np.random.randn(8)
print(lp)  # values before sorting
lp.sort()  # modifies lp itself and returns None
lp
[-0.38465299 -0.84381465 -1.78393531 -0.80242681 -2.54136215 -0.47354742 -1.17517075 0.23759082]
array([-2.54136215, -1.78393531, -1.17517075, -0.84381465, -0.80242681, -0.47354742, -0.38465299, 0.23759082])
tp = np.random.randn(4,4) # a 4 x 4 array of random samples
tp
array([[ 0.4968525 , -0.65497365, -0.43687651, 0.51706412], [-1.39148137, -0.0166924 , -0.82572908, 2.20839298], [-0.5400157 , -0.8311936 , -1.92611011, 0.04556166], [ 0.41679611, -1.1659837 , -1.7181857 , 0.15529182]])
tp.sort(1) # sorts each row in place (along axis 1)
tp # display the array to check that every row is now in ascending order
array([[-0.65497365, -0.43687651, 0.4968525 , 0.51706412], [-1.39148137, -0.82572908, -0.0166924 , 2.20839298], [-1.92611011, -0.8311936 , -0.5400157 , 0.04556166], [-1.7181857 , -1.1659837 , 0.15529182, 0.41679611]])
personals = np.array(['Manu', 'Jeevan', 'Prakash', 'Manu', 'Prakash', 'Jeevan', 'Prakash'])
np.unique(personals)# returns the sorted unique elements of the array
array(['Jeevan', 'Manu', 'Prakash'], dtype='|S7')
set(personals) # the built-in set also removes duplicates, but returns an unordered Python set rather than a sorted array
{'Jeevan', 'Manu', 'Prakash'}
np.in1d(personals, ['Manu']) # tests each element of personals for membership in the list: True where it is 'Manu', otherwise False
array([ True, False, False, True, False, False, False], dtype=bool)
Other Functions are :
intersect1d(x, y)-> Compute the sorted, common elements in x and y
union1d(x,y) -> compute the sorted union of elements
setdiff1d(x,y) -> set difference, elements in x that are not in y
setxor1d(x, y) -> Set symmetric differences; elements that are in either of the arrays, but not both
# Two small matrices for the linear-algebra examples: cp is 2 x 3, dp is 3 x 2.
cp = np.array([[1, 2, 3], [4, 5, 6]])
dp = np.array([[7, 8], [9, 10], [11, 12]])
print("CP array :{}".format(cp))
print("DP array :{}".format(dp))
CP array :[[1 2 3] [4 5 6]] DP array :[[ 7 8] [ 9 10] [11 12]]
# Matrix multiplication (not element-wise): (2 x 3) times (3 x 2) gives a 2 x 2 result
cp.dot(dp) # this is equivalent to np.dot(cp, dp)
array([[ 58, 64], [139, 154]])
np.dot(cp, np.ones(3)) # a matrix times a vector of ones gives the sum of each row
array([ 6., 15.])
# numpy.linalg has standard matrix operations such as determinants and inverses.
from numpy.linalg import inv, qr
cp = np.array([[1, 2, 3], [4, 5, 6]])
# cp.T is the transpose of cp (not its inverse), and .dot is matrix
# multiplication (not element-wise): (3 x 2) times (2 x 3) gives a 3 x 3 result.
new_mat = cp.T.dot(cp)
print(new_mat)
[[17 22 27] [22 29 36] [27 36 45]]
# Invert a random 5 x 5 matrix.
sp = np.random.randn(5, 5)
rt = inv(sp)  # compute the inverse once and reuse it
print(rt)
[[ 8.42073934 -3.99404791 -1.02750024 -9.15141449 -11.83177632] [ 0.99455489 0.12614541 0.97324631 0.13731371 1.83602625] [ 7.22433965 -3.9236319 -1.72053933 -8.26352406 -11.80445805] [ 4.35711911 -2.62701594 -0.75752399 -4.80133342 -6.89057351] [ 4.97536913 -1.66709125 0.42132364 -4.00769704 -4.45711904]]
# Product of a matrix and its inverse: the identity matrix, up to floating-point rounding error
sp.dot(rt)
array([[ 1.00000000e+00, -6.66133815e-16, -3.88578059e-16, -4.44089210e-16, -5.77315973e-15], [ -8.88178420e-16, 1.00000000e+00, 1.11022302e-16, 4.44089210e-16, 8.88178420e-16], [ -2.66453526e-15, 2.22044605e-16, 1.00000000e+00, -3.55271368e-15, 2.22044605e-15], [ 8.88178420e-16, 0.00000000e+00, -1.11022302e-16, 1.00000000e+00, -8.88178420e-16], [ 0.00000000e+00, -6.66133815e-16, 1.66533454e-16, 8.88178420e-16, 1.00000000e+00]])
# QR decomposition: factor sp into q and r such that sp equals q.dot(r).
q, r = qr(sp)
print(q)
r  # the upper-triangular factor
[[-0.50510571 0.0181599 0.07531349 0.59150958 -0.62368481] [ 0.13921471 -0.40513763 0.84451738 0.24413444 0.20897736] [ 0.53635022 -0.51829708 -0.46188958 0.47703793 -0.05281481] [-0.66103319 -0.49468555 -0.25644088 0.01307464 0.50238278] [ 0.02917284 0.56761612 -0.04488163 0.6023111 0.55871984]]
array([[ 2.90927288, -0.76452754, -3.08539037, 0.77536573, -1.07156322], [-0. , 2.28961296, 1.31005059, -0.44393071, -1.96748764], [-0. , 0. , 1.48340931, -2.65558951, 0.18679631], [ 0. , 0. , 0. , -0.37900614, 0.4507976 ], [-0. , -0. , -0. , -0. , -0.12535448]])
Other Matrix Functions