import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
1D¶
2+D¶
np.linspace¶
np.linspace returns n evenly spaced values, so the step between neighbouring values is (end - start)/(n - 1).
For example, np.linspace(1, 10, 4) has step (10-1)/(4-1) = 3.
np.arange¶
any dimension¶
np.arange(some_size).reshape(some_shape)
np.random¶
Inspecting array¶
np.copy(ar) | copies the data to new memory, i.e. an independent (deep) copy.
In comparison, ar2 = ar1 does not copy anything: it just gives the original array another name. Changing ar1 will change ar2, and vice versa! (This is loosely called a "shallow copy" in these notes.)
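ar.sort() | Sorts ar in place (along the last axis by default)
ar.sort(axis=0) | Sorts ar in place along axis 0 (within each column of a 2D array)
two_d_ar.flatten() | Flattens 2D array to 1D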
ar.T | Transposes ar (rows become columns and vice versa)
ar.reshape(3,4) | Reshapes ar to 3 rows, 4 columns without changing data
ar.resize((5,6)) | Changes ar shape to 5x6 and fills new values with 0
ar.view(dtype) | Creates view of ar elements with type dtype
Combining/splitting¶
Adding/removing Elements¶
Indexing/slicing/subsetting¶
ar[5] | the element at index 5
ar[2,5] | element on index [2][5]
ar[1]=4 | Assigns array element on index 1 the value 4
ar[0:3] | Returns the elements at indices 0,1,2 (On a 2D array: returns rows 0,1,2)
ar[0:3,4] | Returns the elements on rows 0,1,2 at column 4
ar[:2] | Returns the elements at indices 0,1 (On a 2D array: returns rows 0,1)
ar[:,1] | Returns the elements at index 1 on all rows
ar<5 | Returns a boolean array (True where the element is less than 5)
(ar1<3) & (ar2>5) | Returns a boolean array (element-wise AND of the two conditions)
~ar | Inverts a boolean array
ar[ar<5] | boolean slicing
#data1.txt just contains 6 numbers, separated by spaces, with one line break (two rows of three numbers)
ar1 = np.loadtxt('./data/data1.txt')
ar1
array([[1., 2., 3.], [4., 5., 6.]])
#save txt file
np.savetxt('./data/ar1.txt',ar1,delimiter=' ')
#re-import it to verify it is working
ar1 = np.loadtxt('./data/ar1.txt')
ar1
array([[1., 2., 3.], [4., 5., 6.]])
df = pd.read_csv('./data/data2.csv', header=None)
df
0 | 1 | 2 | 3 | |
---|---|---|---|---|
0 | 6 | 2 | 3 | 1 |
1 | 4 | 5 | 4 | 3 |
numpy_matrix = df.values
numpy_matrix
array([[6, 2, 3, 1], [4, 5, 4, 3]], dtype=int64)
df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
df
A | B | |
---|---|---|
0 | 1 | 3 |
1 | 2 | 4 |
ar = df.values
ar
array([[1, 3], [2, 4]], dtype=int64)
ar = np.array([[1,100],[2,1]])
ar
array([[ 1, 100], [ 2, 1]])
l = ar.tolist()
l
[[1, 100], [2, 1]]
type(l)
list
np.array([1,7,90,1000.])
array([ 1., 7., 90., 1000.])
np.zeros(100)
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
np.ones(10)
array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
np.full((1),1)
array([1])
np.zeros((10,10))
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
np.ones((10,10))
array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])
np.full((2,3),1000.1)
array([[1000.1, 1000.1, 1000.1], [1000.1, 1000.1, 1000.1]])
np.eye(3)
array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
np.array([(1,2),(3,4)])
array([[1, 2], [3, 4]])
np.array([(1,7),(6,9)]).shape
(2, 2)
np.array([(1,2),(3,0)])
array([[1, 2], [3, 0]])
np.array([[1,2,3,4],[1,4,3]]).shape
(2,)
np.array([[1,2,3,4],[5,6,7,8]]).shape
(2, 4)
np.array([[1,2,3,4]])
array([[1, 2, 3, 4]])
np.array([(1,2),(3,4)])
array([[1, 2], [3, 4]])
# nested lists result in multi-dimensional arrays
np.array([range(i, i + 3) for i in [1, 3, 1]])
array([[1, 2, 3], [3, 4, 5], [1, 2, 3]])
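# np.linspace: the step between neighbouring values is (end - start)/(n - 1).
# For example, np.linspace(1, 10, 4) has step (10-1)/(4-1) = 3; in the call below the step is (2-1)/(4-1) = 1/3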
np.linspace(1,2,4)
array([1. , 1.33333333, 1.66666667, 2. ])
#interval = (11-1)/(6-1)= 2
np.linspace(1,11,6)
array([ 1., 3., 5., 7., 9., 11.])
# np.arange is similar to np.linspace with the following 3 differences:
# 1. the end point is not included; 2. you specify the interval (step) instead of the number of points; 3. np.linspace returns an array of floats whereas np.arange returns integers when all of its arguments are integers
# in general, I prefer np.arange instead of np.linspace
np.arange(0,10,3)
array([0, 3, 6, 9])
np.random.rand(2,3) # 2X3 array of random floats between 0–1
array([[0.57256735, 0.50921722, 0.11257014], [0.17108578, 0.62249645, 0.52027419]])
np.random.rand(2,3)*100 # 2X3 array of random floats between 0–100
array([[80.97626817, 84.65068553, 28.65173649], [62.53292734, 71.53976566, 47.46193707]])
np.random.randint(2,100)
66
ar = np.random.randn(10)*10+1
plt.hist(ar, bins = np.arange(-30,31,3))
(array([0., 0., 0., 0., 0., 0., 0., 4., 2., 2., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.]), array([-30, -27, -24, -21, -18, -15, -12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30]), <a list of 20 Patch objects>)
ar.std() # Returns the standard deviation of the array elements along given axis.
5.758782594543135
ar = np.random.randn(1000)*10+1
plt.hist(ar, bins = np.arange(-30,31,3))
(array([ 1., 5., 8., 20., 30., 33., 72., 82., 93., 118., 120., 106., 117., 81., 52., 28., 13., 15., 5., 1.]), array([-30, -27, -24, -21, -18, -15, -12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30]), <a list of 20 Patch objects>)
ar.std()
9.75399945467625
ar = np.random.randint(0,10, size=(2,2))
ar
array([[2, 1], [0, 7]])
ar.size
4
ar.shape
(2, 2)
ar.ndim
2
ar.dtype
dtype('int32')
ar.astype('float')
array([[2., 1.], [0., 7.]])
np.copy(ar) | copies to new memory
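ar.sort() | Sorts ar in place (along the last axis by default)
ar.sort(axis=0) | Sorts ar in place along axis 0 (within each column of a 2D array)
two_d_ar.flatten() | Flattens 2D array to 1D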
ar.T | Transposes ar (rows become columns and vice versa)
ar.reshape(3,4) | Reshapes ar to 3 rows, 4 columns without changing data
ar.resize((5,6)) | Changes ar shape to 5x6 and fills new values with 0
ar.view(dtype) | Creates view of ar elements with type dtype
ar
array([[2, 1], [0, 7]])
b = ar.copy()
b
array([[2, 1], [0, 7]])
c =ar
c
array([[1000, 1], [ 0, 7]])
ar[0,0]=1000
ar
array([[1000, 1], [ 0, 7]])
# c is changed as well: c = ar does not copy anything, so c is just another name for the same array as ar
c
array([[1000, 1], [ 0, 7]])
b = ar.copy()
ar[0,0] = 0
ar
array([[0, 1], [0, 7]])
b
array([[1000, 1], [ 0, 7]])
c
array([[0, 1], [0, 7]])
Changing the shallow copy (here c, which is just another name for ar) will also change the original array
c[0,1] = 999
c
array([[ 0, 999], [ 0, 7]])
ar #note that its [0,1] element is also changed!
array([[ 0, 999], [ 0, 7]])
a.sort(axis=-1, kind='quicksort', order=None)
dr = np.concatenate((ar,ar,ar,ar),axis=0)
np.concatenate((dr,dr,dr,dr), axis=1)
array([[10, 10, 10, 10, 10, 10, 10, 10], [ 8, 6, 8, 6, 8, 6, 8, 6], [10, 10, 10, 10, 10, 10, 10, 10], [ 8, 6, 8, 6, 8, 6, 8, 6], [10, 10, 10, 10, 10, 10, 10, 10], [ 8, 6, 8, 6, 8, 6, 8, 6], [10, 10, 10, 10, 10, 10, 10, 10], [ 8, 6, 8, 6, 8, 6, 8, 6]])
np.split(ar,2)
[array([[10, 10]]), array([[8, 6]])]
np.hsplit(ar,2)
[array([[10], [ 8]]), array([[10], [ 6]])]
np.concatenate(np.hsplit(ar,2), axis=1)
array([[10, 10], [ 8, 6]])
ar
array([[10, 10], [ 8, 6]])
#if you don't specify axis, then the result will be flattened
np.append(ar,[5,5])
array([10, 10, 8, 6, 5, 5])
#if you specify axis, then the values must have the same number of dimensions as ar and the same shape along every axis except the one you append along
np.append(ar,[[5,5],[5,5]], axis=0)
array([[10, 10], [ 8, 6], [ 5, 5], [ 5, 5]])
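If you specify axis, then the values must have the same number of dimensions as ar and the same shape along every axis except the one you append along (here: 2 rows, appended along axis=1)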
np.append(ar,[[5,5],[5,5]], axis=1)
array([[10, 10, 5, 5], [ 8, 6, 5, 5]])
a = np.array([[1, 1], [2, 2], [3, 3]])
a
array([[1, 1], [2, 2], [3, 3]])
#np.insert(arr, obj, values, axis=None)
# obj=: Object that defines the index or indices before which `values` is inserted.
# in this example, 0 means the 0th index
np.insert(a, 0, 5)
array([5, 1, 1, 2, 2, 3, 3])
np.insert(a, -1, 5)
array([1, 1, 2, 2, 3, 5, 3])
np.array([1,2,3]).shape
(3,)
np.array([[1],[2],[3]]).shape
(3, 1)
# in this example, [0] means to insert the (3,1) column array before column 0, i.e. as the very first column
np.insert(a, [0],[[1],[2],[3]], axis=1)
array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
# in this example, [1] means to insert the (3,1) column array before column 1, i.e. as the second column
np.insert(a, [1], [[1],[2],[3]], axis=1)
array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
a
array([[1, 1], [2, 2], [3, 3]])
b = a.flatten()
b
array([1, 1, 2, 2, 3, 3])
b
array([1, 1, 2, 2, 3, 3])
np.insert(b, slice(2, 4), [5, 6])
array([1, 1, 5, 2, 6, 2, 3, 3])
np.arange(8)
array([0, 1, 2, 3, 4, 5, 6, 7])
x = np.arange(8).reshape(2, 4)
x
array([[0, 1, 2, 3], [4, 5, 6, 7]])
idx = (1, 3) #with axis=1 these are two column indices: 999 is inserted before column 1 and before column 3 of x
np.insert(x, idx, 999, axis=1)
array([[ 0, 999, 1, 2, 999, 3], [ 4, 999, 5, 6, 999, 7]])
#array of 10 zeros
np.zeros(10, dtype=int)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
np.zeros((2,3),dtype=float)
array([[0., 0., 0.], [0., 0., 0.]])
np.full((10,10),1)
array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
np.ones((10,10), dtype=int)
array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
X = np.ones((2,1), dtype=int)
X
array([[1], [1]])
Y = np.ones((1,2), dtype=int)
Y
array([[1, 1]])
X - Y
array([[0, 0], [0, 0]])
X = np.full((2,1),2)
X
array([[2], [2]])
Y = np.full((1,2),1)
Y
array([[1, 1]])
X - Y
array([[1, 1], [1, 1]])
X = np. array([[1],[2]])
X
array([[1], [2]])
Y = np.array([[2, 1]])
Y
array([[2, 1]])
X -Y
array([[-1, 0], [ 0, 1]])
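If we think of each row of X as a point in 2-D space (like a point on a sheet of paper) and we want to get its distance from all other points, including itself, we first need the difference between every pair of points.
To get that, we reshape X into a 3-D array, which we call Y, and subtract. NumPy aligns the two shapes starting from the last axis and repeats (broadcasts) an array along any axis where it has length 1 or is missing.
The trick is that we do not reshape Y to (2,2,1). Rather, we reshape Y to (2,1,2), so that the coordinates of each point stay on the last axis.
X has shape (2,2), which is treated as (1,2,2), whereas Y has shape (2,1,2). On the middle axis Y has length 1, so Y has to duplicate itself to match the 2 rows of X.
On the first axis X has length 1, so X has to duplicate itself for each of the 2 points in Y. The result Y-X has shape (2,2,2), and (Y-X)[i,j] = X[i] - X[j].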
X = np.array([[1,0],
[2,1]])
X
array([[1, 0], [2, 1]])
Y = X.reshape(2,1,2)
Y
array([[[1, 0]], [[2, 1]]])
#[[0,0] ,[-1,-1]] = [[1, 0]] - [[1, 0],[2, 1]]
#[[ 1, 1],[ 0, 0]] = [[2, 1]] - [[1, 0],[2, 1]]
Y-X
array([[[ 0, 0], [-1, -1]], [[ 1, 1], [ 0, 0]]])
np.array([[1, 0]]) - np.array([[1, 0],[2, 1]])
array([[ 0, 0], [-1, -1]])
np.array([[2, 1]]) - np.array([[1, 0],[2, 1]])
array([[1, 1], [0, 0]])
np.hstack((np.array([[1, 0]]) - np.array([[1, 0],[2, 1]]), np.array([[2, 1]])) - np.array([[1, 0],[2, 1]]) )
array([array([[-1, -1], [-2, -2]]), array([[2, 1]]), array([[-2, -2], [-3, -3]]), array([[1, 0]])], dtype=object)