Numpy tutorials¶

Content¶

Basic
Matrix
Vector Math

Basic¶

In [1]:

# import libraries
import numpy as np
import matplotlib.pyplot as plt

In [2]:

# name it
a1 = np.array([9, 5, 1, 5, 67, 7, 8, 6, 2, 3, 4, 5])

In [3]:

a1

Out[3]:

array([ 9,  5,  1,  5, 67,  7,  8,  6,  2,  3,  4,  5])

Indexing / Slicing¶

In [4]:

a1[2]

Out[4]:

In [5]:

a1[2:]

Out[5]:

array([ 1,  5, 67,  7,  8,  6,  2,  3,  4,  5])

In [6]:

a1[:-4]

Out[6]:

array([ 9,  5,  1,  5, 67,  7,  8,  6])

In [7]:

a1[1:-4]

Out[7]:

array([ 5,  1,  5, 67,  7,  8,  6])

In [8]:

a1[1:]

Out[8]:

array([ 5,  1,  5, 67,  7,  8,  6,  2,  3,  4,  5])

In [9]:

a1[:-1]

Out[9]:

array([ 9,  5,  1,  5, 67,  7,  8,  6,  2,  3,  4])

In [10]:

a1 > 3  # bool

Out[10]:

array([ True,  True, False,  True,  True,  True,  True,  True, False,
       False,  True,  True])

In [11]:

a1[a1 > 3]

Out[11]:

array([ 9,  5,  5, 67,  7,  8,  6,  4,  5])

In [12]:

a1[a1 % 4 == 0]

Out[12]:

array([8, 4])

In [13]:

name = np.array(['Jim', 'Luke', 'Josh', 'Pete'])

In [14]:

# Boolean mask: True where the name starts with 'J'.
# np.char.startswith works element-wise on the whole string array, so no
# per-element Python lambda is needed (and empty strings simply give False).
first_letter = np.char.startswith(name, 'J')

In [15]:

first_letter

Out[15]:

array([ True, False,  True, False])

In [16]:

name[first_letter]

Out[16]:

array(['Jim', 'Josh'], dtype='<U4')

`np.zeros`¶

In [17]:

# e.g. a2 = np.zeros(10); not bound to a name here, for convenience
np.zeros(10)

Out[17]:

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

`random` and `randn`¶

In [18]:

np.random.random(10)  # 10 uniform samples from the half-open interval [0, 1)

Out[18]:

array([0.5889042 , 0.17449312, 0.33973247, 0.02388802, 0.54856456,
       0.92125803, 0.01959399, 0.84832647, 0.65894694, 0.48249202])

In [19]:

np.random.randn(10)  # standard normal (mean 0, std 1); values are NOT limited to [-1, 1]

Out[19]:

array([ 0.65684522,  0.87369899, -2.12652329, -1.01871432, -2.7057335 ,
       -0.59476432, -1.19795978, -0.17184097,  0.94891405, -1.44042028])

`linspace` and `arange`¶

In [20]:

np.linspace(0, 10, 21)  # 21: number of elements

Out[20]:

array([ 0. ,  0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,
        5.5,  6. ,  6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5, 10. ])

In [21]:

np.linspace(0, 10, 20)

Out[21]:

array([ 0.        ,  0.52631579,  1.05263158,  1.57894737,  2.10526316,
        2.63157895,  3.15789474,  3.68421053,  4.21052632,  4.73684211,
        5.26315789,  5.78947368,  6.31578947,  6.84210526,  7.36842105,
        7.89473684,  8.42105263,  8.94736842,  9.47368421, 10.        ])

In [22]:

np.arange(0, 2, 0.02)  # 0.02: step size (spacing between values); the stop value 2 is excluded

Out[22]:

array([0.  , 0.02, 0.04, 0.06, 0.08, 0.1 , 0.12, 0.14, 0.16, 0.18, 0.2 ,
       0.22, 0.24, 0.26, 0.28, 0.3 , 0.32, 0.34, 0.36, 0.38, 0.4 , 0.42,
       0.44, 0.46, 0.48, 0.5 , 0.52, 0.54, 0.56, 0.58, 0.6 , 0.62, 0.64,
       0.66, 0.68, 0.7 , 0.72, 0.74, 0.76, 0.78, 0.8 , 0.82, 0.84, 0.86,
       0.88, 0.9 , 0.92, 0.94, 0.96, 0.98, 1.  , 1.02, 1.04, 1.06, 1.08,
       1.1 , 1.12, 1.14, 1.16, 1.18, 1.2 , 1.22, 1.24, 1.26, 1.28, 1.3 ,
       1.32, 1.34, 1.36, 1.38, 1.4 , 1.42, 1.44, 1.46, 1.48, 1.5 , 1.52,
       1.54, 1.56, 1.58, 1.6 , 1.62, 1.64, 1.66, 1.68, 1.7 , 1.72, 1.74,
       1.76, 1.78, 1.8 , 1.82, 1.84, 1.86, 1.88, 1.9 , 1.92, 1.94, 1.96,
       1.98])

array operations¶

In [23]:

a1

Out[23]:

array([ 9,  5,  1,  5, 67,  7,  8,  6,  2,  3,  4,  5])

In [24]:

2 * a1

Out[24]:

array([ 18,  10,   2,  10, 134,  14,  16,  12,   4,   6,   8,  10])

In [25]:

1 / a1

Out[25]:

array([0.11111111, 0.2       , 1.        , 0.2       , 0.01492537,
       0.14285714, 0.125     , 0.16666667, 0.5       , 0.33333333,
       0.25      , 0.2       ])

In [ ]:

In [26]:

1 / a1 + 7

Out[26]:

array([7.11111111, 7.2       , 8.        , 7.2       , 7.01492537,
       7.14285714, 7.125     , 7.16666667, 7.5       , 7.33333333,
       7.25      , 7.2       ])

In [27]:

1 / a1 + a1 + 7

Out[27]:

array([16.11111111, 12.2       ,  9.        , 12.2       , 74.01492537,
       14.14285714, 15.125     , 13.16666667,  9.5       , 10.33333333,
       11.25      , 12.2       ])

In [28]:

a1 ** 2

Out[28]:

array([  81,   25,    1,   25, 4489,   49,   64,   36,    4,    9,   16,
         25])

In [29]:

x = np.linspace(0, 1, 100)
y = np.sin(x)

In [30]:

plt.plot(x, y)

Out[30]:

[<matplotlib.lines.Line2D at 0x111dccee0>]

In [31]:

# For more math functions, see "Mathematical functions" in the NumPy docs.
# Logistic function
def logistic_function(a, b, t):
    """Evaluate the logistic curve exp(-a + b*t) / (1 + exp(-a + b*t)).

    Parameters
    ----------
    a : scalar or array_like
        Offset; it enters the exponent as ``-a``.
    b : scalar or array_like
        Slope applied to ``t``.
    t : scalar or array_like
        Point(s) at which to evaluate the curve.

    Returns
    -------
    float or ndarray
        Values in [0, 1], broadcast over the inputs.

    Notes
    -----
    The direct form exp(z) / (1 + exp(z)) overflows to inf / inf = nan once
    z exceeds roughly 709. The algebraically identical form
    0.5 * (1 + tanh(z / 2)) is used instead; it saturates cleanly to 0 and 1.
    """
    z = -a + b * np.asarray(t)
    return 0.5 * (1.0 + np.tanh(0.5 * z))

In [32]:

# Logistic parameters and time grid (the values used here are a = 1, b = 1)
a = 1
b = 1
t = np.arange(0, 100)
y_1 = logistic_function(a, b, t)

In [33]:

plt.plot(t, y_1)

Out[33]:

[<matplotlib.lines.Line2D at 0x111ed0730>]

Calculus and Statistics in np¶

In [34]:

a1 = 3 * np.random.randn(100000) + 13  # mean 13 and std  =3

In [35]:

np.mean(a1)

Out[35]:

13.003654720811985

In [36]:

np.std(a1)

Out[36]:

2.9949197374144263

In [37]:

np.percentile(a1, 80)  # 80 = percentile rank: the value below which 80% of the samples fall

Out[37]:

15.533068614461017

Derivative and integral¶

In [38]:

x = np.linspace(0, 4, 100)
y = x ** 2

# Passing x as the second argument gives np.gradient the sample coordinates
# (the spacing), so the result is dy/dx rather than the change per index.
dydx = np.gradient(y, x)
# Second derivative: differentiate the first derivative once more
# (reuses dydx instead of recomputing it).
dy2dx2 = np.gradient(dydx, x)

In [39]:

plt.plot(x, y)
plt.plot(x, dydx)
plt.plot(x, dy2dx2)

Out[39]:

[<matplotlib.lines.Line2D at 0x111f2b5e0>]

In [40]:

# Numerical integral of dydx over x.
# A bare cumsum only adds up the samples; each term has to be weighted by the
# step width to approximate an integral. This is the cumulative trapezoid
# rule, started at 0 so that y_2 has the same length as x.
y_2 = np.concatenate(
    ([0.0], np.cumsum(0.5 * (dydx[1:] + dydx[:-1]) * np.diff(x))))
plt.plot(x, y_2)
plt.plot(x, dydx)

Out[40]:

[<matplotlib.lines.Line2D at 0x111fe86d0>]

In [41]:

y_t1 = np.exp(-x/10) * np.cos(3*x)
plt.plot(x, y_t1)

Out[41]:

[<matplotlib.lines.Line2D at 0x11206d810>]

In [42]:

np.mean(y_t1), np.std(y_t1)

Out[42]:

(-0.02058110687042993, 0.582342218010063)

In [43]:

# Restrict to the x domain [0.5, 2]. Combine the two element-wise masks with
# `&`; each comparison needs its own parentheses because `&` binds tighter
# than `>=` / `<=`.
y_t1[(x >= 0.5) & (x <= 2)]

Out[43]:

array([-0.00470736, -0.11891828, -0.23047005, -0.33775191, -0.43922852,
       -0.53346186, -0.61913144, -0.69505272, -0.76019331, -0.81368688,
       -0.85484447, -0.88316317, -0.89833196, -0.90023473, -0.88895043,
       -0.86475027, -0.8280923 , -0.77961314, -0.72011726, -0.65056382,
       -0.57205146, -0.48580101, -0.39313675, -0.29546609, -0.1942584 ,
       -0.09102296,  0.01271341,  0.11542887,  0.21562875,  0.31186723,
        0.40276808,  0.48704424,  0.56351595,  0.63112711,  0.6889598 ,
        0.73624653,  0.7723803 ])

In [44]:

# Values of y_t1 for 0.5 <= x <= 2 (element-wise AND of the two masks)
y_t1_range = y_t1[(x >= 0.5) & (x <= 2)]

In [45]:

np.mean(y_t1_range), np.std(y_t1_range)

Out[45]:

(-0.2679816289479111, 0.5389335524581526)

In [46]:

np.percentile(y_t1_range, 80)

Out[46]:

0.2926195358453827

In [47]:

# Pass x so the result is the derivative with respect to x; without it
# np.gradient assumes unit spacing and returns the change per sample index.
dy_t1dx = np.gradient(y_t1, x)
plt.plot(x, dy_t1dx)

Out[47]:

[<matplotlib.lines.Line2D at 0x1120f9930>]

In [48]:

x[1:][dy_t1dx[1:]*dy_t1dx[:-1] < 0]  # dydx=0

Out[48]:

array([1.05050505, 2.1010101 , 3.15151515])

In [49]:

# Sum the integers 0..10000 that are divisible by neither 4 nor 7, in one line.
# Pure-Python equivalent (use `and`; a bare `&` binds tighter than `!=`):
# sum(i for i in range(10001) if i % 4 != 0 and i % 7 != 0)
nums = np.arange(0, 10001, 1)
# Element-wise AND of the two masks, then the array's own vectorized sum.
nums[(nums % 4 != 0) & (nums % 7 != 0)].sum()

Out[49]:

32147142

In [50]:

# Flower petal: radius as a function of angle, r(theta) = 1 + 3/4 * sin(3 * theta)
theta = np.linspace(0, 2 * np.pi, 1000)
# np.sin is applied to the whole array at once; no Python-level loop needed.
r_theta = 1 + 3/4 * np.sin(3 * theta)
plt.plot(theta, r_theta)

Out[50]:

[<matplotlib.lines.Line2D at 0x111f689a0>]

In [51]:

# Convert polar coordinates (r_theta, theta) to Cartesian (x, y)
x = r_theta * np.cos(theta)
y = r_theta * np.sin(theta)
plt.plot(x, y)

Out[51]:

[<matplotlib.lines.Line2D at 0x112200bb0>]

Matrix¶

Multi-Dimensional Arrays¶

In [52]:

A_1 = np.array([[1, 2, 3], [6, 7, 8], [4, 5, 9]])
A_1

Out[52]:

array([[1, 2, 3],
       [6, 7, 8],
       [4, 5, 9]])

In [53]:

A_1*2

Out[53]:

array([[ 2,  4,  6],
       [12, 14, 16],
       [ 8, 10, 18]])

In [54]:

2 / A_1

Out[54]:

array([[2.        , 1.        , 0.66666667],
       [0.33333333, 0.28571429, 0.25      ],
       [0.5       , 0.4       , 0.22222222]])

In [55]:

# Boolean mask: element-wise comparison
A_1 > 5

Out[55]:

array([[False, False, False],
       [ True,  True,  True],
       [False, False,  True]])

In [56]:

A_2 = np.random.randn(3, 3)
A_2

Out[56]:

array([[ 0.73025004, -0.91601807, -0.367925  ],
       [ 0.42103639, -0.38280398, -0.23627246],
       [-0.07653038, -0.58021474, -1.73931773]])

In [57]:

# Nesting (very handy): index A_2 with the boolean mask built from A_1
A_2[A_1 > 5]

Out[57]:

array([ 0.42103639, -0.38280398, -0.23627246, -1.73931773])

In [58]:

# Element Indexing
A_1

Out[58]:

array([[1, 2, 3],
       [6, 7, 8],
       [4, 5, 9]])

In [59]:

A_1[:, 0]  # first col

Out[59]:

array([1, 6, 4])

In [60]:

A_1[:, 1]

Out[60]:

array([2, 7, 5])

In [61]:

# just want 7,5 above
A_1[1:, 1]

Out[61]:

array([7, 5])

In [62]:

# 2-d Function
x = np.linspace(0, 10, 1000)
y = np.linspace(0, 10, 1000)
xv, yv = np.meshgrid(x, y)
zv = xv**2 + yv ** 2

In [63]:

plt.contour(xv, yv, zv, levels=30)
plt.colorbar()

Out[63]:

<matplotlib.colorbar.Colorbar at 0x1122e2500>

`ravel()` method¶

Turn any N-Dimensional array to 1-D array

In [64]:

A_1.ravel()

Out[64]:

array([1, 2, 3, 6, 7, 8, 4, 5, 9])

Basic Linear Algebra¶

In [65]:

# Linear system A x = c: A is the coefficient matrix, c the vector of constants.
# (By hand one could apply Cramer's rule to such a system.)
A = np.array([[3, 2, 1], [5, -5, 4], [6, 0, 1]])
c = np.array([4, 3, 0])

In [66]:

np.linalg.solve(A, c)

Out[66]:

array([-0.49056604,  1.26415094,  2.94339623])

In [67]:

# Eigenvalues and eigenvectors
matrix_B = np.array([[4, 2, 2], [2, 4, 2], [2, 2, 4]])
np.linalg.eig(matrix_B)

Out[67]:

(array([2., 8., 2.]),
 array([[-0.81649658,  0.57735027, -0.32444284],
        [ 0.40824829,  0.57735027, -0.48666426],
        [ 0.40824829,  0.57735027,  0.81110711]]))

`np.linalg.eig` returns the eigenvalues and a matrix whose columns are the corresponding eigenvectors.

In [ ]:

Vector Math¶

Vectorization (also called vectorized operations or array programming):
- Replace an explicit for-loop with a single whole-array computation

In [68]:

# Difference between vectorization and non-vectorization:
# time the same dot product computed with np.dot and with a Python loop.
# sourcery skip: move-assign-in-block, sum-comprehension
import time
# vectorization
num = 10000
a = np.random.random(num)
b = np.random.random(num)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals; time.time() is wall-clock time and can be too
# coarse for a sub-millisecond operation.
start = time.perf_counter()
c = np.dot(a, b)
end = time.perf_counter()
print(c)
print(f"Verctorize time:{round((end -start)*1000,4)} ms")

# Loop version: non-vectorized
c = 0  # running total of the element-wise products
start = time.perf_counter()
for i in range(num):
    c += a[i]*b[i]
end = time.perf_counter()
print(c)
print(f"Loop version time:{round((end -start)*1000,4)} ms")

2494.7196350405775
Verctorize time:0.2 ms
2494.7196350405666
Loop version time:4.252 ms

Generate continuous uniform random variables, with a = 20, b = 30¶

In [69]:

N = 1000
a = 20
b = 30

rand_nums = np.random.uniform(a, b, N)
uniform_rvs = (rand_nums - a) / (b - a)
plt.plot(np.arange(N), uniform_rvs)

Out[69]:

[<matplotlib.lines.Line2D at 0x1125359c0>]

Generate exponential random variables with lambda = 2 by inverse transform method.¶

In [70]:

from numpy import log as ln

In [71]:

lam = 2
mean = 1 / lam

# Inverse transform: for U uniform on [0, 1), -ln(1 - U) / lam is exponential
# with rate lam. Using 1 - U (which lies in (0, 1]) instead of U gives the
# same distribution and avoids ln(0) = -inf if a sample is exactly 0.
exp_rvs = -mean * ln(1 - uniform_rvs)
plt.plot(np.arange(N), exp_rvs)

Out[71]:

[<matplotlib.lines.Line2D at 0x1125a9ea0>]

Conditional vectorization ¶

Generate bernoulli random variables with probability p = 0.3¶

In [72]:

p = 0.3
bernoulli_rvs = [1 if _ < p else 0 for _ in uniform_rvs]
plt.hist(bernoulli_rvs)

Out[72]:

(array([717.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0., 283.]),
 array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),
 <BarContainer object of 10 artists>)

In [73]:

# np.where
bernoulli_rvs_where = np.where(uniform_rvs < p, 1, 0)
plt.hist(bernoulli_rvs_where)

Out[73]:

(array([717.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0., 283.]),
 array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),
 <BarContainer object of 10 artists>)

Generate random variables from the discrete distribution with the support set $\{1,2,3,4\}$ and pmf $P[W_i = j] = \frac{j}{10},\ j = 1,2,3,4,\ i = 1, \cdots, N.$ ¶

that is:

$\begin{aligned} \mathrm{P[W_i = 1]} &= \frac{1}{10} \\ \mathrm{P[W_i = 2]} &= \frac{2}{10} \\ \mathrm{P[W_i = 3]} &= \frac{3}{10} \\ \mathrm{P[W_i = 4]} &= \frac{4}{10} ,where:\ i = 1 \cdots N \end{aligned}$

Nesting conditional vectorization

In [74]:

discrete_rvs_where = np.where(uniform_rvs < 0.1, 1, np.where(
    uniform_rvs <= 0.3, 2, np.where(uniform_rvs <= 0.6, 3, 4)))
plt.hist(discrete_rvs_where)

Out[74]:

(array([ 95.,   0.,   0., 188.,   0.,   0., 292.,   0.,   0., 425.]),
 array([1. , 1.3, 1.6, 1.9, 2.2, 2.5, 2.8, 3.1, 3.4, 3.7, 4. ]),
 <BarContainer object of 10 artists>)

In [75]:

# # Alternative with np.digitize: define the boundaries of the intervals
# bins = [0, 0.1, 0.3, 0.6, 1]

# # np.digitize returns i such that bins[i-1] <= u < bins[i],
# # i.e. 1, 2, 3 or 4 for u in [0, 1)
# indices = np.digitize(uniform_rvs, bins)

# # The discrete RV values are the indices themselves (no +1 needed)
# discrete_rvs = indices

Generate random variables from Poisson, Binomial and Gamma (with the integer shape parameter) distributions¶

Generate poisson random variables¶

In [76]:

def poisson_rvs_gen(lam=2):
    """Draw one Poisson(lam) random variate by counting exponential arrivals.

    Exponential inter-arrival times with mean 1 / lam are accumulated; the
    result is the number of arrivals that occur within the unit interval,
    i.e. before the running total first exceeds 1.

    Parameters
    ----------
    lam : float, default 2
        Rate of the Poisson distribution; must be positive.

    Returns
    -------
    int
        Number of events in one unit of time.

    Raises
    ------
    ValueError
        If ``lam`` is not positive. A negative rate would make every
        inter-arrival time negative, so the loop would never terminate.
    """
    if lam <= 0:
        raise ValueError("lam must be positive")

    mean = 1 / lam
    elapsed = 0.0  # named `elapsed` so it does not shadow the `time` module
    event_count = 0

    while True:
        # Inverse-transform exponential draw; 1 - U lies in (0, 1], so the
        # logarithm is always finite.
        elapsed += -mean * ln(1.0 - np.random.uniform())
        if elapsed > 1:
            # This arrival falls outside the unit interval: do not count it.
            return event_count
        event_count += 1

In [77]:

poisson_rvs = [poisson_rvs_gen(lam=2) for _ in range(N)]
plt.hist(poisson_rvs)

Out[77]:

(array([147., 246., 259., 189.,  89.,  46.,  17.,   4.,   2.,   1.]),
 array([0. , 0.9, 1.8, 2.7, 3.6, 4.5, 5.4, 6.3, 7.2, 8.1, 9. ]),
 <BarContainer object of 10 artists>)

Generate binomial rvs from bernoulli distribution with p = 0.3¶

In [78]:

p = 0.3
# N = 1000

In [79]:

# N binomial draws, each counting the successes among N Bernoulli(p) trials.
# Row k of the (N, N) uniform matrix holds the trials of draw k; a trial is a
# success when its uniform sample is below p, and summing the boolean row
# gives the count. This replaces N * N Python-level iterations.
binom_rvs = (np.random.uniform(0, 1, (N, N)) < p).sum(axis=1)

In [80]:

plt.hist(binom_rvs)

Out[80]:

(array([  7.,  43.,  87., 199., 244., 214., 127.,  56.,  19.,   4.]),
 array([256. , 265.3, 274.6, 283.9, 293.2, 302.5, 311.8, 321.1, 330.4,
        339.7, 349. ]),
 <BarContainer object of 10 artists>)

In [81]:

np.mean(binom_rvs)

Out[81]:

299.747

Generate gamma rvs with alpha = 4, beta = 2¶

In [82]:

# # Original code inverse function
# a = 4
# b = 2
# gamma_rvs = []

# for _ in range(N):
#     r = 0
#     for _ in range(a):
#         E = -b * ln(np.random.uniform())
#         r = r + E
#     gamma_rvs.append(r)

In [83]:

a = 4
b = 2

# Each gamma draw is the sum of `a` independent exponential draws with mean b,
# generated by inverse transform. Row k of the (N, a) matrix holds the
# exponentials of draw k. 1 - U lies in (0, 1], so ln never sees 0.
gamma_rvs = (-b * ln(1.0 - np.random.uniform(size=(N, a)))).sum(axis=1)

In [84]:

np.mean(gamma_rvs)

Out[84]:

8.16327265123655

In [85]:

plt.hist(gamma_rvs)

Out[85]:

(array([121., 389., 291., 124.,  52.,  20.,   2.,   0.,   0.,   1.]),
 array([ 0.32080009,  3.92932245,  7.53784482, 11.14636719, 14.75488955,
        18.36341192, 21.97193428, 25.58045665, 29.18897902, 32.79750138,
        36.40602375]),
 <BarContainer object of 10 artists>)

In [ ]:

Numpy tutorials¶

Content¶

Basic¶

Indexing / Slicing¶

np.zeros¶

random and randn¶

linspace and arange¶

array operations¶

Calculus and Statistics in np¶

Derivative and integral¶

Matrix¶

Multi-Dimensional Arrays¶

ravel() method¶

Basic Linear Algebra¶

Vector Math¶

Generate continuous uniform random variables, with a = 20, b = 30¶

Generate exponential random variables with lambda = 2 by inverse transform method.¶

Conditional vectorization¶

Generate bernoulli random variables with probability p = 0.3¶

Generate random variables from the discrete distribution with the support set $\{1,2,3,4\}$ and pmf $P[W_i = j] = \frac{j}{10},\ j = 1,2,3,4,\ i = 1, \cdots, N.$¶

Generate random variables from Poisson, Binomial and Gamma (with the integer shape parameter) distributions¶

Generate poisson random variables¶

Generate binomial rvs from bernoulli distribution with p = 0.3¶

Generate gamma rvs with alpha = 4, beta = 2¶

`np.zeros`¶

`random` and `randn`¶

`linspace` and `arange`¶

`ravel()` method¶

Conditional vectorization ¶

Generate random variables from the discrete distribution with the support set $\{1,2,3,4\}$ and pmf $P[W_i = j] = \frac{j}{10},\ j = 1,2,3,4,\ i = 1, \cdots, N.$ ¶