import numpy as np
Python内置对象list是一维序列,而NumPy提供了n维数组及其运算,其数据类型是ndarray;索引与切片的语法沿用list的方式(算术运算则是逐元素进行),对于多维数组、矢量以及矩阵运算非常方便。
# Convert a nested Python list into a two-dimensional ndarray.
np.array(
    [
        [1, 2],
        [3, 4],
    ]
)
array([[1, 2], [3, 4]])
# Generate the one-dimensional sequence 0..24, then reshape it into a
# 5x5 ndarray.
np.arange(25).reshape((5, 5))
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17, 18, 19], [20, 21, 22, 23, 24]])
# Multi-dimensional arrays can also be filled with random values;
# rand draws samples uniformly from [0, 1).
np.random.rand(3,4)
# randn draws samples from the standard normal distribution.
np.random.randn(4,4)
array([[-0.17294533, 0.57664369, 0.90030628, -1.50687522], [ 0.44645627, -0.007905 , -0.52326875, -0.60296276], [ 0.90004056, 0.76595149, -0.51941328, 0.40380163], [-0.5386898 , 1.68159334, -0.07916927, -1.28261209]])
# Interval sampling: draw 100 values uniformly from the half-open
# interval [low, high) = [10, 30) -- the lower bound is included, the
# upper bound is excluded.
np.random.uniform(low=10, high=30, size=100)
array([12.0695157 , 15.28852086, 28.16887606, 23.78457455, 19.43228899, 19.4356459 , 29.21877117, 23.19956387, 19.72328742, 18.5866164 , 23.65236814, 22.90528204, 14.61641437, 19.12403056, 18.71734194, 21.86738754, 12.61084045, 16.48857546, 10.13327166, 29.18626043, 13.52597009, 25.3757283 , 15.02568918, 17.58892082, 29.02929713, 19.31601472, 18.70991889, 27.60091717, 17.90855395, 22.32056116, 29.66164178, 15.80899303, 13.42051403, 21.63006552, 27.3575489 , 19.15324054, 11.6461633 , 15.2618994 , 17.25711308, 11.15194372, 24.7998248 , 20.68732859, 19.61350829, 13.52628516, 20.93910938, 18.22947301, 22.55097367, 19.83017006, 28.02903298, 21.88747298, 25.62843102, 25.9743397 , 28.41009972, 19.96665695, 12.90643362, 10.53082376, 25.83417207, 14.47090988, 12.30504558, 25.61192155, 29.43102168, 17.16377112, 25.98836525, 10.56868229, 19.95139715, 10.80447219, 18.29030416, 16.87592569, 25.22108043, 25.97517119, 13.60687878, 15.40081725, 18.56386783, 29.06878897, 25.60131042, 18.75444808, 13.6712598 , 20.53491322, 20.62849974, 24.44350913, 22.15870654, 25.66238923, 14.30747784, 20.37187352, 24.98596286, 18.57617349, 10.26463449, 11.84077318, 23.86112526, 11.08118911, 26.51167248, 19.01538728, 18.25704808, 10.36148126, 10.69855404, 18.29660878, 14.1800761 , 27.21586491, 20.89498324, 13.51931409])
# A 2x2 array in which every element is 1.
np.ones(shape=(2, 2))
array([[1., 1.], [1., 1.]])
# A 2x2 array in which every element is 0.
np.zeros(shape=(2, 2))
array([[0., 0.], [0., 0.]])
# A 5x5 matrix with ones on the k-th diagonal and zeros elsewhere.
# Here k=1 selects the first diagonal above the main one; k=0 would
# give the identity matrix.
np.eye(N=5, M=5, k=1)
array([[0., 1., 0., 0., 0.], [0., 0., 1., 0., 0.], [0., 0., 0., 1., 0.], [0., 0., 0., 0., 1.], [0., 0., 0., 0., 0.]])
# Extract the main-diagonal elements (k=0) of a 5x5 array.
np.diag(np.arange(25).reshape((5, 5)), k=0)
array([ 0, 6, 12, 18, 24])
# NumPy can load CSV-formatted text data directly. `delimiter` sets the
# field separator and `usecols` selects which (zero-based) columns to
# read.
filename = 'ex1data2.txt'
np.loadtxt(
    fname=filename,
    delimiter=',',
    usecols=(0, 2),
)
array([[ 2104., 399900.], [ 1600., 329900.], [ 2400., 369000.], [ 1416., 232000.], [ 3000., 539900.], [ 1985., 299900.], [ 1534., 314900.], [ 1427., 198999.], [ 1380., 212000.], [ 1494., 242500.], [ 1940., 239999.], [ 2000., 347000.], [ 1890., 329999.], [ 4478., 699900.], [ 1268., 259900.], [ 2300., 449900.], [ 1320., 299900.], [ 1236., 199900.], [ 2609., 499998.], [ 3031., 599000.], [ 1767., 252900.], [ 1888., 255000.], [ 1604., 242900.], [ 1962., 259900.], [ 3890., 573900.], [ 1100., 249900.], [ 1458., 464500.], [ 2526., 469000.], [ 2200., 475000.], [ 2637., 299900.], [ 1839., 349900.], [ 1000., 169900.], [ 2040., 314900.], [ 3137., 579900.], [ 1811., 285900.], [ 1437., 249900.], [ 1239., 229900.], [ 2132., 345000.], [ 4215., 549000.], [ 2162., 287000.], [ 1664., 368500.], [ 2238., 329900.], [ 2567., 314000.], [ 1200., 299000.], [ 852., 179900.], [ 1852., 299900.], [ 1203., 239500.]])
# Slicing also works on ndarray. A list slice is written
# [start:end:step]; for a multi-dimensional array give one slice per
# axis, separated by commas: [start:end:step, start:end:step, ...].
dnarr = np.arange(25).reshape((5, 5))
# Select every row and the first two columns.
dnarr[:, 0:2]
array([[ 0, 1], [ 5, 6], [10, 11], [15, 16], [20, 21]])
# Index with coordinate lists: x holds the row indices and y the column
# indices paired with them, so this picks the elements at
# (1, 0), (2, 1), (3, 0) and (4, 1).
x, y = [1, 2, 3, 4], [0, 1, 0, 1]
dnarr[x, y]
array([ 5, 11, 15, 21])
# Broadcasting is how NumPy performs arithmetic on arrays of different
# shapes; it is triggered automatically when the two operands of an
# operation do not have the same shape.
a = np.array(
    [
        [0, 0, 0],
        [10, 10, 10],
        [20, 20, 20],
        [30, 30, 30],
    ]
)
b = np.array([1, 2, 3])
# b (shape (3,)) is added to each of the four rows of a (shape (4, 3)).
a + b
array([[ 1, 2, 3], [11, 12, 13], [21, 22, 23], [31, 32, 33]])
# Transpose the matrix (swap its rows and columns).
a.T
array([[ 0, 10, 20, 30], [ 0, 10, 20, 30], [ 0, 10, 20, 30]])
# Array arithmetic: each operator is applied to every element of the
# arrays, which is very convenient.
a = np.random.rand(3,3)
b = np.random.rand(3,3)
a + b  # element-wise sum
a - b  # element-wise difference
a * b  # element-wise product (not a matrix product)
a / b  # element-wise quotient
a * 2  # multiply every element by the scalar 2
a + 2  # add the scalar 2 to every element
array([[2.37861674, 2.24048424, 2.06956704], [2.21845504, 2.45944021, 2.56521539], [2.72539798, 2.51533669, 2.7490997 ]])
# Statistical functions
# Largest value along axis 1 and smallest value along axis 0.
np.amax(a, axis=1)
np.amin(a, axis=0)
# Peak-to-peak spread (maximum minus minimum) along axis 0.
np.ptp(a, axis=0)
# Median along axis 1.
np.median(a, axis=1)
# Arithmetic mean along axis 1.
np.mean(a, axis=1)
# Variance along axis 1.
np.var(a, axis=1)
# Standard deviation along axis 1.
np.std(a, axis=1)
array([0.12640543, 0.14510723, 0.10505695])
# Matrices
# NOTE: NumPy's documentation no longer recommends the matrix class,
# even for linear algebra; regular ndarrays are preferred in new code.
import numpy.matlib
# Build a 3x3 matrix object filled with random values.
numpy.matlib.rand(3,3)
matrix([[0.51150901, 0.0553782 , 0.99089512], [0.51719364, 0.00331011, 0.08076412], [0.66316065, 0.94387314, 0.30780752]])