# NumPy basics, reconstructed from an IPython session into a plain script.
# Values the session displayed implicitly are shown with print(), `obj?`
# help lookups print the docstring instead, and statements that raise on
# purpose are guarded so the rest of the script still runs.
import math

import numpy as np

# Plain nested lists: `*` repeats the list and `+` concatenates.
data = [[0.9526, -0.246, -0.8856], [0.5639, 0.2379, 0.9104]]
print(data * 10)
print(data + data)

# The same data as an ndarray: arithmetic is applied element-wise.
nd_data = np.array(data)
print(nd_data)
print(nd_data * 10)
print(nd_data + nd_data)
print(nd_data.shape)
print(nd_data.dtype)

# Creating ndarrays from Python sequences.
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
print(arr1)

data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
print(arr2)
print(np.ndarray.ndim.__doc__)  # was the IPython lookup `arr2.ndim?`
print(arr2.ndim)
print(arr2.nbytes)
print(arr2.shape)
print(arr1.dtype)
print(arr2.dtype)

# Array-creation helpers.
print(np.zeros(10))
print(np.zeros((3, 6)))
print(np.empty((2, 3)))
print(np.empty((2, 3, 4)))
print(np.empty((3, 2)))
print(np.empty((1, 2, 3)))
print(np.arange(15))
print(type(np.arange(15)))

# np.asarray accepts ndarrays as well as ordinary sequences.
print(np.asarray(nd_data))
print(np.asarray(data))
d = [1, 2, 3, 4]
print(np.asarray(d))
print(np.asarray(np.array([1, 2, 3, 5])))
print(np.eye(5))
print(np.identity(4))

# Explicit dtypes and conversion with astype().
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([1, 2, 3], dtype=np.int32)
print(arr1.dtype, arr2.dtype)

arr = np.array([1, 2, 3, 4, 5])
print(arr.dtype)
float_arr = arr.astype(np.float64)
print(float_arr)
print(float_arr.dtype)

arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
print(arr)
print(arr.astype(np.int32))

# np.bytes_ replaces the old np.string_ alias, which NumPy 2.0 removed.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
print(numeric_strings)
print(numeric_strings.astype(float))

# Even though the last element is a plain int rather than a string, asking
# for a string dtype converts it to a string as well.
numeric_strings = np.array(['1.25', '-9.6', 42], dtype=np.bytes_)
print(numeric_strings)
print(numeric_strings.astype(float))

# Reusing another array's dtype.
int_array = np.arange(10)
print(int_array)
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)
print(calibers)
print(int_array.astype(calibers.dtype))
empty_uint32 = np.empty(8, dtype='u4')

# Element-wise arithmetic between arrays and scalars.
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
print(arr)
print(arr * arr)
print(arr - arr)
print(1 / arr)
print(arr)
# `** 0.5` takes the square root of every element.
print(arr ** 0.5)
print(math.sqrt(3))

# Basic slicing: an ndarray slice is a view, so writing through it changes
# the original array.
arr = np.arange(10)
print(arr)
print(arr[5])
print(arr[5:8])
arr[5:8] = 12
print(arr)

arr_slice = arr[5:8]
arr_slice[1] = 12345
print(arr)
arr_slice[:] = 64
print(arr_slice)
print(arr_slice[0:2])
print(arr)

# The same experiment with a list.  On Python 3 range() must be turned into
# a list before its items can be assigned.
l = list(range(10))
l_slice = l[5:8]
l_slice[1] = 12345
print(l)

# Python lists do not broadcast: assigning a scalar to a slice raises.
try:
    l_slice[:] = 64
except TypeError as exc:
    print("TypeError:", exc)
print(l)
print(l_slice)
l_slice[0] = 64
print(l_slice)
# Changing l_slice leaves the original l untouched, because a list slice is
# a copy.
# Indexing walkthrough (copies, multi-dimensional, boolean and fancy
# indexing, transposition).  `l` and `arr` are the list and the 1-D array
# left over from the slicing experiments that precede this section.
print(l)

# An explicit .copy() detaches the slice from the original array.
arr_slice2 = arr[5:8].copy()
print(arr_slice2)
print(arr)
arr_slice2[:] = 33333
print(arr)
print(arr_slice2)

# Indexing multi-dimensional arrays.
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr2d[2])
print(arr2d[0][2])
# The indices can also be separated by a comma.
print(arr2d[0, 2])

arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
print(arr3d)
print(arr3d.ndim)
print(arr2d.ndim)
print(arr3d[0])
old_values = arr3d[0].copy()
print(old_values)
arr3d[0] = 42
print(arr3d)
print(old_values)
arr3d[0] = old_values
print(arr3d)
print(arr3d[1, 0])

# Slicing along one or several axes.
print(arr[1:6])
print(arr2d)
print(arr2d[:2])
print(arr2d[:2, 1:])
print(arr2d[:2][1:])
print(arr2d[1, :2])
print(arr2d[2, :1])
print(arr2d[:, :1])
arr2d[:2, 1:] = 0
print(arr2d)

# Boolean indexing.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
# np.random.randn: return a sample (or samples) from the "standard normal"
# distribution.
print(np.random.randn.__doc__)  # was the IPython lookup `np.random.randn?`
data = np.random.randn(7, 4)
print(names)
print(data)

# As `names` shows, positions 0 and 3 are True.
print(names == 'Bob')
# 'Bob' sits at positions 0 and 3 of `names`, so rows 0 and 3 of the mask
# are True and rows 0 and 3 of `data` are returned -- no loop needed.
print(data[names == 'Bob'])

# A mask whose length differs from the indexed axis (3 vs. 7) is rejected.
names2 = np.array(['Bob', 'Joe', 'Will'])
try:
    print(data[names2 == 'Bob'])
except IndexError as exc:
    print("IndexError:", exc)

print(data[names == 'Bob', 2:])
print(data[names == 'Bob', 3])
print(names != 'Bob')
# The original also tried data[-(names == 'Bob')]; unary minus on a boolean
# array raises TypeError on current NumPy, so only the `~` form is kept.
print(data[~(names == 'Bob')])

mask = (names == 'Bob') | (names == 'Will')
print(mask)
print(data[mask])
print(data[mask, 3:])
data[data < 0] = 0
print(data)
data[names != 'Joe'] = 7
print(data)

# Fancy indexing.
arr = np.empty((8, 4))
for i in range(8):
    arr[i] = i
print(arr)
# Select rows in a particular order.
print(arr[[4, 3, 0, 6]])
print(arr[[-3, -5, -7]])

print(np.arange(32))
arr = np.arange(32).reshape((8, 4))
print(arr)
# Two index lists pick individual elements: (1, 0), (5, 3), (7, 1), (2, 2).
print(arr[[1, 5, 7, 2], [0, 3, 1, 2]])
# First take rows 1, 5, 7, 2; then `:` keeps every one of those rows while
# the second list picks columns 0, 3, 1, 2 in that order.
print(arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]])
# np.ix_: construct an open mesh from multiple sequences.  It yields the
# rows and columns in exactly the order we expected above.
print(np.ix_)
print(arr[np.ix_([1, 5, 7, 2], [0, 3, 1, 2])])

# Transposing.
arr = np.arange(15).reshape((3, 5))
print(arr)
print(arr.T)
print(arr.transpose())
# transpose: returns a view of the array with axes transposed.
print(arr.transpose.__doc__)    # was `arr.transpose?`
print(np.ndarray.T.__doc__)     # was `arr.T?`

arr = np.random.randn(6, 3)
print(arr)
print(arr.T)
# np.dot: dot product of two arrays.
print(np.dot.__doc__)           # was `np.dot?`
print(np.dot(arr.T, arr))
# Matrices are not aligned.
# e.g. (3, 6) x (6, 3) is allowed,
# but (3, 6) x (3, 6) is not available.
# `arr` is the (6, 3) random array created just before this section.
try:
    print(np.dot(arr.T, arr.T))
except ValueError as exc:
    print("ValueError:", exc)

# Permuting and swapping axes of a 3-D array.
arr = np.arange(16).reshape((2, 2, 4))
print(arr)
print(arr.transpose((1, 0, 2)))
print(arr.T)
print(arr)
print(arr.swapaxes(1, 2))
print(arr.swapaxes(1, 1))
print(arr.swapaxes(2, 1))
# Axis 3 does not exist on a 3-D array, so this call raises.
try:
    print(arr.swapaxes(1, 3))
except ValueError as exc:
    print("AxisError:", exc)
print(arr.swapaxes(2, 2))

# Universal functions.
arr = np.arange(10)
print(np.sqrt(arr))
print(arr)
print(np.exp(arr))

# The session relied on a bare `randn` from the pylab namespace; the
# qualified name keeps the script self-sufficient.
x = np.random.randn(8)
y = np.random.randn(8)
print(x)
print(y)
print(np.maximum(x, y))

arr = np.random.randn(7) * 5
print(arr)
print(np.modf(arr))
# np.modf: return the fractional and integral parts of an array,
# element-wise.  It is comparable to a vectorized version of Python's
# built-in divmod: it takes fractional numbers and returns both parts
# together.
print(np.modf.__doc__)  # was the IPython lookup `np.modf?`

# Evaluating sqrt(x^2 + y^2) on a regular grid.
points = np.arange(-5, 5, 0.01)
print(len(points))
xs, ys = np.meshgrid(points, points)
print(ys)
print(len(ys))

import matplotlib.pyplot as plt

z = np.sqrt(xs ** 2 + ys ** 2)
print(z)
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: "\s" is not a valid escape sequence in a normal literal.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")

# plt.imshow: display an image on the axes; `cmap` takes a
# `~matplotlib.colors.Colormap`.
print(plt.imshow.__doc__)  # was `plt.imshow?`
plt.imshow(z, cmap=plt.cm.Blues)
# plt.cm = This module provides a large set of colormaps,
# functions for registering new colormaps and for getting a colormap by name,
# and a mixin class for adding color mapping functionality.
print(plt.cm.__doc__)  # was `plt.cm?`
# Colorbar for the image drawn just before this section (`plt` is the
# matplotlib.pyplot module imported there).
plt.colorbar()

# Inspecting the matplotlib configuration and drawing a simple line plot.
import matplotlib
print(matplotlib.matplotlib_fname())

import matplotlib.pyplot as p
p.plot(range(20), range(20))
p.show()

import matplotlib.rcsetup as rcsetup
# NOTE(review): recent matplotlib releases deprecate rcsetup.all_backends in
# favour of a backend registry -- confirm against the installed version.
print(rcsetup.all_backends)

# Conditional logic as array operations.
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
print(cond)

# Pure-Python version: take x where cond is True, otherwise y.
result = [(x if c else y) for x, y, c in zip(xarr, yarr, cond)]
print(result)

# Vectorized equivalent.
result = np.where(cond, xarr, yarr)
print(result)

arr = np.random.randn(4, 4)
print(arr)
print(np.where(arr > 0, 2, -2))
print(np.where(arr > 0, 2, arr))

# Nested np.where over two conditions: 0 if both, 1 if only cond1,
# 2 if only cond2, 3 if neither.
cond1 = False
cond2 = True
print(np.where(cond1 & cond2, 0,
               np.where(cond1, 1,
                        np.where(cond2, 2, 3))))

# Arithmetic form of the same selection.  The original negated with unary
# minus, which on Python bools is arithmetic negation (-True == -1) and gave
# -3 here; np.logical_not is a real logical NOT for scalars and arrays, so
# the result is 2, matching the nested np.where above.
result = (
    1 * (cond1 & np.logical_not(cond2))
    + 2 * (cond2 & np.logical_not(cond1))
    + 3 * np.logical_not(cond1 | cond2)
)
print(result)
print(cond1 & np.logical_not(cond2))
print(2 * (cond2 & np.logical_not(cond1)))
print(3 * np.logical_not(cond1 | cond2))

# Mathematical and statistical methods.
arr = np.random.randn(5, 4)
print(arr)
print(arr.mean())
print(np.mean(arr))
print(arr.sum())
print(arr.mean(axis=1))
# Passing the axis by keyword or positionally gives the same result.
print(arr.sum(axis=0))
print(arr.sum(0))
print(arr.sum(axis=1))
print(arr.sum(1))

# Was the IPython lookup `arr.sum?`, whose pasted output read:
#   Type: builtin_function_or_method
#   Docstring:
#   a.sum(axis=None, dtype=None, out=None)
#   Return the sum of the array elements over the given axis.
#   Refer to `numpy.sum` for full documentation.
#   See Also
#   --------
#   numpy.sum : equivalent function
print(arr.sum.__doc__)

arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
print(arr)
# cumsum: return the cumulative sum of the elements along the given axis.
print(arr.cumsum.__doc__)  # was `arr.cumsum?`
print(np.cumsum.__doc__)   # was `np.cumsum?`
import os
import timeit

# `arr` is the 3x3 integer array created just before this section.
# Accumulate downwards: axis 0 runs down the columns.
print(arr.cumsum(0))
# Multiply across: axis 1 runs along the rows.
print(arr.cumprod(1))
print(arr.cumprod(0))

# Methods for boolean arrays.  The session relied on a bare `randn` from the
# pylab namespace; np.random.randn is the qualified equivalent.
arr = np.random.randn(100)
print((arr > 0).sum())
print(arr)

bools = np.array([False, False, True, False])
print(bools.any())
print(bools.all())
bools = np.array([True, True, True, True])
print(bools.any())
print(bools.all())
# Any non-zero value counts as True, so any()/all() also work on
# non-boolean arrays.
print(arr.any())
print(arr.all())

# Sorting (in place).
arr = np.random.randn(8)
print(arr)
arr.sort()
print(arr)

arr = np.random.randn(5, 3)
print(arr)
# Sort within each row.
arr.sort(1)
print(arr)
# Sort within each column.
arr.sort(0)
print(arr)
# With no argument the last axis is used, i.e. the same as arr.sort(1) here.
arr.sort()
print(arr)

# 5% quantile of a sorted sample.
large_arr = np.random.randn(1000)
print(large_arr)
large_arr.sort()
print(int(0.05 * len(large_arr)))
print(large_arr[int(0.05 * len(large_arr))])
print(large_arr[50])

# Unique values and set logic.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
print(np.unique(names))
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
print(np.unique(ints))
print(sorted(set(names)))
# Were `%timeit` magics; each figure is total seconds for 1000 calls.
print(timeit.timeit(lambda: np.unique(names), number=1000))
print(timeit.timeit(lambda: sorted(set(names)), number=1000))

values = np.array([6, 0, 0, 3, 2, 5, 6])
# Test whether each element of a 1-D array is also present in a second array.
# Returns a boolean array the same length as 'ar1' that is True
# where an element of `ar1` is in `ar2` and False otherwise.
# ("in1d" reads as "in 1-D", not "dictionary"; np.isin is the current name
# for the deprecated np.in1d.)
print(np.isin.__doc__)  # was `np.in1d?`
print(np.isin(values, [2, 3, 6]))

# File input and output.
arr = np.arange(10)
# np.save appends the '.npy' extension to the file name.
np.save('some_array', arr)
print(sorted(os.listdir('.')))  # was `!ls`
print(np.load('some_array.npy'))

np.savez('array_archive.npz', a=arr, b=arr)
# The archive holds an open file handle; the with-block closes it.
with np.load('array_archive.npz') as arch:
    print(arch['b'])
    print(arch['a'])

a = np.random.randn(10)
print(a)

# Was a `%%writefile array_ex.txt` cell followed by `!cat array_ex.txt`.
with open('array_ex.txt', 'w') as f:
    f.write(
        '2.23342715,-0.37376633,-1.05142871\n'
        '-0.57247149,-1.35777871,0.28676036\n'
        '-0.01042671,-0.0211314,-0.72049352\n'
    )
with open('array_ex.txt') as f:
    print(f.read())

arr = np.loadtxt('array_ex.txt', delimiter=',')
print(arr)

# Linear algebra.
x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])
print(x)
print(x.shape)
# Shapes: a (2, 3) matrix dotted with a length-3 vector gives a length-2
# vector.
print(np.ones(3).shape)
print(y)
print(x.dot(y))  # same as np.dot(x, y)
print(1*6 + 2*-1 + 3*8)  # 6 + -2 + 24, the top-left entry of x.dot(y)
print(np.dot(x, np.ones(3)))
print(np.ones(3))
print(1*1 + 2*1 + 3*1)  # the first entry of np.dot(x, np.ones(3))

from numpy.linalg import inv, qr

# Core linear algebra tools.
print(np.linalg.__doc__)  # was `numpy.linalg?` (`numpy` itself was unbound)
# Inverse of a square matrix.
print(inv.__doc__)        # was `inv?`
# QR decomposition of a matrix.
print(qr.__doc__)         # was `qr?`
import random
import timeit
from random import normalvariate

# `inv` and `qr` come from the `from numpy.linalg import inv, qr` line that
# precedes this section.  The session relied on a bare `randn` from the
# pylab namespace; np.random.randn is the qualified equivalent.
X = np.random.randn(5, 5)
mat = X.T.dot(X)
print(mat)
print(inv(mat))
# A matrix times its inverse is the identity matrix (up to rounding).
# Some basic linear algebra is needed to follow this part.
print(mat.dot(inv(mat)))
q, r = qr(mat)
print(r)
print(q)

# Random number generation: NumPy versus the standard library.
samples = np.random.normal(size=(4, 4))
print(samples)

N = 1000000
# Were `%timeit` magics; each figure is the seconds taken by a single run.
print(timeit.timeit(
    lambda: [normalvariate(0, 1) for _ in range(N)], number=1))
print(timeit.timeit(lambda: np.random.normal(size=N), number=1))

# A single random walk in pure Python (range replaces Python 2's xrange).
position = 0
walk = [position]
steps = 1000
for i in range(steps):
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)

# Import under its own name: the original aliased the function to `plt`,
# which hides the name conventionally bound to the matplotlib.pyplot module.
from matplotlib.pyplot import plot
plot(walk)
print(walk)
print(random.randint(0, 1))

# The same walk, vectorized.
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)
print(draws)
steps = np.where(draws > 0, 1, -1)
print(steps)
walk = steps.cumsum()
print(walk)
print(walk.min(), walk.max())
# Index of the first step that is at least 10 away from the origin.
print((np.abs(walk) >= 10).argmax())
print(walk.ndim)
print(walk.shape)
# Error! Naturally -- the array has only one axis, so axis 1 does not exist.
try:
    print((np.abs(walk) >= 10).argmax(1))
except ValueError as exc:
    print("AxisError:", exc)

# Simulating many random walks at once.
nwalks = 5000
nsteps = 1000
# randint(0, 2) draws integers from 0 up to, but not including, 2 -- i.e.
# only 0 and 1.  `size` is given as a (rows, columns) tuple.
draws = np.random.randint(0, 2, size=(nwalks, nsteps))
print(np.random.randint(0, 2, size=(10, 3)))
# With a single integer for `size` the result is a 1-D array of that length.
print(np.random.randint(0, 2, size=5))
print(draws.ndim)
print(draws.shape)

steps = np.where(draws > 0, 1, -1)
walks = steps.cumsum(1)
print(walks)
print(walks.max(), walks.min())

# Which walks ever get 30 or more steps away from the origin, and when.
hits30 = (np.abs(walks) >= 30).any(1)
print(hits30)
print(len(hits30))
print(hits30.sum())
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)
print(crossing_times.mean())

# Normally distributed step sizes instead of +/-1.
steps = np.random.normal(loc=0, scale=0.25, size=(nwalks, nsteps))
print(steps)
print(walks)
print(walks.shape)
print(walks.ndim)
print(np.abs(walks))