# Baseline: a plain nested Python list, to contrast with ndarray behavior below.
data = [[0.9526, -0.246, -0.8856],
        [0.5639, 0.2379, 0.9104]]

# On a list, * repeats the list; it does not multiply the elements.
data * 10

# On a list, + concatenates; it does not add element-wise.
data + data

import numpy as np

data = [[0.9526, -0.246, -0.8856],
        [0.5639, 0.2379, 0.9104]]

# Wrap the same values in an ndarray.
nd_data = np.array(data)

nd_data

# On an ndarray, arithmetic operators are applied element-wise.
nd_data * 10

nd_data + nd_data

# Size along each dimension.
nd_data.shape

# Element type shared by every item of the array.
nd_data.dtype

# Build arrays from Python sequences; np.array picks a dtype that fits the data.
data1 = [6, 7.5, 8, 0, 1]

arr1 = np.array(data1)

arr1

# A list of equal-length lists becomes a two-dimensional array.
data2 = [[1,2,3,4], [5,6,7,8]]

arr2 = np.array(data2)

arr2

# A trailing ? is IPython's help lookup; it is not plain Python syntax.
arr2.ndim?

arr2.ndim

arr2.nbytes

arr2.shape

arr1.dtype

arr2.dtype

# Constructors that take a length or a shape tuple.
np.zeros(10)

np.zeros((3,6))

# np.empty allocates without initializing, so the displayed values are arbitrary.
np.empty((2,3))

np.empty((2,3,4))

np.empty((3,2))

np.empty((1,2,3))

# Array-valued counterpart of the built-in range.
np.arange(15)

type(np.arange(15))

# np.asarray converts its input to an ndarray.
np.asarray(nd_data)

np.asarray(data)

# NOTE(review): d is not used again in the visible part of the file.
d = [1,2,3,4]

np.asarray([1,2,3,4])

np.asarray(np.array([1,2,3,5]))

# Square identity matrices.
np.eye(5)

np.identity(4)

# Request an explicit dtype at construction time.
arr1 = np.array([1, 2, 3], dtype=np.float64)

arr2 = np.array([1, 2, 3], dtype=np.int32)

arr1.dtype, arr2.dtype

arr = np.array([1, 2, 3, 4, 5])

arr.dtype

# astype returns a converted copy; the source array keeps its dtype.
float_arr = arr.astype(np.float64)

float_arr

float_arr.dtype

arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])

arr

# Converting float to int drops the fractional part.
arr.astype(np.int32)

# np.bytes_ is the byte-string dtype. The older alias np.string_ was removed
# in NumPy 2.0, so np.bytes_ is used to keep this working on every release.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)

numeric_strings

# Byte strings that spell numbers can be converted straight to float.
numeric_strings.astype(float)

# Even though the last item is given as an int rather than a string,
# requesting the byte-string dtype converts it to a string as well.
numeric_strings = np.array(['1.25', '-9.6', 42], dtype=np.bytes_)

numeric_strings

numeric_strings.astype(float)

int_array = np.arange(10)

int_array

calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)

calibers

# Reuse another array's dtype as the conversion target.
int_array.astype(calibers.dtype)

# 'u4' is the short type code for a 4-byte unsigned integer (uint32).
empty_uint32 = np.empty(8, dtype='u4')

# Arithmetic between equal-shape arrays, or an array and a scalar, is element-wise.
arr = np.array([[1., 2., 3.], [4., 5., 6.]])

arr

arr * arr

arr - arr

1 / arr

arr

# ** is exponentiation; raising to the power 0.5 gives the element-wise square root.
arr ** 0.5

# Scalar square root from the standard library, for comparison.
import math
math.sqrt(3)

arr = np.arange(10)

arr

arr[5]

arr[5:8]

# Assigning a scalar to a slice broadcasts it to every selected element.
arr[5:8] = 12

arr

# An ndarray slice is a view: writes through arr_slice show up in arr.
arr_slice = arr[5:8]

arr_slice[1] = 12345

arr

arr_slice[:] = 64

arr_slice

arr_slice[0:2]

arr

# Same experiment with a plain list. list(...) is required because a bare
# range object does not support item assignment on Python 3.
l = list(range(10))

l_slice = l[5:8]

l_slice[1] = 12345

l

# Python lists do not broadcast: assigning a scalar to a slice raises
# TypeError. The error is caught and shown so the rest of the script still runs.
try:
    l_slice[:] = 64
except TypeError as exc:
    print(exc)

l

l_slice

l_slice[0] = 64

l_slice

# Changing l_slice does not affect the original l, because a list slice is a copy.
l

# To get an independent ndarray instead of a view, copy the slice explicitly.
arr_slice2 = arr[5:8].copy()

arr_slice2

arr

arr_slice2[:] = 33333

# arr is unchanged by the write to the copy.
arr

arr_slice2

# Indexing and slicing arrays with more than one dimension.
arr2d = np.array([[1,2,3], [4,5,6], [7,8,9]])

# A single index on a 2-D array selects a whole row.
arr2d[2]

arr2d[0][2]

# The indices can also be separated with a comma.
arr2d[0, 2]

arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

arr3d

arr3d.ndim

arr2d.ndim

arr3d[0]

# Keep a copy so the original values can be restored after the overwrite below.
old_values = arr3d[0].copy()

old_values

# A scalar assigned to a sub-array is broadcast over all of it.
arr3d[0] = 42

arr3d

old_values

arr3d[0] = old_values

arr3d

arr3d[1, 0]

# NOTE(review): arr is not defined in this section; it is whatever an earlier cell left behind.
arr[1:6]

arr2d

arr2d[:2]

# One slice per axis: the first two rows, columns from index 1 onward.
arr2d[:2, 1:]

# Chained slicing: both slices act on axis 0, so this differs from arr2d[:2, 1:].
arr2d[:2][1:]

arr2d[1, :2]

arr2d[2, :1]

arr2d[:, :1]

# Assigning to a slice expression writes into the selected region of arr2d.
arr2d[:2, 1:] = 0

arr2d

names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

# randn returns a sample (or samples) from the "standard normal" distribution.
# Seven rows are drawn so that each row lines up with one entry of names.
data = np.random.randn(7, 4)

names

data

# Element-wise comparison: True at positions 0 and 3, where names holds 'Bob'.
names == 'Bob'

# 'Bob' sits at positions 0 and 3 of names, so the mask is True there and
# rows 0 and 3 of data are returned. No explicit loop is needed.
data[names == 'Bob']

names2 = np.array(['Bob', 'Joe', 'Will'])

# A boolean mask must be as long as the axis it indexes (3 != 7 here).
# Current NumPy raises IndexError for the mismatch; show it and carry on.
try:
    data[names2 == 'Bob']
except IndexError as exc:
    print(exc)

# A boolean mask can be combined with slices or integers on the other axis.
data[names == 'Bob', 2:]

data[names == 'Bob', 3]

names != 'Bob'

# Two ways to invert a mask. Unary minus is not supported on boolean arrays,
# so np.logical_not (or the ~ operator) is used.
data[np.logical_not(names == 'Bob')]

data[~(names == 'Bob')]

# Combine conditions with | (or) and & (and); the parentheses are required.
mask = (names == 'Bob') | (names == 'Will')

mask

data[mask]

data[mask, 3:]

# Assign through a boolean mask: clamp every negative value to zero.
data[data < 0] = 0

data

# Overwrite whole rows selected by a one-dimensional mask.
data[names != 'Joe'] = 7

data

# Fancy indexing: index with lists of integers.
arr = np.empty((8, 4))

# Fill row i with the value i so the selected rows are easy to recognize.
for i in range(8):
    arr[i] = i

arr

# Select rows in a specific order.
arr[[4, 3, 0, 6]]

# Negative indices count from the end.
arr[[-3, -5, -7]]

np.arange(32)

arr = np.arange(32).reshape((8, 4))

arr

# Two index lists are paired up, so this picks single elements:
# (1, 0), (5, 3), (7, 1) and (2, 2).
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

# First select rows 1, 5, 7, 2.
# Then, in the second indexing step, ':' keeps all of those rows
# while columns 0, 3, 1, 2 are selected in that order.
# The result is the chosen rows with their columns reordered.
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

# Construct an open mesh from multiple sequences.
# Using np.ix_ gives the expected rows-by-columns selection in the requested order.
np.ix_

arr[np.ix_([1, 5, 7, 2], [0, 3, 1, 2])]

# Transposing arrays and swapping axes.
arr = np.arange(15).reshape((3, 5))

arr

arr.T

arr.transpose()

# Returns a view of the array with axes transposed.
# (A trailing ? is an IPython help lookup.)
arr.transpose?

arr.T?

arr = np.random.randn(6, 3)

arr

arr.T

# Dot product of two arrays.
np.dot?

# (3, 6) dot (6, 3) -> (3, 3)
np.dot(arr.T, arr)

# Matrices are not aligned.
# e.g. (3, 6) * (6, 3) is available,
# but (3, 6) * (3, 6) is not available.
# NOTE: the call below raises because the inner dimensions (6 and 3) differ.
np.dot(arr.T, arr.T)

arr = np.arange(16).reshape((2, 2, 4))

arr

# For arrays with more than two dimensions, transpose takes a permutation of the axes.
arr.transpose((1, 0, 2))

arr.T

arr

# swapaxes exchanges the two given axes.
arr.swapaxes(1, 2)

arr.swapaxes(1, 1)

arr.swapaxes(2, 1)

# NOTE: arr has three axes (0, 1, 2), so axis 3 is out of bounds and this call raises.
arr.swapaxes(1, 3)

arr.swapaxes(2, 2)

# Universal functions (ufuncs) apply an operation element-wise.
arr = np.arange(10)

np.sqrt(arr)

arr

np.exp(arr)

# randn is referenced through np.random: the bare name randn only exists in
# an IPython --pylab session and is undefined after `import numpy as np`.
x = np.random.randn(8)

y = np.random.randn(8)

x

y

# Binary ufunc: element-wise maximum of the two arrays.
np.maximum(x, y)

arr = np.random.randn(7) * 5

arr

# modf returns two arrays: the fractional parts and the integral parts.
np.modf(arr)

# Return the fractional and integral parts of an array, element-wise.
# Loosely comparable to a vectorized divmod(x, 1), but modf takes a single
# array and returns the fractional parts and the integral parts as two arrays.
np.modf?

# Evenly spaced points from -5 (inclusive) to 5 (exclusive) in steps of 0.01.
points = np.arange(-5, 5, 0.01)

len(points)

# meshgrid turns the two 1-D arrays into two 2-D coordinate arrays.
xs, ys = np.meshgrid(points, points)

ys

len(ys)

import matplotlib.pyplot as plt

# Distance of every grid point from the origin, computed on the whole grid at once.
z = np.sqrt(xs ** 2 + ys ** 2)

z

# Render z as a grayscale image and attach a color scale.
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: "\s" is not a valid escape sequence in a normal string literal,
# and the backslash must reach matplotlib's math-text parser untouched.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")

# Display an image on the axes.
# The cmap argument takes a `~matplotlib.colors.Colormap`.
# (A trailing ? is an IPython help lookup.)
plt.imshow?

plt.imshow(z, cmap=plt.cm.Blues)
# plt.cm: this module provides a large set of colormaps,
# functions for registering new colormaps and for getting a colormap by name,
# and a mixin class for adding color mapping functionality.
plt.cm?

plt.colorbar()

import matplotlib

# Path of the matplotlibrc configuration file in use.
matplotlib.matplotlib_fname()

# Same module as plt, bound to a second alias for a quick plotting check.
import matplotlib.pyplot as p

p.plot(range(20), range(20))

p.show()

# List the backends matplotlib knows about.
import matplotlib.rcsetup as rcsetup
print(rcsetup.all_backends)

xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])

yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])

cond = np.array([True, False, True, True, False])

cond

# Pure-Python version: take from xarr where cond is True, otherwise from yarr.
result = [(x if c else y)
          for x, y, c in zip(xarr, yarr, cond)]

result

# Vectorized equivalent of the comprehension above.
result = np.where(cond, xarr, yarr)

result

arr = np.random.randn(4, 4)

arr

# The second and third arguments of where may be scalars ...
np.where(arr > 0, 2, -2)

# ... or a mix of a scalar and an array.
np.where(arr > 0, 2, arr)

cond1 = False
cond2 = True

# Nested where: 0 if both hold, 1 if only cond1, 2 if only cond2, 3 if neither.
np.where(cond1 & cond2, 0,
         np.where(cond1, 1,
                  np.where(cond2, 2, 3)))

# Arithmetic form of the same decision table. Logical negation must be
# np.logical_not: unary minus on a Python bool gives an int (-True == -1),
# which made this expression evaluate to -3 instead of matching the nested
# where above, and it is not supported on NumPy boolean arrays.
result = (1 * (cond1 & np.logical_not(cond2))
          + 2 * (cond2 & np.logical_not(cond1))
          + 3 * np.logical_not(cond1 | cond2))

result

# The three terms, inspected one at a time.
(cond1 & np.logical_not(cond2))

2 * (cond2 & np.logical_not(cond1))

3 * np.logical_not(cond1 | cond2)

# Aggregations are available both as array methods and as top-level functions.
arr = np.random.randn(5, 4)

arr

arr.mean()

np.mean(arr)

arr.sum()

# With an axis, the reduction runs along that axis only.
arr.mean(axis=1)

# Passing the axis by keyword (axis=0) or positionally (0) gives the same result.
arr.sum(axis=0)

arr.sum(0)

arr.sum(axis=1)

arr.sum(1)

# IPython help lookup for the sum method.
arr.sum?
# Output of the help lookup, kept for reference:
# Type:        builtin_function_or_method
# String form: <built-in method sum of numpy.ndarray object at 0x102440380>
# Docstring:
# a.sum(axis=None, dtype=None, out=None)
#
# Return the sum of the array elements over the given axis.
#
# Refer to `numpy.sum` for full documentation.
#
# See Also
# --------
# numpy.sum : equivalent function
arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])

arr

# Return the cumulative sum of the elements along the given axis.
# (A trailing ? is an IPython help lookup.)
arr.cumsum?
np.cumsum?

# Accumulate vertically:
# axis 0 runs down each column.
arr.cumsum(0)

# Multiply horizontally:
# axis 1 runs along each row.
arr.cumprod(1)

# Cumulative product down each column.
arr.cumprod(0)

# randn is referenced through np.random throughout: the bare name randn only
# exists in an IPython --pylab session and is undefined in a plain script.
arr = np.random.randn(100)

# True counts as 1 in a sum, so this counts the positive values.
(arr > 0).sum()

arr

bools = np.array([False, False, True, False])

# any: is at least one element True?
bools.any()

# all: is every element True?
bools.all()

bools = np.array([True, True, True, True])

bools.any()

bools.all()

# Any value other than 0 counts as True,
# so these methods also work on non-boolean arrays.
arr.any()

arr.all()

arr = np.random.randn(8)

arr

# ndarray.sort sorts in place and returns None.
arr.sort()

arr

arr = np.random.randn(5, 3)

arr

# Sort along axis 1: the values inside each row are ordered.
arr.sort(1)

arr

# Sort along axis 0: the values inside each column are ordered.
arr.sort(0)

arr

# With no argument the last axis is used, so this equals arr.sort(1).
arr.sort()

arr

large_arr = np.random.randn(1000)

large_arr

large_arr.sort()

# Index of the 5% quantile in the sorted array.
int(0.05 * len(large_arr))

large_arr[int(0.05 * len(large_arr))]

# Same element as above: int(0.05 * 1000) == 50.
large_arr[50]

names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

# np.unique returns the sorted unique values of the array.
np.unique(names)

ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])

np.unique(ints)

# Pure-Python counterpart of np.unique.
sorted(set(names))

# %timeit is an IPython magic that reports how long the statement takes.
%timeit np.unique(names)

%timeit sorted(set(names))

values = np.array([6, 0, 0, 3, 2, 5, 6])

# Test whether each element of a 1-D array is also present in a second array.
# Returns a boolean array the same length as 'ar1' that is True
# where an element of `ar1` is in `ar2` and False otherwise.
# The name in1d reads as "in, for 1-D arrays"; it has nothing to do with dictionaries.
# NOTE(review): newer NumPy releases recommend np.isin instead of np.in1d - confirm
# against the installed version.
np.in1d?

np.in1d(values, [2, 3, 6])

# Saving and loading arrays in NumPy's binary formats.
arr = np.arange(10)

# np.save is given the name without an extension here; the file is read back
# below as 'some_array.npy', i.e. with the '.npy' extension added.
np.save('some_array', arr)

# IPython shell escape: list the working directory to see the new file.
!ls

np.load('some_array.npy')

# savez stores several arrays in one archive, keyed by the keyword names.
np.savez('array_archive.npz', a=arr, b=arr)

arch = np.load('array_archive.npz')

# Arrays in the archive are looked up by the names used in savez.
arch['b']

arch['a']

# np.random.randn replaces the bare randn, which is undefined outside an
# IPython --pylab session.
a = np.random.randn(10)

a

%%writefile array_ex.txt
2.23342715,-0.37376633,-1.05142871
-0.57247149,-1.35777871,0.28676036
-0.01042671,-0.0211314,-0.72049352

# The %%writefile cell magic above writes the three data lines to array_ex.txt.
# IPython shell escape: print the file to check its contents.
!cat array_ex.txt

# Parse the comma-separated text file into an array.
arr = np.loadtxt('array_ex.txt', delimiter=',')

arr

# Matrix multiplication with dot.
x = np.array([[1., 2., 3.], [4., 5., 6.]])

y = np.array([[6., 23.], [-1, 7], [8, 9]])

x

x.shape

# Shape check for the product further down: a (2, 3) matrix dotted with a
# length-3 vector gives a length-2 result.
np.ones(3).shape

y

x.dot(y) # equivalent to np.dot(x, y)

# Top-left entry of x.dot(y), worked out by hand:
1*6 + 2*-1 + 3*8
# 6 + -2 + 24

np.dot(x, np.ones(3))

np.ones(3)

# First entry of np.dot(x, np.ones(3)), worked out by hand.
1*1 + 2*1 + 3*1

from numpy.linalg import inv, qr

# Core Linear Algebra Tools
# NOTE(review): only the alias np is bound in the visible part of the file, so this
# lookup on the name numpy presumably needs `import numpy` first - confirm.
numpy.linalg?

# Inverse of a square matrix
inv?

# QR decomposition of a matrix
qr?

# np.random.randn replaces the bare randn, which is undefined outside an
# IPython --pylab session.
X = np.random.randn(5, 5)

# X^T X is a square, symmetric 5x5 matrix.
mat = X.T.dot(X)

mat

inv(mat)

# A matrix times its inverse is the identity matrix (up to rounding error).
# Some basic linear algebra background is needed to follow this.
mat.dot(inv(mat))

# QR decomposition: mat == q.dot(r).
q, r = qr(mat)

r

q

samples = np.random.normal(size=(4, 4))

samples

from random import normalvariate

N = 1000000

%timeit samples = [normalvariate(0, 1) for i in xrange(N)]

%timeit np.random.normal(size=N)

import random

# Simple random walk in pure Python: start at 0 and move +1 or -1 per step.
position = 0
walk = [position]
steps = 1000
# range replaces xrange, which no longer exists in Python 3.
for i in range(steps):
    # randint(0, 1) is a fair coin flip: 1 -> step up, 0 -> step down.
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)

# NOTE(review): this rebinds plt to the plot *function*. Earlier cells use plt as the
# matplotlib.pyplot module (plt.imshow, plt.colorbar), so those calls stop working
# after this import - consider importing plot under a different name.
from matplotlib.pyplot import plot as plt

# Draw the walk values as a line plot.
plt(walk)

walk

# One more coin flip, to look at what randint returns.
random.randint(0, 1)

# The same random walk, vectorized: draw all coin flips at once.
nsteps = 1000

draws = np.random.randint(0, 2, size=nsteps)

draws

# Map the flips to steps: 1 -> +1, 0 -> -1.
steps = np.where(draws > 0, 1, -1)

steps

# The walk is the cumulative sum of the steps.
walk = steps.cumsum()

walk

walk.min(), walk.max()

# argmax on a boolean array returns the index of the first True value,
# i.e. the first step at which the walk is at least 10 away from the origin.
(np.abs(walk) >= 10).argmax()

walk.ndim

walk.shape

# Error! Naturally so, because this array has only one axis (axis 1 does not exist).
(np.abs(walk) >= 10).argmax(1)

# Simulate many random walks at once: one walk per row.
nwalks = 5000

nsteps = 1000

# randint draws random integers from 0 up to, but not including, 2 - so only 0 and 1.
# size is given as a (rows, columns) tuple.
draws = np.random.randint(0, 2, size=(nwalks, nsteps))

np.random.randint(0, 2, size=(10,3))

# With a single integer for size, the result is a one-dimensional array of
# that length (not a 1 x N two-dimensional array).
np.random.randint(0, 2, size=5)

draws.ndim

draws.shape

steps = np.where(draws > 0, 1, -1)

# Cumulative sum along axis 1, i.e. along each row (each walk).
walks = steps.cumsum(1)

walks

walks.max(), walks.min()

# For each walk: did it ever get at least 30 away from the origin?
hits30 = (np.abs(walks) >= 30).any(1)

hits30

len(hits30)

# Number of walks that reached 30 or -30.
hits30.sum()

# For the walks that did, the index of the first step at which that happened.
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)

crossing_times.mean()

# Alternative step distribution: normally distributed steps.
# NOTE: walks is not recomputed from these new steps, so the lines below
# still show the coin-flip walks computed above.
steps = np.random.normal(loc=0, scale=0.25, size=(nwalks, nsteps))

steps

walks

walks.shape

walks.ndim

np.abs(walks)