#!/usr/bin/env python
# coding: utf-8
# ## an array looks like a Python list that has different shapes
# - That is why you see the square brackets
# - Array is just a group of items of the same type
# In[1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# Turn on inline plots when running under IPython/Jupyter. get_ipython only
# exists inside IPython, so when this exported notebook is run as a plain
# script the magic is skipped instead of aborting with NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # plain Python interpreter: no IPython magics available
#
#
# ## [Importing into array /exporting array](#Importing-into-array/-exporting-array)
#
# - np.loadtxt('./data/data1.txt')
# - np.savetxt('./data/ar1.txt',ar1,delimiter=' ')
# ***
#
#
# ## [Convert from pandas](#convert-from-pandas)
# - df.values
#
#
# Tip: to convert an array to a python list:
# ar.tolist()
#
#
# ***
#
# ## [Creating arrays from scratch](#from-scratch)
#
# > ### 1D
# - np.array([1,2,3, 10])
# - np.zeros(how_many)
# - np.ones(how_many)
# - np.full(how_many, what) | the shape comes first, then the fill value
#
# > ### 2+D
# ### np.array([(), ()]), or np.array([[ , , ]])
#
# > ### [np.linspace](#np-linspace)
# > arithmetics interval = (end - start)/(n -1).
# > For example, (10-1)/(4-1) = 3
#
# > ### [np.arange](#np-arange)
# > ### any dimension
# np.arange(some_size).reshape(some_shape)
#
#
#
# Tip: np.arange is similar to np.linspace with the following 3 differences:
# 1. the end point is not included; 2. you specify the interval instead of the number of points; 3. np.linspace returns an array of floats whereas np.arange returns integers when given integer arguments
#
#
# > ### [np.random](#np-random)
# - np.random.rand(n,m) returns an array with floats between 0-1
# - np.random.rand(n,m)*k returns an array with floats between 0-k
# - np.random.randint(start, end, size=(n,m)) If we do not specify size=, it will return 1 random integer between start (included) and end (excluded); e.g. np.random.randint(0,10,size=(2,3))
# - np.random.randn(n,m) returns array with floats from standard normal distribution with mean 0 and standard deviation of 1
# - np.random.randn(n,m) * sigma + mu, returns array with floats shifted and scaled from the standard normal distribution
#
# ***
#
# > ### [Inspecting array](#inspect)
# - ar.size | number of elements in ar
# - ar.shape | dimensions of ar (rows,columns)
# - ar.ndim | number of dimensions
# - ar.dtype | type of elements in ar
# - ar.astype(dtype) | Convert arr elements to type dtype
# ***
#
#
# ## [Copying/sorting/reshaping](#copy)
# - np.copy(ar) | copies to *new* memory, i.e. deep copy.
# In comparison, ar2 = ar1 (called a "shallow copy" in these notes) makes no copy at all: it just gives the original array another name, nothing else. So changing ar1 will change ar2, and vice versa!
# - ar.sort() | sort
#
# - ar.sort(axis=0)
#
#
# - two_d_ar.flatten() | Flattens 2D array to 1D
#
#
# - ar.T | Transposes ar (rows become columns and vice versa)
#
#
# - ar.reshape(3,4) | Reshapes ar to 3 rows, 4 columns without changing data
#
#
# - ar.resize((5,6)) | Changes ar shape to 5x6 and fills new values with 0
#
# - ar.view(dtype) | Creates view of ar elements with type dtype
#
# Tip: b = array.copy() is a deep copy (i.e. new memory)
# This is very different from python list. Copied python list is not a deep copy unless you make it a deep copy using import copy.
#
#
# ***
#
# > ## [Combining/splitting](#Combining-splitting)
# - np.concatenate((ar1,ar2),axis=0) | Adds ar2 as rows to the end of ar1
# - np.concatenate((ar1,ar2),axis=1) | Adds ar2 as columns to end of ar1
# - np.split(ar,3) | Splits ar into 3 equal sub-arrays
# - np.hsplit(ar,5) | Splits ar horizontally (column-wise) into 5 equal sub-arrays; use np.hsplit(ar,[5]) to split at column index 5
#
# ***
#
# > ## [Adding/removing Elements](#add-remove)
# - np.append(arr,values) | Appends values to end of arr
# - np.insert(arr,2,values) | Inserts values into arr before index 2
# - np.delete(arr,3,axis=0) | Deletes row on index 3 of arr
# - np.delete(arr,4,axis=1) | Deletes column on index 4 of arr
#
# ***
#
# > ## [Indexing/slicing/subsetting](#indexing-slicing-subsetting)
# - ar[5] | the element at index 5
# - ar[2,5] | element on index [2][5]
# - ar[1]=4 | Assigns array element on index 1 the value 4
# - ar[0:3] | Returns the elements at indices 0,1,2 (On a 2D array: returns rows 0,1,2)
# - ar[0:3,4] | Returns the elements on rows 0,1,2 at column 4
# - ar[:2] | Returns the elements at indices 0,1 (On a 2D array: returns rows 0,1)
# - ar[:,1] | Returns the elements at index 1 on all rows
# - ar\\<5 | Returns an array with boolean values
# - (ar1\\<3) & (ar2\\>5) | Returns an array with boolean values
# - ~ar | Inverts a boolean array
#
# - ar[ar<5] | boolean slicing
#
#
# ## [More examples](#more-exaples)
# ***
#
# ## Importing into array /exporting array
#
# #### txt file (data separated with space)
# In[2]:
# data1.txt holds just 6 numbers separated by spaces, followed by a line break
ar1 = np.loadtxt(fname='./data/data1.txt')
ar1
# In[3]:
# write the array back out as space-delimited text
np.savetxt(fname='./data/ar1.txt', X=ar1, delimiter=' ')
# In[93]:
# read the saved file back in to verify the round trip works
ar1 = np.loadtxt(fname='./data/ar1.txt')
ar1
#
# ## Convert from pandas
# - df.values
#
# ## Convert to list
# - arr.tolist() | Convert arr to a Python list
# In[5]:
# the csv file has no header row, so pandas numbers the columns itself
df = pd.read_csv(filepath_or_buffer='./data/data2.csv', header=None)
df
# In[6]:
# .values exposes the DataFrame contents as a numpy array
numpy_matrix = df.values
numpy_matrix
# In[7]:
# small in-memory DataFrame: columns "A" and "B", two rows
df = pd.DataFrame(data={"A": [1, 2], "B": [3, 4]})
df
# In[8]:
# each DataFrame row becomes one row of the resulting 2-D array
ar = df.values
ar
# # Convert to python list
# - we already know numpy.array converts python list to numpy array
# - arr.tolist() | Convert arr to a Python list
# In[9]:
# small 2x2 integer array to convert
ar = np.array([
    [1, 100],
    [2, 1],
])
ar
# In[10]:
# tolist() turns the array into nested plain Python lists
l = ar.tolist()
l
# In[11]:
# confirm the result is a built-in list rather than an ndarray
type(l)
#
# ## From Scratch
# ### 1D
# - np.array([1,2,3, 10])
# - np.zeros(how_many)
# - np.ones(how_many)
# - np.full(how_many, what) | the shape comes first, then the fill value
# In[12]:
# one float in the list makes the whole array float
np.array([1, 7, 90, 1000.0])
# In[13]:
# 100 zeros (float by default)
np.zeros(shape=100)
# In[14]:
# 10 ones (float by default)
np.ones(shape=10)
# In[15]:
# length-1 array filled with the value 1; note that (1) is just the int 1
np.full(shape=1, fill_value=1)
# ### 2D
# ### np.array([(), ()]), or np.array([[ , , ]])
# In[16]:
# 10x10 matrix of zeros
np.zeros((10, 10))
# In[17]:
# 10x10 matrix of ones
np.ones((10, 10))
# In[18]:
# 2x3 matrix with every entry set to 1000.1
np.full((2, 3), 1000.1)
# In[19]:
# 3x3 identity matrix
np.eye(3)
# In[20]:
# a list of tuples works the same way as a list of lists
np.array([(1, 2), (3, 4)])
# In[21]:
np.array([(1, 7), (6, 9)]).shape
# In[22]:
np.array([(1, 2), (3, 0)])
# In[23]:
# Rows of different lengths cannot form a rectangular array. NumPy >= 1.24
# raises ValueError for such input unless dtype=object is requested
# explicitly; the result is then a 1-D array holding two Python lists,
# hence shape (2,).
np.array([[1, 2, 3, 4], [1, 4, 3]], dtype=object).shape
# In[24]:
# equal-length rows give a true 2-D array of shape (2, 4)
np.array([[1, 2, 3, 4], [5, 6, 7, 8]]).shape
# In[25]:
# double brackets: a 2-D array with a single row, shape (1, 4)
np.array([[1, 2, 3, 4]])
# In[26]:
np.array([(1, 2), (3, 4)])
# In[27]:
# nested lists result in multi-dimensional arrays
np.array([range(i, i + 3) for i in [1, 3, 1]])
# ## np.linspace
# In[28]:
# np.linspace spaces n points evenly: interval = (end - start)/(n - 1).
# For example, np.linspace(1, 10, 4) has interval (10-1)/(4-1) = 3;
# the call below has interval (2-1)/(4-1) = 1/3
np.linspace(start=1, stop=2, num=4)
# In[29]:
# interval = (11-1)/(6-1) = 2
np.linspace(start=1, stop=11, num=6)
# ### np.arange
# In[30]:
# np.arange is similar to np.linspace, with 3 differences:
# 1. the end point is not included;
# 2. you specify the interval rather than the number of points;
# 3. np.linspace returns an array of floats whereas np.arange returns
#    integers when its arguments are integers
# in general, I prefer np.arange instead of np.linspace
np.arange(0, 10, step=3)
# ### np.random
# - np.random.rand(n,m) returns an array with floats between 0-1
# - np.random.rand(n,m)*k returns an array with floats between 0-k
# - np.random.randint(start, end, size=(n,m)) If we do not specify size=, it will return 1 random integer between start (included) and end (excluded); e.g. np.random.randint(0,10,size=(2,3))
# - np.random.randn(n,m) returns array with floats from standard normal distribution with mean 0 and standard deviation of 1
# - np.random.randn(n,m) * sigma + mu, returns array with floats shifted and scaled from the standard normal distribution
# In[31]:
np.random.rand(2, 3)  # 2X3 array of random floats between 0–1
# In[32]:
100 * np.random.rand(2, 3)  # 2X3 array of random floats between 0–100
# In[33]:
# no size= given, so a single random integer from 2 up to (not including) 100
np.random.randint(low=2, high=100)
# In[34]:
# 10 draws from the standard normal, scaled by 10 and shifted by 1
ar = 1 + 10 * np.random.randn(10)
plt.hist(ar, bins=np.arange(-30, 31, 3))
# In[35]:
ar.std()  # standard deviation over all elements (no axis given)
# In[36]:
# same scaling and shift, 1000 draws this time
ar = 1 + 10 * np.random.randn(1000)
# In[37]:
plt.hist(ar, bins=np.arange(-30, 31, 3))
# In[38]:
ar.std()
# ## Inspecting array
# - ar.size | Returns number of elements in array
# - ar.shape | Returns dimensions of array (rows,columns)
# - ar.dtype | Returns type of elements in array
# - ar.astype(dtype) | Convert arr elements to type dtype
#
# [Back to top](#back-to-top)
# In[95]:
# 2x2 array of random integers from 0 up to (not including) 10
ar = np.random.randint(low=0, high=10, size=(2, 2))
ar
# In[96]:
ar.size  # total number of elements
# In[97]:
ar.shape  # (rows, columns)
# In[98]:
ar.ndim  # number of dimensions
# In[99]:
ar.dtype  # element type
# In[100]:
ar.astype('float')  # converted result is returned; ar keeps its own dtype
# ## Copying/sorting/reshaping
# - np.copy(ar) | copies to *new* memory
# - ar.sort() | sort
# - ar.sort(axis=0)
# - two_d_ar.flatten() | Flattens 2D array to 1D
# - ar.T | Transposes ar (rows become columns and vice versa)
# - ar.reshape(3,4) | Reshapes ar to 3 rows, 4 columns without changing data
# - ar.resize((5,6)) | Changes ar shape to 5x6 and fills new values with 0
#
# - ar.view(dtype) | Creates view of ar elements with type dtype
# In[101]:
ar
# In[102]:
# ndarray.copy() allocates new memory, so b is independent of ar
b = ar.copy()
b
# #### shallow copy
# In[106]:
# plain assignment copies nothing: c is a second name for the same array
c = ar
c
# In[104]:
ar[0, 0] = 1000
ar
# In[105]:
# c shows the new value too, because c and ar are the same array object
c
# #### deep copy (note this would have been a shallow copy if ar is a python list and not a numpy array)
# In[107]:
b = ar.copy()
# In[108]:
ar[0, 0] = 0
ar
# In[109]:
# b was copied before ar was reset, so it still holds 1000 at [0, 0]
b
# In[110]:
# c follows ar and shows 0 at [0, 0]
c
# #### Note that shallow copy acts as if it is the original except that it's got another name.
# *Changing the shallow copy will change the original array*
# In[112]:
c[0, 1] = 999
c
# In[115]:
ar  # note that its [0, 1] element is also changed!
# a.sort(axis=-1, kind='quicksort', order=None)
# In[ ]:
# ## Combining/splitting
# - np.concatenate((ar1,ar2),axis=0) | Adds ar2 as rows to the end of ar1
# - np.concatenate((ar1,ar2),axis=1) | Adds ar2 as columns to end of ar1
# - np.split(ar,3) | Splits ar into 3 equal sub-arrays
# - np.hsplit(ar,5) | Splits ar horizontally (column-wise) into 5 equal sub-arrays; use np.hsplit(ar,[5]) to split at column index 5
#
# [Back to top](#back-to-top)
# In[52]:
# join four copies of ar along axis 0, then four copies of that along axis 1
dr = np.concatenate([ar] * 4, axis=0)
np.concatenate([dr] * 4, axis=1)
# In[53]:
# two equal pieces along axis 0
np.split(ar, indices_or_sections=2)
# In[54]:
# two equal pieces, split column-wise (assumes ar is the 2-D array from the cells above)
np.hsplit(ar, indices_or_sections=2)
# In[55]:
# join the column-wise pieces back together along axis 1
np.concatenate(np.hsplit(ar, indices_or_sections=2), axis=1)
# ## Adding/removing Elements
# - np.append(arr,values) | Appends values to end of array
# - np.insert(arr,2,values) | Inserts values into arr before index 2
# - np.delete(arr,3,axis=0) | Deletes row on index 3 of arr
# - np.delete(arr,4,axis=1) | Deletes column on index 4 of arr
#
# [Back to top](#back-to-top)
# In[56]:
ar
# In[57]:
# without axis, both inputs are flattened, so the result is 1-D
np.append(ar, values=[5, 5])
# In[58]:
# with axis, values must have the same number of dimensions as ar and match
# its shape on every axis except the one being appended along
np.append(ar, values=[[5, 5], [5, 5]], axis=0)
#
# Tip: If you don't specify axis, then the result from `np.append` will be flattened.
#
# If you specify axis, then values must match the shape of the array on every axis except the one you append along.
#
# In[59]:
np.append(ar, values=[[5, 5], [5, 5]], axis=1)
# In[60]:
a = np.array([
    [1, 1],
    [2, 2],
    [3, 3],
])
a
#
# Tip: np.insert(arr, obj, values, axis=None)
# obj=: Object that defines the index or indices before which `values` is inserted.
#
# In[61]:
# np.insert(arr, obj, values, axis=None)
# obj is the index (or indices) before which `values` is inserted.
# No axis is given, so `a` is flattened first; obj=0 puts 5 at the very front
np.insert(a, obj=0, values=5)
# In[62]:
# obj=-1 inserts before the last element of the flattened array
np.insert(a, obj=-1, values=5)
# In[63]:
# a flat list gives a 1-D array of shape (3,)
np.array([1, 2, 3]).shape
# In[64]:
# one value per inner list gives a column of shape (3, 1)
np.array([[1], [2], [3]]).shape
# In[65]:
# with axis=1, [0] inserts the column [[1],[2],[3]] before column 0,
# i.e. as the very first column
np.insert(a, obj=[0], values=[[1], [2], [3]], axis=1)
# In[66]:
# with axis=1, [1] inserts the column before column 1, i.e. as the second column
np.insert(a, obj=[1], values=[[1], [2], [3]], axis=1)
# In[67]:
# np.insert returns a new array each time; a itself is unchanged
a
# In[68]:
b = a.flatten()
b
# In[69]:
b
# In[70]:
# slice(2, 4) stands for indices 2 and 3: 5 goes before index 2, 6 before index 3
np.insert(b, obj=slice(2, 4), values=[5, 6])
# In[71]:
np.arange(8)
# In[72]:
x = np.arange(8).reshape((2, 4))
x
# In[73]:
# with axis=1 these are two column positions: 999 is inserted before
# column 1 and before column 3 in every row
idx = (1, 3)
np.insert(x, obj=idx, values=999, axis=1)
# [Back to top](#back-to-top)
# ## Indexing/slicing/subsetting
# - ar[5] | Returns the element at index 5
# - ar[2,5] | Returns the 2D array element on index [2][5]
# - ar[1]=4 | Assigns array element on index 1 the value 4
# - ar[1,3]=10 | Assigns array element on index [1][3] the value 10
# - ar[0:3] | Returns the elements at indices 0,1,2 (On a 2D array: returns rows 0,1,2)
# - ar[0:3,4] | Returns the elements on rows 0,1,2 at column 4
# - ar[:2] | Returns the elements at indices 0,1 (On a 2D array: returns rows 0,1)
# - ar[:,1] | Returns the elements at index 1 on all rows
# - ar\\<5 | Returns an array with boolean values
# - (ar1\\<3) & (ar2\\>5) | Returns an array with boolean values
# - ~ar | Inverts a boolean array
# - ar[ar<5] | Returns array elements smaller than 5
#
#
# [Back to top](#back-to-top)
# ## More examples
# In[74]:
# array of 10 integer zeros
np.zeros(shape=10, dtype=int)
# In[75]:
# 2x3 array of float zeros
np.zeros(shape=(2, 3), dtype=float)
# In[76]:
# 10x10 array filled with the value 1
np.full(shape=(10, 10), fill_value=1)
# In[77]:
# 10x10 array of integer ones
np.ones(shape=(10, 10), dtype=int)
# ## broadcasting
# ### Ex. 1 long - wide
# - long duplicates itself horizontally to match wide's width
# - wide duplicates itself vertically to match long's height
# In[78]:
# "long" operand: a column of ones, shape (2, 1)
X = np.ones(shape=(2, 1), dtype=int)
X
# In[79]:
# "wide" operand: a row of ones, shape (1, 2)
Y = np.ones(shape=(1, 2), dtype=int)
Y
# In[80]:
# both operands are broadcast to shape (2, 2) before the subtraction
X - Y
# ### Ex. 2 long - wide
# - long duplicates itself horizontally to match wide's width
# - wide duplicates itself vertically to match long's height
# In[81]:
# "long" operand: a column of twos, shape (2, 1)
X = np.full(shape=(2, 1), fill_value=2)
X
# In[82]:
# "wide" operand: a row of ones, shape (1, 2)
Y = np.full(shape=(1, 2), fill_value=1)
Y
# In[83]:
# every entry of the broadcast (2, 2) result is 2 - 1
X - Y
# ### Ex. 3 long - wide
# - long duplicates itself horizontally to match wide's width
# - wide duplicates itself vertically to match long's height
# In[84]:
# "long" operand with distinct values, shape (2, 1)
X = np.array([
    [1],
    [2],
])
X
# In[85]:
# "wide" operand with distinct values, shape (1, 2)
Y = np.array([[2, 1]])
Y
# In[86]:
# entry [i, j] of the result is X[i, 0] - Y[0, j]
X - Y
# ## Tricking it into doing something with every row
# If we think of each row of an array (which we call X here) as a point in 2-D space (like a sheet of paper), and we want to get its distance from all the points, including itself,
#
# then we reshape a copy of it into 3-D space, which we call Y. So when we take the difference between them, X will be duplicated along the 3rd dimension.
#
# The trick is that we do not reshape Y in (2,2,1). Rather, we reshape Y in (2,1,2).
#
# In the first 2D space, X is (2,2) whereas Y is (2,1). So Y has to duplicate itself to become (2,2).
#
# In the last dimension, X has to duplicate itself for Y.
# In[87]:
# two points in 2-D space, one per row
X = np.array([[1, 0], [2, 1]])
X
# In[88]:
# same data reshaped to (2, 1, 2): each point gets its own 1x2 slab
Y = X.reshape((2, 1, 2))
Y
# In[89]:
# [[ 0,  0], [-1, -1]] = [[1, 0]] - [[1, 0], [2, 1]]
# [[ 1,  1], [ 0,  0]] = [[2, 1]] - [[1, 0], [2, 1]]
Y - X
# #### Let's check to see if we can replicate what numpy did
# In[90]:
# first point minus every point
first_diff = np.array([[1, 0]]) - np.array([[1, 0], [2, 1]])
first_diff
# In[91]:
# second point minus every point
second_diff = np.array([[2, 1]]) - np.array([[1, 0], [2, 1]])
second_diff
# In[92]:
# Combine the two differences. The original call closed the tuple too early,
# so it subtracted an array from the tuple (first difference, [[2, 1]])
# instead of combining two differences. np.stack adds a new leading axis, so
# the result has shape (2, 2, 2), the same as subtracting the (2, 2) array
# from its (2, 1, 2) reshape.
replicated = np.stack((first_diff, second_diff))
replicated