#!/usr/bin/env python
# coding: utf-8

# Now that we know what these modules are, let's just import them.  We will be adding to this "essential modules" list as we go through the course.

# In[1]:


import numpy as np
import matplotlib.pyplot as plt 
get_ipython().run_line_magic('matplotlib', 'inline')
from IPython.display import Image # just ignore this one for now....  next lecture, you will learn about it.
import os


# ### Lecture 7:
# 
# - Learn more about **NumPy** and **matplotlib**
# - Learn more about **NumPy** arrays.  
#  
# 
# 

# 
# ### NumPy and N-dimensional arrays
# 
# We briefly mentioned **arrays** in the last lecture but quickly moved into plotting (because that is more fun). But **arrays** are essential to our computational happiness, so we need to bite the bullet and learn about them now. 
# 
# **Arrays**  in **Numpy**  are somewhat  similar to  lists but there are important differences with advantages and disadvantages.
# Unlike lists,   **arrays** are usually all of the same data type (**dtype**), usually numbers (integers or floats) and at times characters.  A "feature" of arrays is that the size,  shape and type are fixed when the array is created. 
# 
# Remember, we can define a list:
# 
# L=\[ \]
# 
# then append to it as desired using the **L.append( )** method. It is more complicated (but still possible) to **extend**   arrays. 
# 
# Why use arrays when you can use lists?  Arrays are far more computationally efficient than lists, particularly for things like matrix math.  You can perform calculations on the entire array in one go instead of looping through element by element as for lists.  
# 
# To make things a little confusing, there are  several different data objects that are loosely called arrays, e.g., arrays, character arrays and matrices.  These are all subclasses of **ndarray** (N-dimensional array).  We will just worry about **arrays** in this course.  
# 
# Apart from reading in a data file with **NumPy**, as we did in the last lecture, there are many different ways of creating arrays.  Here are a few examples:

# In[2]:


# Build a 3x3 array by handing np.array( ) a list holding one list per row.
A = np.array(
    [
        [1, 2, 3],
        [4, 2, 0],
        [1, 1, 2],
    ]
)
print(A)

# Notice that the printed array has no commas between its elements.


# As we learned in the last lecture, **NumPy** can also generate an array using the **np.arange( )** function which works in a manner similar to **range( )** but creates an array with floats or integers.  **range( )** makes a list generator.  
# 
# This is just a reminder from Lecture 6:

# In[3]:


# range( ) is a list generator for integers; wrapping it in list( ) gives a
# one-dimensional (1D) list.  Its arguments are the start, the stop (which is
# NOT included) and the increment; with a single argument it counts from 0.
B = list(range(10))
print("List made by 'range': ", B)

# np.arange( ) is the NumPy counterpart: integer arguments give an array of
# integers ...
B_integers = np.arange(0, 10, 1)
print("Array made by np.arange( ): ", B_integers)

# ... and a fractional step gives an array of floats (real numbers).
B_real = np.arange(0, 10, .2)
# The "\n" inside the text string starts a new line before the array.
print("Array with real numbers: \n", B_real)


# There are several ways to create special arrays, for example, arrays initialized by zeroes, ones, or any other value: 

# In[4]:


# The size is given as a tuple: (number of rows, number of columns).
D = np.zeros(shape=(2, 3))
print(D)


# In[5]:


# Same shape tuple as before, but every element starts out as one.
E = np.ones(shape=(2, 3))
print(E)


# To get any other value, just multiply your "ones" array by whatever number you want:

# In[6]:


# Scaling the array of ones multiplies every element in one go.
print(42 * E)


# As you might have guessed, **np.arange(start, end, step)** generates numbers between two endpoints (**start** and up to but not including **end**) that are spaced by **step**. 
# 
# At times, it is useful to have __N__ numbers equally spaced between two endpoints. For this, we use the function **np.linspace(start,end,N)** which generates an array starting with **start**, going up to (and including!)  **end** with $N$ linearly spaced elements:  

# In[7]:


# Ask for 14 evenly spaced numbers from 0 to 10, both endpoints included.
F = np.linspace(start=0, stop=10, num=14)
print(F)
# Confirm the number of elements.
print(F.size)


# To summarize: 
# 
# **np.linspace( )** creates an array with $N$ evenly spaced elements starting at **start** and including the **end** value,
# while **np.arange( )** creates an array with elements at **step** intervals between  **start** up to but NOT including the  **end** value.  

# Another trick for creating arrays, is to use the **np.loadtxt( )** function, which you encountered in Lecture 6.   It reads  a data file into an array.   This example uses a 'pathname' which we  learned about in Lecture 1. 

# In[8]:


# Read the data file found at this relative pathname into an array.
newarray = np.loadtxt('Datasets/RecentEarthquakes/earthquakeLocations.txt')
print(newarray)


# ### A few words about array types
# 
# In the last example, **NumPy** figured out what array type was required - it decided to make  a floating point array without our having to specify the type.  But what if we wanted an integer array with numbers from 0 to 9 instead?  
# 
# There are a few solutions to this.  First, we could use integers in the **np.arange( )** call:

# In[9]:


# With only integer arguments, np.arange( ) returns an integer array;
# writing the start as 0. (a float) would make the whole array floats.
np.arange(0,10,1)


# Or, we could specify the array type with the _dtype_ argument, where _dtype_ can be _int_, _float_, _str_, among others. 

# In[10]:


# The same integer arguments, with the element type forced by dtype:
# first as floats ...
print(np.arange(0, 10, 1, dtype='float'))
# ... then as integers.
print(np.arange(0, 10, 1, dtype='int'))


# So, what is an _object_ array?  That would be an array that allows different data types:
# 

# In[11]:


# Two rows of integers and one row of strings; dtype='object' lets the
# array hold these mixed Python objects.
np.array([[1, 2, 3],[4,2,0],['Xiao Long','Jill','Jose']],dtype='object')


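# But object arrays have their own limitations, e.g., arithmetic on the array only works if every element supports the operation (you can't multiply this one by a float, because the strings don't allow it).  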

# So, what happens if we define an array without initializing it?   Let's make a 2x2 array of the **dtype** _float_.  

# In[12]:


# Request a 2x2 array of floats without supplying any values for it.
G = np.ndarray((2, 2), dtype=float)
print(G)


# So... the array was not really initialized at all: it holds whatever values happened to be in memory, often teeny tiny numbers but not necessarily zeros.  

# ### Array attributes
# 
# Like other Python objects we have already encountered, arrays also have _attributes_ and _methods_. As before,  _attributes_ do not have parentheses while _methods_ do.  
# 
# We will start by looking at array _attributes_  which report on the state of the array. 
# 
# As an  example of the use of an _attribute_, we can find out what the data type of an array is with the _attribute_  **array.dtype**:   
# 

# In[13]:


# dtype is an attribute (no parentheses): it reports the data type of D.
D.dtype


# As you may have already figured out, arrays have dimensions and shape. Dimensions define the number of axes, as in the illustration below. 
# 
# Remember our first array, $A$?  It  had two dimensions (axis 0 and 1).   We can use the attribute **ndim** to find this out:
#    
# 
# 

# In[14]:


# Build an IPython Image object from the illustration file Figures/ndim.jpg.
Image(filename='Figures/ndim.jpg') # just ignore this - I just want to show you the pretty picture.


# In[15]:


# Re-create the 3x3 array A, just as a reminder of what it holds.
A = np.array(
    [
        [1, 2, 3],
        [4, 2, 0],
        [1, 1, 2],
    ]
)
# ndim is an attribute (no parentheses) giving the number of axes.
print("the dimensions of A are: ", A.ndim)


# 
# Notice how **np.zeros( )**, **np.ones( )** and **np.ndarray( )** used a shape tuple in order to define the arrays in the examples above.   The shape of an array tells us how many elements are along each axis.  Python returns a tuple with the shape information if we use the **shape** _attribute_:  
# 
# 

# In[16]:


# shape is an attribute: a tuple with the number of elements along each axis.
A.shape


# ### Array methods
# 
# Arrays, like lists,   have a bunch of _methods_, but the _methods_ are different than the  _methods_ we learned about for lists.  For example, you can **append** to an array, but the results may surprise you. 
# 
# 

# In[17]:


# Show D as it stands ...
print('D: \n', D)
# ... leave some blank space ...
print('\n')
# ... then show what np.append( ) returns when three more values are added.
print('D after append: \n', np.append(arr=D, values=[2, 2, 2]))


# See how we now have a 1-D array?  Not exactly what you expected?  We can deal with that problem by reshaping the array, as we shall see.  But first, you can also **concatenate** arrays which may be a simpler way to extend your array: 

# In[18]:


# Join D and E along the first axis (axis 0 is the default).
print(np.concatenate((D, E), axis=0))


# To solve the shape problem (2D versus 1D), you can  re-arrange a 1D array into a 2D array (as long as the total number of elements is the same).   To do that, we use the **array.reshape( )** _method_:

# In[19]:


# B_real is a 1D array with 50 elements, so it can be rearranged into,
# say, a 2D array with 5 rows and 10 columns:
B_real_2D = B_real.reshape(5, 10)
print('B_real: \n', B_real)
print('\n B_real after reshaping: \n', B_real_2D)


# You can go the other way, by taking a 2D (or more) array and turning it into one long 1D array using **array.flatten( )**.  

# In[20]:


# Collapse the 2D array back into one long 1D array, row by row
# (order='C' is the default).
B_real_1D = B_real_2D.flatten(order='C')
print(B_real_1D)


# Another super useful array method is **array.transpose( )** \[equivalent to the attribute **array.T**, which takes no parentheses\] which swaps rows and columns:

# In[21]:


# Print the 2D array, then the same array with rows and columns swapped
# (the .T attribute is shorthand for .transpose( )).
print('B_real_2D: \n', B_real_2D)
print('\n B_real_2D transposed: \n', B_real_2D.T)


# ### Slicing and indexing ndarrays 
# 
# The syntax for slicing an array is similar to that for a list:  

# In[22]:


# Slice out the top two rows of A (a missing start index means 0).
B = A[:2]
print(B)


# ### Masking Arrays
# 
# We can also 'mask' arrays. This is a handy thing a bit like doing an **if** statement for an array. For example, we could make an array of numbers,  say, times, between 0 and 10 minutes and then search just for the times greater than 5 minutes.

# In[23]:


# Eleven evenly spaced times running from 0 to 10.
time = np.linspace(0, 10, num=11)
# The comparison acts as a mask: keep only the times greater than 5.
lateTime = time[(time > 5)]
print(lateTime)


# If two arrays are the same shape, we can use one array to mask another array. For example, we could make an array of distances traveled at a constant speed of 20 miles per hour, and mask to show only the distances for the last 5 minutes.

# In[24]:


# One distance for every element of time (each time divided by 3).
distance = time / 3
# distance and time share a shape, so a mask built from time picks out
# the matching distances.
lateDistance = distance[(time > 5)]
print(lateDistance)


# How does this work? We can peek into this by looking at the result when we print (time>5). It turns out that this creates an array of True and False which tells the program what elements of the array to choose.

# In[25]:


# The comparison alone gives an array of True/False, one per element of time.
boolTime = (time > 5)
print(boolTime)


# 
# 
# For many more methods and attributes of ndarrays, visit the NumPy Reference website:  https://numpy.org/doc/stable/reference/.   
# 

# ### Converting between Data Structures
# 
# We can convert from an array to a list:

# In[26]:


# tolist( ) hands back the contents of A as nested Python lists.
L = A.tolist()

# Report the type of each object; the '\t' in the labels inserts a tab.
print("Original array: \t", type(A))
print("List form: \t\t", type(L))

# Print both.  Notice the commas in the second one: the array turned into
# a list of three lists.
print(A)
print(L)


# From a list to an array:

# In[27]:


# np.array( ) accepts a list (here the nested list L) and returns an array.
AfromL = np.array(L)
print('AfromL: ')
print(AfromL)


# Or from a tuple to an array:
# 

# In[28]:


# np.array( ) accepts a tuple just as happily as a list.
AfromT = np.array(
    (4, 2)
)
print('AfromT: ')
print(AfromT)


# ### Saving NumPy arrays as text files
# 
# Having created, sliced and diced an array, it is often handy to save the data to a file for later use.  We can do that with the command **np.savetxt( )**.  
# 
# Let's save our **A** array to a file called _A.txt_.

# In[35]:


# Write the array A out as text to a file named A.txt.
np.savetxt(fname='A.txt', X=A)


# In[36]:


# and clean up: delete the file A.txt so it is not left behind
os.remove('A.txt')