#!/usr/bin/env python
# coding: utf-8

# # COMP 364: A brief tour of the Standard Library
# 
# There are three kinds of modules/packages:
# 
# * Modules you make yourself
# * Third-party modules (e.g. [matplotlib](https://matplotlib.org/))
# * Standard library modules
# 
# Standard library modules come included in Python and they contain many useful tools.
# 
# They are maintained by the core Python development team so you can count on them being reliable.
# 
# The Python Standard Library is very extensive, so I will just show you some highlights.
# 
# Refer to [this](https://docs.python.org/3/tutorial/stdlib.html) and [this](https://docs.python.org/3/library/index.html) for a more complete view on the Standard Library.
# 
# **Note:** Standard Library packages and modules are NOT the same thing as **built-in** objects (e.g. `print`, `open`, `zip`, `enumerate`). You still have to `import` standard library modules/packages; you just don't have to install them from elsewhere.
# 
# 

# ## System-related 
# 
# * `sys`: functions and variables working on the Python interpreter
# * `os`: operating system functionality
# * `shutil`: file manipulation
# 
# ### `sys`

# In[1]:


import sys

# sys.executable holds the path of the Python interpreter running this code.
print(f"Interpreter is located at: {sys.executable}\n")
# sys.path is the list of locations searched when resolving an `import`.
print(f"Look for modules in: {sys.path}\n")


# In[2]:


# Stop the interpreter, which ends the program's execution (works better
# outside of notebooks).
# NOTE(review): when this file is run top to bottom as a plain script, nothing
# below this call is executed.
sys.exit()


# ### `sys`: command-line arguments
# 
# Until now we have been getting input from the user in an "interactive" way. 
# 
# That is, the program pauses execution and waits for the user to respond to the `input` query.
# 
# You can also let users give input to your program at the beginning of execution and then execution is never halted.
# 
# This is done through **command-line arguments**
# 
# Imagine you have a file `divide.py` that divides two numbers given by the user.
# 
# Using `input()` we had
# 
# ```python
# a = int(input("Give me the first number: "))
# b = int(input("Give me the second number: "))
# 
# print(a / b)
# ```
# 
# With command-line arguments, the information is taken **before** execution.
# 
# ```python
# import sys
# 
# a = int(sys.argv[1])
# b = int(sys.argv[2])
# 
# print(a/b)
# ```
# 
# From the command line, you would call the program as such:
# 
# ```
# $ python divide.py 3 2
# ```
# 
# `sys.argv` stores a list of strings given by the command line.
# 
# In this case:
# 
# ```python
# print(sys.argv)
# ```
# 
# Would produce:
# 
# ```python
# ["divide.py", "3", "2"]
# ```
# 
# Command-line arguments are often preferred when it is desirable to automate the execution of a program.
# 
# ## `os`
# 
# This module lets you perform actions related to the operating system.
# 
# 

# In[3]:


import os

# os.name identifies the family of operating system Python is running on.
print(f"My operating system type is: {os.name}")

# os.getcwd() returns the current working directory of this process.
print(f"I am currently in directory: {os.getcwd()}")


# You can also change your current working directory

# In[4]:


# NOTE(review): hard-coded, machine-specific absolute path; os.chdir raises an
# OSError if the directory does not exist on the machine running this.
os.chdir("/Users/carlosgonzalezoliver/Projects")
# A notebook displays the returned path as the cell output; in a plain script
# the value is discarded.
os.getcwd()


# You can see what files are in a directory. With no arguments, os.listdir() looks in the current directory.

# In[5]:


os.listdir()


# Or you can give a path.

# In[6]:


os.listdir("/Users/carlosgonzalezoliver/Projects/Notebooks/COMP_364/L24")


# Let's go back to where we were.

# In[7]:


# NOTE(review): same machine-specific path caveat as the first os.chdir call.
os.chdir("/Users/carlosgonzalezoliver/Projects/Notebooks/COMP_364/L24")


# You can also create new directories.

# In[8]:


# exist_ok=True makes this step re-runnable: a plain os.mkdir("Temp") raises
# FileExistsError when the directory is left over from an earlier run.
os.makedirs("Temp", exist_ok=True)


# In[9]:


# The "Temp" directory now shows up in the listing.
os.listdir()


# ### `shutil`
# 
# `shutil` is used for file manipulation (not file content manipulation)
# 

# In[10]:


# Create a small text file in the current directory so there is something to copy.
with open("test.txt", "w") as t:
    t.write("Hello")


# In[11]:


# test.txt now appears in the listing.
os.listdir()


# In[12]:


import shutil
# Copy the contents of test.txt into a new file named test_copy.txt.
shutil.copyfile("test.txt", "test_copy.txt")


# In[ ]:


os.listdir()


# In[13]:


# shutil.rmtree deletes a directory together with everything inside it.
shutil.rmtree("Temp")


# In[14]:


# Deleting a single file is done with os.remove rather than shutil.
os.remove("test_copy.txt")


# In[ ]:


# Temp and test_copy.txt have been removed above; test.txt is not deleted here.
os.listdir()


# # Math
# 
# There are a couple of convenient "math" modules:
# 
# * math: basic math operations and quantities
# * random: pseudo-random numbers
# * statistics: basic statistics functions
# 
# 

# In[15]:


import math


# Show a few functions from the math module. Each entry pairs the label that
# is printed with the value computed for it.
for label, value in (
    ("e^2", math.exp(2)),
    ("log(1)", math.log(1)),   # natural logarithm
    ("3^4", math.pow(3, 4)),   # math.pow always returns a float
    ("sin(4)", math.sin(4)),   # the argument is in radians
):
    print(f"{label}: {value}")


# ### Random
# 
# The `random` module gives you pseudo-random (no perfectly random generator exists) functionality.

# In[18]:


import random
# random.random() draws a float uniformly from the half-open interval [0, 1).
print(f"uniform random number: {random.random()}")

# randrange(4, 16) draws an *integer* from 4..15; the stop value is excluded.
print(f"uniform random integer between 4 and 15: {random.randrange(4, 16)}")

# random.gauss takes the mean and the standard deviation (not the variance),
# so the label names the quantity that is actually passed in.
mu = 0
sigma = 1
print(f"gaussian random number with mean {mu} and standard deviation {sigma}: {random.gauss(mu, sigma)} ")


# Let's check that we're actually getting uniform and Gaussian distributions.

# In[19]:


# `get_ipython` only exists inside IPython/Jupyter. Skip the inline-plot magic
# when this file is run as a plain Python script instead of raising NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
import matplotlib.pyplot as plt

def rand_plot(samples):
    """Draw a 10-bin histogram of ``samples`` and display it.

    Args:
        samples: Sequence of numbers to bin.

    The y-axis shows the raw number of samples per bin, matching the
    "Count" axis label.
    """
    # The `normed` keyword was removed from Matplotlib's hist(); `density=False`
    # is its replacement and keeps the bars as raw counts.
    plt.hist(samples, 10, density=False, facecolor='green', alpha=0.75)
    plt.xlabel("Value")
    plt.ylabel("Count")
    plt.show()
    
# 1000 floats drawn uniformly between 10 and 15; the histogram should look
# roughly flat.

unif = [random.uniform(10, 15) for _ in range(1000)]
rand_plot(unif)

# 1000 Gaussian draws using the mu and sigma defined in an earlier cell; the
# histogram should look roughly bell-shaped.

gaussian = [random.gauss(mu, sigma) for _ in range(1000)]
rand_plot(gaussian)


# We can also do random things with lists.

# In[20]:


# random.choice returns one item picked at random from a sequence.

birds = ["duck", "goose", "eagle", "swan"]

print(random.choice(birds))

# A coin toss is just a random choice between two outcomes.
coin = ["heads", "tails"]
print(random.choice(coin))

# random.shuffle reorders the list itself (in place) rather than returning a
# new list, so `birds` is printed afterwards to show the new order.
random.shuffle(birds)
print(birds)


# # Data structures
# 
# The `collections` module lets us enhance some of the container types we've seen for more user friendliness.

# In[21]:


import collections

# Tally the number of occurrences of each item in a list.
c = collections.Counter(
    ["red", "red", "red", "black", "red", "blue", "blue"]
)
# Print the full tally, then the count stored for a single key.
print(c, c["red"], sep="\n")
# The two most common elements as (element, count) pairs, most frequent first.
print(c.most_common(2))


# `namedtuple` lets us give names to the indices of a tuple.

# In[25]:


# namedtuple() builds a tuple subclass whose positions can also be read by
# name. The field names may be given as one space-separated string.
Student = collections.namedtuple('Student', 'name grade major')

s = Student(name='Carlos', grade=2.1, major='cs')
# Read the fields by name: grade, then name, then major (one per line).
print(s.grade, s.name, s.major, sep="\n")


# Useful for giving CSV entries meaningful names.
# 
# `test.csv`:
# 
# ```
# carlos,2.4,cs
# jim,3.1,math
# joan,2.5,phys
# jack,3.6,cs
# ```

# In[28]:


with open("test.csv", "r") as students:
    # A distinct loop name keeps the `s` Student created earlier intact.
    for row in students:
        line = row.strip().split(",")
        # Skip blank lines: they split into a single empty field, and
        # Student._make would raise TypeError for the wrong field count.
        if line == [""]:
            continue
        # The _make() function lets you make a namedtuple from an iterable.
        # Every field, including the grade, stays a string here.
        tup = Student._make(line)
        print(tup)
        print(tup.name)


# The `datetime` module is useful for handling date formats.

# In[30]:


import datetime as dt

date = dt.date(2017, 11, 9)
print(date)
print(date.year)

# Take today's date once so every calculation below uses the same day.
today = dt.date.today()
print(today)

# Compare dates: count down to the next 25 December. A fixed year would make
# the countdown negative as soon as that Christmas has passed.
christmas = dt.date(today.year, 12, 25)
if christmas < today:
    christmas = dt.date(today.year + 1, 12, 25)
till_christmas = christmas - today
# Subtracting two dates produces a timedelta object.
print(type(till_christmas))

# .days is the whole-day count; formatting the timedelta itself would print
# something like "46 days, 0:00:00" instead of a number of days.
print(f"Days till Christmas: {till_christmas.days}")

# Day of the week as an integer (Monday is 0, Sunday is 6).
print(today.weekday())
print(christmas.weekday())


# # Quality Control
# 
# The `timeit` module helps you time the execution of some code snippets.

# In[31]:


import timeit

# timeit.timeit runs the statement repeatedly (one million times by default)
# and returns the total elapsed time in seconds. A notebook displays the
# returned value; in a plain script it is discarded unless printed.
timeit.timeit("[x*x for x in range(100)]")


# The `doctest` module lets you put executable python in docstrings as test calls to make sure everything works as expected. The module looks for `>>>` interactive python calls and compares the actual call to what is in the string as the output. 

# In[33]:


import doctest

def mysquare(x):
    """Return ``x`` multiplied by itself.

    The example below is executed by ``doctest``:

    >>> mysquare(5)
    25
    """
    squared = x * x
    return squared
def mymean(nums):
    """Return the arithmetic mean of a list of numbers.

    Args:
        nums: A non-empty sequence of numbers (it must support ``len``).

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ZeroDivisionError: If ``nums`` is empty.

    >>> mymean([2, 2, 3, 4])
    2.75
    """
    # The built-in sum() replaces the hand-written accumulation loop.
    return sum(nums) / len(nums)

# Run the docstring examples (the `>>>` lines) found in the main module and
# report any whose actual output differs from the expected output. By default
# nothing is printed when every example passes.
doctest.testmod()


# # Data Storage
# 
# `pickle` is a very useful module for storing python objects in files so that you can keep working on them later.
# 

# In[35]:


import pickle

animals = ["dog", "cat", "giraffe", "lion", "zebra"]

# Map each animal name to a random float in [0, 1). A dict comprehension
# replaces the empty-dict-plus-loop construction.
rand_dict = {animal: random.random() for animal in animals}
print(rand_dict)


# I can now store, or **dump** the dictionary to a file.
# 
# Pickle stores objects as a binary representation which is not human readable and only works in Python but is very fast.

# In[36]:


# Open the file in binary mode ("wb") because pickle writes bytes. The `with`
# block closes the file, so the data is flushed to disk before it is read back.
with open("rand_dict.pickle", "wb") as pickle_file:
    pickle.dump(rand_dict, pickle_file)


# In[37]:


# Read the pickled object back; the `with` block closes the file afterwards.
# NOTE: only unpickle files you trust, since loading a pickle can run arbitrary code.
with open("rand_dict.pickle", "rb") as pickle_file:
    loaded = pickle.load(pickle_file)


# In[38]:


print(loaded)


# `json` does a similar job but the contents are human-readable and can be read by any language. The downside is it's not as fast.
# 
# JSON cannot store any custom classes and not all python classes can be JSONed.

# In[39]:


import json

# JSON is text, so the file is opened in text mode ("w"). The `with` block
# closes the file, so the data is flushed to disk before it is read back.
with open("rand_dict.json", "w") as json_file:
    json.dump(rand_dict, json_file)


# In[40]:


# Parse the JSON file back into a Python object; the `with` block closes the
# file afterwards.
with open("rand_dict.json", "r") as json_file:
    jsoned = json.load(json_file)


# In[41]:


# A notebook displays the loaded dictionary here; a plain script shows nothing.
jsoned


# # Multiprocessing
# 
# Sometimes you can have tasks that can be easily parallelized.
# 
# Since most computers have more than one processor, we can let multiple processors work on our Python at the same time.
# 
# For example:
# 
# For a given number $n$ I want to compute the sum of every number **up to $n$** cubed. 
# 
# Obviously the process of cubing a particular number in the list is independent of cubing any other number.

# In[42]:


from multiprocessing import Pool
import time

def cube_sum(x):
    """Return 0**3 + 1**3 + ... + (x - 1)**3 (``x`` itself is excluded)."""
    total = 0
    for i in range(x):
        total += i ** 3
    return total

# multiprocessing may start each worker by importing this file again, so the
# code that creates the Pool sits behind a __main__ guard; without it the
# workers would try to create pools of their own.
if __name__ == "__main__":
    # Build the inputs before starting the clock so that both timings measure
    # only the calls to cube_sum.
    nums = list(range(10000))

    # The context manager takes care of setting up and shutting down the Pool,
    # which holds the worker processes that tasks are sent to (4 here).
    start = time.time()
    with Pool(4) as p:
        result = p.map(cube_sum, nums)
    print(f"Parallel job took: {time.time() - start}")

    # Normally (serially), for comparison:
    start_serial = time.time()
    serial_result = [cube_sum(x) for x in nums]
    print(f"Serial job took {time.time() - start_serial}")


# The reason I came up with such a weird function is that parallelizing is not always faster.
# 
# There is quite a bit of setup and communication that needs to happen to coordinate the processors (aka **overhead**).
# 
# If the actual computation is faster than the overhead then the normal serial method is faster.

# # Others
# 
# There are many other modules that I did not cover, and many other functionalities of the ones I did cover that I didn't have time to show you.
# 
# Some notable Standard Library modules worth looking into:
# 
# * `re`: searching for patterns inside strings
# * `statistics`: basic statistics functions (mean, std, etc.)
# * `os.path`, `glob`: handling file paths 
# * `csv`: automatic CSV file parsing
# * `logging`: code and error logging
# * `argparse`: command-line argument parser
# * `tkinter`: building graphical user interfaces

# In[ ]: