#!/usr/bin/env python
# coding: utf-8

# ## 04-01 用電腦來學習
# 
# 今天我們會簡單介紹 Python 在算數學的時候, 可以用到的一些基本工具。做這個的目的, 除了介紹基本工具, 更重要的是希望大家知道, 「你一定要去寫有意思的程式, 你才有興趣寫下去。」
# 
# 也就是, 「你要覺得好玩才可以。」也許是我們同學太優秀, 所以似乎不太會找好玩的問題。
# 
# 另一方面, 如果我們要學的東西就是, 嗯, 什麼貝氏統計啦, 假設檢定啦, information theory 啦, 或其他任何有點深奧生硬的主題, 要怎麼辦呢? 一個很好的方法, 就是用 Python 寫程式來幫你學習!
# 
# 很能代表這樣精神的, 就是 Allen Downey 教授, 他學什麼都寫成程式。他以前是寫 Java 的, 有本很出名的書叫 "How To Think Like a Computer Scientist", 第一版用的是 Java。之後有人覺得這本書太有意思, 和他合寫 Python 版, 然後他的主力程式語言就變成 Python 了!
# 
# [【Allen Downey 免費的書下載】](https://greenteapress.com/wp/)
# 
# 另一個很好的例子是 Mosky, 她是台科大資料系出身, 近來非常認真的唸數學和統計學, 她的
# 
# [【Hypothesis Testing with Python】](https://github.com/moskytw/hypothesis-testing-with-python)
# 
# 就是很好的「用電腦學習」的範例。
# 

# ## 04-02 進入 array 導向的程式設計
# 
# 數據分析最重要的概念, 大概是「不到最後關頭, 絕不輕言使用迴圈」。

# In[1]:


# `get_ipython` is injected into the namespace only by IPython/Jupyter. When
# this exported notebook is run as a plain script the name does not exist, so
# look it up defensively instead of stopping with a NameError.
try:
    ipython_shell = get_ipython()
except NameError:
    ipython_shell = None

# Ask for inline matplotlib figures only when an interactive shell is present.
if ipython_shell is not None:
    ipython_shell.run_line_magic('matplotlib', 'inline')

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


# 有一班同學成績是這樣的:
#     
#     grades = [35, 74, 43, 66, 87, 55, 71, 65]

# 老師想要每位同學成績都是乘 1.1 倍!

# In[2]:


# Raw scores for the class, kept as a plain Python list.
grades = [35, 74, 43, 66, 87, 55, 71, 65]


# In[3]:


# List version, step 1: scale every score by 1.1, one element at a time.
curved = [score * 1.1 for score in grades]


# In[4]:


curved


# List version, step 2: add 4 more points, which needs yet another pass over the data.

# In[5]:


curved2 = [score + 4 for score in curved]


# In[6]:


curved2


# Switching to an array makes the same adjustment both faster and more natural.

# In[7]:


grad_arr = np.asarray(grades)


# In[8]:


grad_arr


# In[9]:


# The scalar is applied to every element; no explicit loop is written.
1.1 * grad_arr


# Scale and add the 4 points in a single expression.

# In[10]:


4 + 1.1 * grad_arr


# ### 這個就叫 "broadcasting!"

# ## 04-03 Broadcasting 比我們想像更有趣

# 一位老師成績這樣算的:
# 
# * 平時成績 20%
# * 期中考 35%
# * 期末考 45%
# 
# 有位同學
# 
# * 平時成績 85 分
# * 期中 70 分
# * 期末 80 分
# 
# 這位同學的學期成績是多少?

# In[11]:


# One student's three component scores and the matching weights
# (coursework, midterm, final -- in that order).
grades = np.array([85, 70, 80])
weights = np.array([0.2, 0.35, 0.45])


# In[12]:


# Element-wise product: each score scaled by its own weight.
wgrades = np.multiply(grades, weights)


# In[13]:


wgrades


# In[14]:


# Adding up the weighted parts gives the term grade.
np.sum(wgrades)


# The same number comes straight out of a dot product (matrix multiplication).

# In[15]:


grades @ weights


# Now several students at once: one row per student, one column per component.

# In[16]:


grades = np.array([
    [85, 70, 80],
    [12, 88, 77],
    [85, 91, 33],
])


# In[17]:


# The 1-D weights are broadcast across every row of the 2-D score table.
np.multiply(grades, weights)


# This is where the idea of an axis comes in.

# In[18]:


wgrades = np.multiply(grades, weights)


# In[19]:


wgrades


# In[20]:


# Sum along axis 1 (across the columns) to get one total per student.
np.sum(wgrades, axis=1)


# Computing it this way is admittedly a little misleading...

# In[21]:


# ...because the matrix product gives the per-student totals directly.
grades @ weights


# What if the weights are turned into a "proper" matrix?

# In[22]:


weights.shape


# In[23]:


# Three rows, one column: a column vector.
weights.reshape((3, 1))


# In[24]:


grades @ weights.reshape((3, 1))


# ## 04-04 重要的 array 大變身
# 
# 我們在做數據分析時, 常常要改變 array 的形式。

# ### [練習] 一個 50 個數字的 array
# 
# 先想辦法、用亂數做出 50 個數字的 array, 叫做 A 好了。

# In[25]:


# Fifty random integers drawn from 0..9 (the upper bound 10 is exclusive).
A = np.random.randint(low=0, high=10, size=50)


# In[26]:


A


# ### Check the shape of A

# In[27]:


A.shape


# ### Change the shape of A

# In[28]:


# Assigning to .shape rearranges A itself into 5 rows by 10 columns.
A.shape = 5, 10


# In[29]:


A


# Using `reshape` is actually the better way...

# In[30]:


# reshape hands back a reshaped array; A itself is left as it was.
A.reshape((10, 5))


# In[31]:


A


# In[32]:


# Rebind the name to keep the reshaped result.
A = A.reshape((10, 5))


# In[33]:


A


# ### Flatten back into a single vector...

# In[34]:


np.ravel(A)


# In[35]:


# A still has its 10x5 shape: the flattened result above was not stored.
A


# ### 快速 array 生成法

# In[36]:


# A length-10 vector of zeros.
np.zeros(shape=10)


# In[37]:


# A 3x4 matrix of zeros; for more than one dimension the shape is a tuple.
np.zeros(shape=(3, 4))


# In[38]:


# 5x5 identity matrix.
np.eye(5)


# In[39]:


# An 8x7 matrix filled with ones.
np.ones(shape=(8, 7))


# In[40]:


# Given a 1-D sequence, diag builds a square matrix with it on the diagonal.
np.diag([1, 2, 3])


# Interestingly...

# In[41]:


# The integers 1..9 laid out row by row as a 3x3 matrix.
A = np.arange(1, 10).reshape(3, 3)


# In[42]:


A


# In[43]:


# Given a 2-D array, diag goes the other way and extracts the diagonal.
np.diag(A)


# ## 04-05 array 過濾器
# 
# array 有個很有趣的用法...

# In[44]:


# Sample values: a mix of positive and negative numbers.
L = np.array([3, -2, -1, 5, 7, -3])


# A second array of the same length marks each position: True for "keep", False for "drop".

# In[45]:


want = np.array([True, False, False, True, True, False], dtype=bool)


# In other words, only the positive entries are wanted...

# In[46]:


L[want]


# The mask does not have to be typed by hand: a comparison builds it.

# In[47]:


L > 0


# In[48]:


L[L > 0]


# In[49]:


a = np.array([1, 2, 3])


# In[50]:


a.shape


# In[51]:


# Reshape in place into a single row with three columns.
a.shape = 1, 3


# In[52]:


# The same values again, plus one large entry (87) so that an upper bound matters.
L = np.array([3, -2, -1, 5, 7, -3, 87])


# In[53]:


L


# In[54]:


L > 0


# In[55]:


L < 10


# In[56]:


# Both conditions at once, combined element by element.
np.logical_and(L > 0, L < 10)


# In[57]:


L[np.logical_and(L > 0, L < 10)]


# ### [練習] 畫個函數 (如 sinc), 標出正的部份!

# In[58]:


# Sample sinc at 1000 evenly spaced points on [-5, 5].
x = np.linspace(-5, 5, num=1000)
y = np.sinc(x)

# Draw the whole curve as a thick line, then put round markers on the samples
# whose value is strictly positive.
positive = y > 0
plt.plot(x, y, linewidth=6)
plt.plot(x[positive], y[positive], 'o')


# ## 04-06 像上數學課一樣算數學!

# In[59]:


import sympy as sym


# In[60]:


# Plain Python division yields floats, so this sum is a floating-point value.
1/2 + 1/3


# In[61]:


# The same sum built from SymPy Rational objects instead of Python floats.
sym.Rational(1, 2) + sym.Rational(1, 3)


# Make the output prettier first...

# In[62]:


sym.init_printing()


# In[63]:


# Same expression as before, shown again now that init_printing() has been called.
sym.Rational(1, 2)  + sym.Rational(1, 3)


# A more convenient way -- sympify

# In[64]:


# Wrapping just the numerator with sym.S before dividing.
sym.S(1)/2 + sym.S(1)/3


# This is used so often that we import the names directly...

# In[65]:


from sympy import S, N


# In[66]:


S(1)/2 + S(1)/3


# In[67]:


sym.pi


# In[68]:


# Bind SymPy's pi to the Greek-letter identifier.
π = sym.pi


# In[69]:


π


# In[70]:


# Numeric evaluation of π; the second argument (20) is the requested precision.
N(π, 20)


# In[71]:


N(π, 10000)


# Everything shows up somewhere in π...

# In[72]:


# Evaluate π with precision argument 100000 and keep the result as a string to search in.
pistring = str(N(π, 100000))


# In[73]:


# Substring tests against the string form of π.
'1215' in pistring


# In[74]:


'9487' in pistring


# In[75]:


# Index of the first occurrence of '1215' (find returns -1 when there is none).
pistring.find('1215')


# In[76]:


# Four characters starting at index 11942 -- presumably the index reported by
# the find() call above; re-check the numbers if the precision is changed.
pistring[11942:11946]


# ## 04-07 質數

# In[77]:


# A four-digit number to experiment with.
a = 9487


# In[78]:


sym.isprime(a)


# In[79]:


# Prime factorisation of the same number.
sym.factorint(a)


# In[80]:


# 53 * 179 multiplies back to 9487.
53 * 179


# In[81]:


sym.isprime(179)


# In[82]:


# The 87th prime.
sym.prime(87)


# In[83]:


# The next prime after 449.
sym.nextprime(449)


# In[84]:


# Walk over consecutive primes (the i-th and the (i+1)-th, for i = 2..49) and
# report every pair whose gap is exactly 2.
for i in range(2, 50):
    a, b = sym.prime(i), sym.prime(i + 1)
    if b - a != 2:
        continue
    print(f'{a} 和 {b} 是孿生質數!')


# ## 04-08 Sympy 的矩陣

# In[85]:


# A 3x3 SymPy matrix with integer entries, written row by row.
A = sym.Matrix([[2, -1, 3],
               [0, 1, 2],
               [-1, 1, 2]])


# In[86]:


# Determinant of A.
A.det()


# In[87]:


# Inverse of A.
A.inv()


# ## 04-09 設定 Sympy 的變數

# ### 第一式

# In[88]:


# Approach 1: declare each symbol explicitly with sym.Symbol.
x = sym.Symbol('x')


# In[89]:


# A quadratic in x, built with ordinary Python arithmetic on the symbol.
f = x**2 - 3*x + 2


# In[90]:


f


# In[91]:


# Derivative of f with respect to x.
sym.diff(f, x)


# In[92]:


# Indefinite integral (antiderivative) of f with respect to x.
sym.integrate(f, x)


# In[93]:


g = 5*x**2 - 3*x + 9


# In[94]:


# Symbolic expressions combine with the usual operators.
f + g


# In[95]:


f * g


# In[96]:


# A symbol has to exist before it can appear in an expression. `k` was never
# declared anywhere in this file, so the assignment to `h` used to stop the
# script with a NameError; declare it the same way as `x`.
k = sym.Symbol('k')
h = 3*k**2 - 3*k + 2


# ### 第二式

# In[97]:


# Approach 2: import ready-made symbols from sympy.abc instead of creating
# each one with sym.Symbol. This rebinds the module-level names x, y and z.
from sympy.abc import x, y, z


# In[98]:


# Replace f with the arctangent of x.
f = sym.atan(x)


# In[99]:


# Derivative of f with respect to x.
sym.diff(f, x)


# In[100]:


# Indefinite integral of f with respect to x.
sym.integrate(f, x)


# In[101]:


# Definite integral: the tuple gives the variable and its limits, 0 to 1.
sym.integrate(f, (x, 0, 1))


# ## 【下期預告】熊貓終於是來了

# In[102]:


import pandas as pd


# In[103]:


# Location of the CSV file; pandas downloads it directly from this URL.
student_csv_url = 'http://stats.moe.gov.tw/files/detail/107/107_student.csv'
df = pd.read_csv(student_csv_url)


# In[104]:


# Peek at the first five rows.
df.head(n=5)


# 如果我們要算日間部, 用大學部一年級同學計算, 什麼學校女生比男生比例是最高的 (或男生比女生比例最高), 前 20 名要怎麼做呢?