In [6]:
# !pip install yfinance # uncomment these to install missing packages if they are not already installed
# !pip install pandas 

import yfinance as yf
import pandas as pd

def get_price(tick,start='2020-10-01',end=None):
    """Fetch the 'Close' column of the yfinance history for one ticker.

    Parameters
    ----------
    tick : ticker symbol handed to ``yf.Ticker``.
    start : start date forwarded to ``Ticker.history`` (default '2020-10-01').
    end : end date forwarded to ``Ticker.history``; ``None`` is passed through
        unchanged.

    Returns
    -------
    The ``'Close'`` column selected from the frame that ``history`` returns.
    """
    ticker = yf.Ticker(tick)
    history = ticker.history(start=start, end=end)
    return history['Close']

def get_prices(tickers,start='2020-10-01',end=None):
    """Collect the closing prices of several tickers into one DataFrame.

    Parameters
    ----------
    tickers : iterable of ticker symbols; each one becomes a column name.
    start, end : forwarded unchanged to ``get_price`` for every ticker.

    Returns
    -------
    A ``pandas.DataFrame`` with one column per ticker, in iteration order.
    An empty ``tickers`` yields an empty frame.
    """
    prices = pd.DataFrame()
    for symbol in tickers:
        # NOTE(review): column assignment aligns on the frame's existing index,
        # so later tickers are presumably reindexed to the dates of the first
        # ticker -- confirm this is the intended behavior.
        prices[symbol] = get_price(symbol, start=start, end=end)
    return prices
Requirement already satisfied: yfinance in /home/phsamuel/p3/lib/python3.8/site-packages (0.1.55)
Requirement already satisfied: multitasking>=0.0.7 in /home/phsamuel/p3/lib/python3.8/site-packages (from yfinance) (0.0.9)
Requirement already satisfied: pandas>=0.24 in /home/phsamuel/p3/lib/python3.8/site-packages (from yfinance) (1.2.1)
Requirement already satisfied: requests>=2.20 in /home/phsamuel/p3/lib/python3.8/site-packages (from yfinance) (2.22.0)
Requirement already satisfied: lxml>=4.5.1 in /home/phsamuel/p3/lib/python3.8/site-packages (from yfinance) (4.6.2)
Requirement already satisfied: numpy>=1.15 in /home/phsamuel/p3/lib/python3.8/site-packages (from yfinance) (1.20.0)
Requirement already satisfied: pytz>=2017.3 in /usr/lib/python3/dist-packages (from pandas>=0.24->yfinance) (2019.3)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/lib/python3/dist-packages (from pandas>=0.24->yfinance) (2.7.3)
Requirement already satisfied: pandas in /home/phsamuel/p3/lib/python3.8/site-packages (1.2.1)
Requirement already satisfied: pytz>=2017.3 in /usr/lib/python3/dist-packages (from pandas) (2019.3)
Requirement already satisfied: numpy>=1.16.5 in /home/phsamuel/p3/lib/python3.8/site-packages (from pandas) (1.20.0)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/lib/python3/dist-packages (from pandas) (2.7.3)

Prepare training and testing data sets

In [2]:
# Tickers used as model inputs, and the single ticker whose price is predicted.
feature_stocks = [
    'tsla', 'fb', 'twtr', 'amzn', 'nflx',
    'gbtc', 'gdx', 'intc', 'dal', 'c',
]
predict_stock = 'msft'

# Training set: features and target are downloaded separately over the same
# date window.
start_date_train = '2020-10-01'
end_date_train = '2020-12-31'

X_train = get_prices(feature_stocks, start=start_date_train, end=end_date_train)
y_train = get_prices([predict_stock], start=start_date_train, end=end_date_train)

# Testing set: the end date is omitted, so get_prices uses its default
# end=None (intended to mean "up to today").
start_date_test = '2021-01-01'
X_test = get_prices(feature_stocks, start=start_date_test)
y_test = get_prices([predict_stock], start=start_date_test)
In [4]:
# Bare expression on the cell's last line so the notebook renders X_train.
X_train
Out[4]:
tsla fb twtr amzn nflx gbtc gdx intc dal c
Date
2020-10-01 448.160004 266.630005 46.700001 3221.260010 527.510010 10.870000 39.364471 51.862778 31.100000 42.545544
2020-10-02 415.089996 259.940002 46.119999 3125.000000 503.059998 10.860000 38.767590 50.641655 31.750000 42.761013
2020-10-05 425.679993 264.649994 47.310001 3199.199951 520.650024 11.280000 39.364471 51.316746 32.000000 43.985275
2020-10-06 413.980011 258.660004 45.599998 3099.959961 505.869995 10.845000 37.912056 50.999058 31.059999 43.495571
2020-10-07 425.299988 258.119995 45.869999 3195.689941 534.659973 10.970000 38.150806 52.289669 32.150002 43.916718
... ... ... ... ... ... ... ... ... ... ...
2020-12-23 645.979980 268.109985 54.299999 3185.270020 514.479980 28.879999 35.919998 46.570000 40.240002 60.266277
2020-12-24 661.770020 267.399994 53.970001 3172.689941 513.969971 27.350000 36.029999 47.070000 39.730000 60.058056
2020-12-28 663.690002 277.000000 54.430000 3283.959961 519.119995 30.450001 35.689999 47.070000 40.150002 60.613323
2020-12-29 665.989990 276.779999 54.360001 3322.000000 530.869995 30.080000 35.740002 49.389999 40.029999 60.395180
2020-12-30 694.780029 271.869995 54.330002 3285.850098 524.590027 32.900002 36.560001 48.750000 40.560001 60.345604

63 rows × 10 columns

In [5]:
# Bare expression on the cell's last line so the notebook renders y_train.
y_train
Out[5]:
msft
Date
2020-10-01 211.905228
2020-10-02 205.651596
2020-10-05 209.830658
2020-10-06 205.372330
2020-10-07 209.282089
... ...
2020-12-23 221.020004
2020-12-24 222.750000
2020-12-28 224.960007
2020-12-29 224.149994
2020-12-30 221.679993

63 rows × 1 columns

Convert the training and testing data into NumPy arrays

In [ ]:
import numpy as np

# Rebind each data set to the NumPy array built from it by np.array.
# y_train / y_test were created as one-column frames, so they presumably
# become 2-D arrays with a single column -- confirm before doing matrix algebra.
X_train, y_train, X_test, y_test = (
    np.array(frame) for frame in (X_train, y_train, X_test, y_test)
)

Use linear regression to predict msft stock price from the other stocks' prices

1. Append a dummy feature to both X_train and X_test

In [ ]:
# Your solution here

2. Find the best linear regression model based on your training data ($w=(X X')^{-1} X y$)

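Note that this formula assumes each column of $X$ is one sample (features × samples), whereas the arrays above store one sample per row, so you may need to transpose the matrices to make the shapes work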

In [ ]:
# Your solution here

3. Report your training and testing error

How far is your prediction from the actual price? Compute the mean squared error for both the training and testing sets.

In [ ]:
# Your solution here
In [ ]:
 
In [ ]: