In [76]:
#AUTHOR: WHIRLDATA
#USING SAMPLE SALES DATA FOR TWO PRODUCTS (PDT1 -> X, PDT2 -> Y), PREDICT THE VALUE OF PDT2 FROM PDT1 WITH A LINEAR FIT "Y=A+BX" SOLVED VIA THE NORMAL EQUATION

In [ ]:
#IMPORTING LIBRARIES
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [90]:
#PDT1 VALUE FOR WHICH THE MATCHING PDT2 VALUE WILL BE PREDICTED
testData = 10

In [91]:
#BUILDING THE TRAINING SET COLUMN BY COLUMN (ONE ROW PER OBSERVATION OF PDT1/PDT2)
trainingData = pd.DataFrame(
    {
        'pdt1': [10, 11.55, 10.60],
        'pdt2': [12, 13.56, 14],
    },
    columns=['pdt1', 'pdt2'],
)
print(trainingData)

    pdt1   pdt2
0  10.00  12.00
1  11.55  13.56
2  10.60  14.00

In [92]:
#INSERTING CONSTANT (A IN "Y=A+BX")
#THE COLUMN OF ONES IS SIZED FROM THE FRAME ITSELF SO IT STAYS CORRECT IF ROWS ARE ADDED OR REMOVED
#THE GUARD MAKES THE CELL SAFE TO RE-RUN: DataFrame.insert RAISES ValueError WHEN 'cons' ALREADY EXISTS
if 'cons' not in trainingData.columns:
    trainingData.insert(0, 'cons', np.ones(len(trainingData)))
print(trainingData)

   cons   pdt1   pdt2
0   1.0  10.00  12.00
1   1.0  11.55  13.56
2   1.0  10.60  14.00

In [93]:
#SPLITTING THE FRAME: X HOLDS THE INDEPENDENT COLUMNS (CONSTANT + PDT1), y HOLDS THE DEPENDENT COLUMN (PDT2)
X = trainingData.loc[:, ['cons', 'pdt1']]
y = trainingData.loc[:, ['pdt2']]

In [94]:
#COMPUTING X-TRANSPOSE TIMES X
xTx = X.transpose().dot(X)
print(xTx)

       cons      pdt1
cons   3.00   32.1500
pdt1  32.15  345.7625

In [95]:
#INVERTING THE PREVIOUSLY CALCULATED X-TRANSPOSE-TIMES-X MATRIX
#NOTE: DESPITE ITS NAME, XtX HOLDS THE INVERSE OF xTx, NOT THE PRODUCT ITSELF
XtX = np.linalg.inv(xTx)
print(XtX)

[[ 94.34174625  -8.77216917]
[ -8.77216917   0.81855389]]

In [96]:
#MULTIPLYING THE INVERSE COMPUTED ABOVE (XtX) BY X TRANSPOSED
XtX_xT = XtX.dot(X.T)
print(XtX_xT)

[[ 6.62005457 -6.97680764  1.35675307]
[-0.58663029  0.68212824 -0.09549795]]

In [97]:
#FINDING THETA VALUE (VALUE FOR A, B) BY MULTIPLYING THE PREVIOUS RESULT WITH y
#ROW 0 MATCHES THE 'cons' COLUMN OF X (A), ROW 1 MATCHES THE 'pdt1' COLUMN OF X (B)
theta = XtX_xT.dot(y)
print(theta)

[[ 3.82968622]
[ 0.87312415]]

In [98]:
#PULLING THE TWO FITTED COEFFICIENTS OUT OF THETA AS SCALARS
#consA IS THE CONSTANT TERM (A), consB IS THE MULTIPLIER OF PDT1 (B)
consA = theta[0, 0]
print(consA)
consB = theta[1, 0]
print(consB)

3.82968622101
0.87312414734

In [99]:
#APPLYING THE FITTED LINE "Y=A+BX" TO THE VALUE TO PREDICT
prediction = consA + consB * testData

In [101]:
#SHOWING THE PREDICTED PDT2 VALUE FOR testData
print(prediction)

12.5609276944

In [ ]:
#THE ACTUAL PDT2 VALUE FOR PDT1 = 10 IS 12; THE PREDICTED VALUE IS APPROXIMATELY 12.56