In [76]:
#AUTHOR: WHIRLDATA
#USING SAMPLE SALES DATA FOR TWO PRODUCTS (PDT1 -> X, PDT2 -> Y), PREDICT THE VALUE OF PDT2 FROM PDT1 BY FITTING Y = A + B*X WITH THE NORMAL EQUATION
In [ ]:
#IMPORTING LIBRARIES
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [90]:
#INPUT FOR THE PREDICTION: THE PDT1 VALUE WHOSE PDT2 VALUE IS TO BE ESTIMATED
testData = 10
In [91]:
#BUILD THE TRAINING SET COLUMN BY COLUMN: PDT1 IS THE PREDICTOR, PDT2 THE VALUE TO PREDICT
#THE COLUMN ORDER IS FIXED EXPLICITLY SO THE FRAME ALWAYS PRINTS AS pdt1, pdt2
trainingData = pd.DataFrame(
    {'pdt1': [10, 11.55, 10.60], 'pdt2': [12, 13.56, 14]},
    columns=['pdt1', 'pdt2'],
)
print(trainingData)
    pdt1   pdt2
0  10.00  12.00
1  11.55  13.56
2  10.60  14.00
In [92]:
#INSERTING CONSTANT (A IN "Y=A+BX")
#THE COLUMN OF ONES IS ADDED ONLY IF IT IS MISSING, SO RE-RUNNING THIS CELL DOES NOT
#FAIL ON A DUPLICATE 'cons' COLUMN; ITS LENGTH FOLLOWS THE NUMBER OF TRAINING ROWS
#INSTEAD OF A HARD-CODED 3
if 'cons' not in trainingData.columns:
    trainingData.insert(0, 'cons', np.ones(len(trainingData)))
print(trainingData)
   cons   pdt1   pdt2
0   1.0  10.00  12.00
1   1.0  11.55  13.56
2   1.0  10.60  14.00
In [93]:
#SPLIT THE FRAME INTO THE INDEPENDENT PART X (CONSTANT + PDT1) AND THE DEPENDENT PART y (PDT2)
#BOTH ARE SELECTED WITH A LIST OF LABELS, SO y STAYS A ONE-COLUMN DATAFRAME
X = trainingData.loc[:, ['cons', 'pdt1']]
y = trainingData.loc[:, ['pdt2']]
In [94]:
#COMPUTE X'X: X TRANSPOSED MULTIPLIED BY X (2X2, ONE ROW/COLUMN PER COLUMN OF X)
xTx = X.transpose().dot(X)
print(xTx)
       cons      pdt1
cons   3.00   32.1500
pdt1  32.15  345.7625
In [95]:
#INVERTING THE PREVIOUSLY CALCULATED VALUE
#NOTE: DESPITE ITS NAME, XtX HOLDS THE INVERSE OF xTx, I.E. (X'X)^-1
XtX = np.linalg.inv(xTx)
print(XtX)
[[ 94.34174625  -8.77216917]
 [ -8.77216917   0.81855389]]
In [96]:
#MULTIPLY XtX BY X TRANSPOSED (ONE COLUMN PER TRAINING ROW IN THE RESULT)
XtX_xT = XtX.dot(X.transpose())
print(XtX_xT)
[[ 6.62005457 -6.97680764  1.35675307]
 [-0.58663029  0.68212824 -0.09549795]]
In [97]:
#THETA = XtX_xT TIMES y; ROW 0 IS A (THE 'cons' COEFFICIENT), ROW 1 IS B (THE 'pdt1' COEFFICIENT)
theta = np.dot(XtX_xT, y)
print(theta)
[[ 3.82968622]
 [ 0.87312415]]
In [98]:
#PULL A (ROW 0) AND B (ROW 1) OUT OF THETA AND SHOW THEM IN THAT ORDER
consA, consB = theta[0, 0], theta[1, 0]
print(consA)
print(consB)
3.82968622101
0.87312414734
In [99]:
#PREDICTION CALCULATION: Y = A + B*X EVALUATED AT X = testData
prediction = consB * testData + consA
In [101]:
#SHOW THE PREDICTED PDT2 VALUE
print(prediction)
12.5609276944
In [ ]:
#THE ACTUAL PDT2 VALUE FOR PDT1 = 10 IS 12 (FIRST TRAINING ROW); THE PREDICTED VALUE IS 12.56