#!/usr/bin/env python
# coding: utf-8

# Let's import some libraries first...

# In[1]:


import pandas
from pandas.plotting import scatter_matrix

from sklearn import datasets
from sklearn import model_selection
from sklearn import linear_model

# models
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

import matplotlib.pyplot as plt


# Load and examine dataset...

# In[2]:


# Read the half-year consumer-lending CSV into a DataFrame and echo it so the
# rows and column order can be inspected before modelling.
dataset = pandas.read_csv(
    filepath_or_buffer="data/ccp-consumer-lending-half-year.csv",
)
print(dataset)


# In[46]:


# Fit one ordinary-least-squares model per sliding window of consecutive rows
# and print the resulting equation for each window.
# Column 0 labels the period, column 1 is the regression target and
# columns 2-3 are the two explanatory variables.
array = dataset.values
num_data_points = 3  # number of rows in each window

window_count = len(array) - num_data_points + 1
for first in range(window_count):
    last = first + num_data_points - 1  # inclusive index of the window's final row
    window = array[first:last + 1]

    X = window[:, 2:4]  # features: avg_gross_loan_book, net_lending
    # Target: NPAT per the original annotation.
    # TODO confirm: the forecast cell treats p as a pre-tax figure.
    Y = window[:, 1]

    # A fresh estimator per window. After the loop, `model` still refers to
    # the fit for the final window, which the forecast cell reuses.
    model = LinearRegression()
    model.fit(X, Y)

    period_label = "Period %s to %s: " % (window[0, 0], window[-1, 0])
    equation = "p = %sbr + %sl + %s" % (
        model.coef_[0],
        model.coef_[1],
        model.intercept_,
    )
    # print's default single-space separator reproduces the gap between the
    # period label and the equation.
    print(period_label, equation)


# Where, in the equation printed for each period above:
# ```
# p = Net profit before tax (NPBT).
# r = Reporting period. Full year = 2, half year = 1.
# b = Average gross loan book.
# l = Net lending for the period.
# ```

# ## FY19 Predictions
# 
# Assumptions:
# 
# * Average gross loan book will be $196m.
# 
# * Net lending will be $50m, at the upper end of the forecast range. Quoting a high number here will actually reduce EBIT.
# 

# In[47]:


# FY19 forecast inputs. 196000 and 50000 encode the $196m / $50m assumptions
# stated above, i.e. amounts are expressed in thousands of dollars.
b = 196000  # average gross loan book
l = 50000   # net lending for the period
r = 2       # reporting period: 2 = full year, 1 = half year

# Evaluate the most recently fitted equation by hand:
#   p = coef[0] * b * r + coef[1] * l + intercept
# NOTE(review): only the loan-book term is scaled by r; the intercept is not.
# Confirm this is intended when projecting a full-year figure.
loan_book_term = model.coef_[0] * b * r
net_lending_term = model.coef_[1] * l
p = loan_book_term + net_lending_term + model.intercept_

# NOTE(review): 0.7 presumably applies a 30% tax rate to turn the pre-tax
# figure into NPAT; confirm the rate.
npat = p * 0.7

print("EBIT = %s" % p)
print("NPAT = %s" % npat)