#!/usr/bin/env python
# coding: utf-8

# Notebook export: fits a linear regression of profit on average gross loan
# book and net lending over every rolling window of consecutive reporting
# periods, then uses the most recent window's fit for an FY19 forecast.

# Let's import some libraries first...

# In[1]:


import pandas
from pandas.plotting import scatter_matrix
from sklearn import datasets
from sklearn import model_selection
from sklearn import linear_model

# models
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

import matplotlib.pyplot as plt


# Load and examine dataset...

# In[2]:


dataset = pandas.read_csv("data/ccp-consumer-lending-half-year.csv")
print(dataset)


# Fit one model per rolling window of `num_data_points` consecutive rows and
# print its equation. The model from the LAST window is kept in `model` and
# drives the prediction cell further down.

# In[46]:


array = dataset.values
num_data_points = 3

# Stays None when the dataset has fewer rows than one window, so the problem
# can be reported clearly instead of surfacing later as a NameError.
model = None

for begin in range(len(array) - num_data_points + 1):
    end = begin + num_data_points - 1  # inclusive index of the window's last row
    window = array[begin:end + 1]

    # Column 0 is used as the period label in the printout below.
    X = window[:, 2:4]  # features = avg_gross_loan_book, net_lending
    # NOTE(review): the original comment labelled column 1 "NPAT", but the
    # legend below calls p "NPBT" and the forecast cell treats it as a
    # pre-tax figure (NPAT = p * 0.7). Confirm against the CSV header.
    Y = window[:, 1]  # target = profit

    model = LinearRegression()
    model.fit(X, Y)  # train model

    print("Period %s to %s: " % (array[begin, 0], array[end, 0]), end=" ")
    print("p = %sbr + %sl + %s" % (model.coef_[0], model.coef_[1], model.intercept_))

if model is None:
    raise ValueError(
        "Need at least %d rows to fit a model, but the dataset has %d."
        % (num_data_points, len(array))
    )


# Where
# ```
# p = Net profit before tax (NPBT).
# r = Reporting period. Full year = 2, half year = 1.
# b = Average gross loan book.
# l = Net lending for the period.
# ```
#
# Note that r is not a fitted feature: the regression only sees b and l, and
# r is applied as a multiplier on the b term when predicting.

# ## FY19 Predictions
#
# Assumptions:
#
# * Average gross loan book will be $196m.
#
# * Net lending will be $50m, on the upper range of the forecast. Quoting a high number here will actually reduce EBIT.
#

# In[47]:


# Inputs are entered as 196000 / 50000 for the $196m / $50m assumptions above,
# i.e. presumably in thousands of dollars -- confirm this matches the CSV units.
b = 196000
l = 50000
r = 2  # full-year forecast (see legend: full year = 2, half year = 1)

# Multiplier applied to the pre-tax result to obtain NPAT
# (presumably a 30% tax rate -- confirm).
after_tax_factor = 0.7

# Uses the model fitted on the most recent window of periods.
p = model.coef_[0] * b * r + model.coef_[1] * l + model.intercept_
print("EBIT = %s" % p)
print("NPAT = %s" % (p * after_tax_factor))