Let's import some libraries first...
import pandas
from pandas.plotting import scatter_matrix
from sklearn import datasets
from sklearn import model_selection
from sklearn import linear_model
# models
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
Load and examine dataset...
# Read the half-year lending figures from CSV into a DataFrame, then echo the
# full table so its columns and rows can be inspected.
csv_path = "data/ccp-consumer-lending-half-year.csv"
dataset = pandas.read_csv(csv_path)
print(dataset)
period npbt avg_gross_loan_book net_lending revenue 0 1HFY14 -2830 27424.5 17511 7000 1 2HFY14 -692 49041.0 31619 12104 2 1HFY15 2907 67541.0 16220 16603 3 2HFY15 -1400 85802.0 34800 19223 4 1HFY16 2053 110302.0 31900 26204 5 2HFY16 6100 127976.5 23200 26214 6 1HFY17 4513 145476.5 30391 29735 7 2HFY17 13083 158238.0 15793 36639 8 1HFY18 8292 166238.0 24394 37566 9 2HFY18 14736 177548.0 28011 41770
# Rolling-window regression: fit one ordinary-least-squares model per run of
# `num_data_points` consecutive rows and print each fitted equation.
array = dataset.values
num_data_points = 3

# NOTE(review): each window has 3 rows and the model has 3 free parameters
# (two coefficients plus the default intercept), so there are no residual
# degrees of freedom -- expect every fit to reproduce its own points almost
# exactly. Confirm that this is intended before relying on the coefficients.
for i in range(0, len(array) - (num_data_points - 1)):
    begin = i
    end = i + (num_data_points - 1)  # index of the window's last row (inclusive)

    # Columns are addressed by position; per the printed table they are
    # 0=period, 1=npbt, 2=avg_gross_loan_book, 3=net_lending, 4=revenue.
    X = array[begin:end+1,2:4]  # features: avg_gross_loan_book, net_lending
    Y = array[begin:end+1,1]  # target: npbt (net profit before tax)

    model = LinearRegression()
    model.fit(X, Y)  # train on this window only

    print("Period %s to %s: " % (array[begin,0], array[end,0]), end =" ")
    print("p = %sbr + %sl + %s" % (model.coef_[0], model.coef_[1], model.intercept_))

# After the loop, `model` remains bound to the fit for the LAST window only.
Period 1HFY14 to 1HFY15: p = 0.140936039634br + -0.064399199089l + -5567.40604369 Period 2HFY14 to 2HFY15: p = 0.000873579448887br + -0.232666977089l + 6621.85593883 Period 1HFY15 to 1HFY16: p = 0.101672186835br + -0.331734973293l + 1420.70009581 Period 2HFY15 to 2HFY16: p = 0.113066609345br + -0.235471748635l + -2906.92436252 Period 1HFY16 to 1HFY17: p = 0.0547529021572br + -0.353939061014l + 7304.3014326 Period 2HFY16 to 2HFY17: p = 0.110760929767br + -0.490240059925l + 3298.77326195 Period 1HFY17 to 1HFY18: p = 0.0166480875203br + -0.572513045014l + 19490.3384469 Period 2HFY17 to 2HFY18: p = 1.0645668191br + -1.54720783081l + -130936.871049
Where
p = Net profit before tax (NPBT).
r = Reporting-period multiplier: 2 for a full year, 1 for a half year.
b = Average gross loan book.
l = Net lending for the period.
Assumptions:
Average gross loan book will be $196m.
Net lending will be $50m, at the upper end of the forecast range. Because the fitted net-lending coefficient is negative in every window above, quoting a high number here will actually reduce the projected EBIT.
# Forecast inputs. The $196m / $50m assumptions are entered as 196000 / 50000,
# i.e. in thousands of dollars.
b, l, r = 196000, 50000, 2  # avg gross loan book, net lending, period multiplier

# NOTE(review): `model` is whatever was fitted last by the code above, so the
# forecast rests on a single window's coefficients -- confirm this is intended.
loan_book_term = model.coef_[0] * b * r
net_lending_term = model.coef_[1] * l
p = loan_book_term + net_lending_term + model.intercept_

after_tax_factor = 0.7  # share of the pre-tax figure reported as NPAT
print("EBIT = %s" % p)
print("NPAT = %s" % (p * after_tax_factor))
EBIT = 209012.930498 NPAT = 146309.051349