Models


In [1]:
def f(x, p):
    """Evaluate a straight line at ``x``.

    Parameters
    ----------
    x : value (or array) at which to evaluate the line.
    p : indexable pair where ``p[0]`` is the intercept and ``p[1]`` the slope.

    Returns
    -------
    ``p[0] + x * p[1]``
    """
    intercept, slope = p[0], p[1]
    return intercept + x * slope


Analysis


In [2]:
import seaborn as sns

# Load the example "penguins" dataset through seaborn.
penguins = sns.load_dataset(name="penguins")
# Quick overview: pairwise plots of the columns, coloured by species.
sns.pairplot(data=penguins, hue="species")
Out[2]:
<seaborn.axisgrid.PairGrid at 0x1a19a840130>
In [3]:
# Keep only the two columns of interest and drop rows where either value is
# missing; this cleaned subset is also what the training cell fits on.
flipper = penguins[["body_mass_g", "flipper_length_mm"]].dropna()
# Scatter plot with a fitted regression line, drawn from the cleaned subset
# (not the full frame) so the figure shows exactly the data used for the fit.
sns.regplot(x="body_mass_g", y="flipper_length_mm", data=flipper)
Out[3]:
<AxesSubplot:xlabel='body_mass_g', ylabel='flipper_length_mm'>


Train


In [7]:
import sklearn.linear_model as lin

# Predictor: body mass, reshaped into a single-column 2-D array
# (one row per sample) before it is handed to the estimator.
x = flipper["body_mass_g"].to_numpy().reshape(-1, 1)
# Response: flipper length, kept as a 1-D array.
y = flipper["flipper_length_mm"].to_numpy()

# Ordinary least-squares fit of y on x.
model = lin.LinearRegression().fit(x, y)

# Despite the name, `r` holds the value of score(), i.e. the coefficient of
# determination (R^2) evaluated on the same data the model was fitted on.
r = model.score(x, y)
# Parameters in the [intercept, slope] order expected by f(x, p).
p = [model.intercept_, model.coef_[0]]
In [8]:
r
Out[8]:
0.7589925193571176
In [9]:
p
Out[9]:
[136.729559272662, 0.015275915608037302]


Predict


In [10]:
# Evaluate the fitted line at a body mass of 4500 g using the trained parameters.
f(x=4500.0, p=p)
Out[10]:
205.47117950882983
In [11]:
def predict(x):
    """Return the fitted line's value at ``x``.

    Thin wrapper around ``f`` that supplies the notebook-level parameter
    list ``p`` (looked up at call time), so callers only pass ``x``.
    """
    fitted_params = p
    return f(x, fitted_params)
In [12]:
# Same query as the direct f(...) call above, now through the wrapper.
predict(x=4500.0)
Out[12]:
205.47117950882983