from __future__ import print_function

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
# Fit a linear model of 'Cap Hit' on 'PER' from the salaries/per CSVs, then use
# it to attach a 'Predicted Salary' column to the rows of the ncaa CSV.
#
# NOTE: bare expressions such as `df.head()` only display something in an
# interactive session (notebook/REPL); they are kept as inspection points.

# Input locations and tuning constants, gathered here so they can be changed
# in one place.
SALARIES_CSV = '/Users/danielforsyth/Desktop/salaries.csv'
PER_CSV = '/Users/danielforsyth/Desktop/per.csv'
NCAA_CSV = '/Users/danielforsyth/Desktop/ncaa.csv'
MIN_MPG = 6.09       # rows whose MPG is not strictly above this are dropped
SAMPLE_PER = 33.68   # single PER value used for the one-off prediction below
RANDOM_STATE = 0     # fixed seed so the train/test split is reproducible

# --- Load the two input tables and keep only the columns that are used ------
df = pd.read_csv(SALARIES_CSV)
df.head()
df = df[['Player', 'Cap Hit']]
df.head()

per = pd.read_csv(PER_CSV)
per.head()
per['MPG'] = per['MP'] / per['G']
per = per[['Player', 'PER', 'MPG']]
per.head()

# --- Join on player name, apply the MPG threshold, drop incomplete rows -----
final = pd.merge(df, per, on='Player', how='outer')
final.head()
final = final[final['MPG'] > MIN_MPG]
# The result of dropna() must be assigned: it returns a new frame and leaves
# the original untouched. Doing it here keeps NaN rows (players present in only
# one of the two files) out of the plots and the correlation table as well.
final = final.dropna()
final.head()

# --- Plot configuration ------------------------------------------------------
# The pandas `display.mpl_style` option is not available in current pandas
# releases; a matplotlib style sheet is used instead.
plt.style.use('ggplot')
from matplotlib import rcParams
rcParams['figure.figsize'] = (10, 6)
rcParams['figure.dpi'] = 150

plt.scatter(final['PER'], final['Cap Hit'])

smaller_frame = final[['Cap Hit', 'PER', 'MPG']]
try:
    from pandas.plotting import scatter_matrix
except ImportError:  # older pandas keeps it under pandas.tools
    from pandas.tools.plotting import scatter_matrix
axeslist = scatter_matrix(smaller_frame, alpha=0.8, figsize=(12, 12), diagonal="kde")
for ax in axeslist.flatten():
    ax.grid(False)

# Correlate only the numeric columns; including the string 'Player' column
# makes corr() fail on recent pandas versions.
smaller_frame.corr()
final.dtypes

# --- Train/test regression of Cap Hit on PER ---------------------------------
# Double-bracket selection yields (n_samples, 1) arrays, the 2-D layout that
# scikit-learn estimators expect. NOTE: `per` is rebound from the DataFrame
# above to this array, as in the original script.
cap = final[['Cap Hit']].values
per = final[['PER']].values
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn keeps it under cross_validation
    from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    per, cap, random_state=RANDOM_STATE)
clf1 = LinearRegression()
clf1.fit(X_train, y_train)
predicted_train = clf1.predict(X_train)
predicted_test = clf1.predict(X_test)
trains = X_train.ravel()
tests = X_test.ravel()
print(clf1.coef_, clf1.intercept_)

# Start a fresh figure so this plot is not drawn onto the last panel of the
# scatter matrix created above.
plt.figure()
plt.scatter(per, cap, c='r')
plt.plot(trains, predicted_train, c='b', alpha=0.5)

# --- Refit on all rows and predict for one sample PER value ------------------
lr = LinearRegression()
lr.fit(per, cap)
b_0 = lr.intercept_
coeff = lr.coef_
# predict() needs a 2-D (n_samples, n_features) array, not a bare scalar.
pred = lr.predict(np.array([[SAMPLE_PER]]))
pred

# --- Apply the model to the ncaa table ---------------------------------------
ncaa = pd.read_csv(NCAA_CSV)
ncaa = ncaa[ncaa['MPG'] > MIN_MPG]
ncaa.head()
# Rows without a PER cannot be scored, so they are dropped. The explicit copy
# makes the column assignment below operate on an independent frame instead of
# a slice of the filtered one (avoids SettingWithCopyWarning).
ncaa = ncaa[['PLAYER', 'PER']].dropna(subset=['PER']).copy()
ncaa.head()
ncaa_per = ncaa[['PER']].values
ncaa_player = ncaa[['PLAYER']].values
# One vectorised predict() call replaces the per-row loop; predict() rejects an
# empty input, so that case is short-circuited.
raw_predictions = lr.predict(ncaa_per).ravel() if len(ncaa_per) else []
# Values are stored as strings with two decimals, as the original loop did.
predictions = ['{:.2f}'.format(value) for value in raw_predictions]
ncaa['Predicted Salary'] = predictions
ncaa.head(15)