#!/usr/bin/env python
# coding: utf-8

# In[86]:


import tensorflow as tf
import pandas as pd
import numpy as np


# In[87]:


def read_csv(file):
    """Parse the CSV at ``file`` into a DataFrame with pandas' default settings."""
    frame = pd.read_csv(file)
    return frame

def cleanup_data(data):
    """Return a cleaned copy of a game-log frame with integer Age, MP and PTS.

    Rows whose ``G`` value is missing are removed first (games in which the
    player did not play), then three columns are normalized:

    * ``Age`` -- the text before the first ``-`` is kept and cast to ``int``
      (presumably a "years-days" string; verify against the CSV files).
    * ``MP``  -- the text before the first ``:`` is kept and cast to ``int``,
      i.e. whole minutes without the seconds part.
    * ``PTS`` -- cast to ``int``.

    The frame passed in is left untouched; a new frame is returned.
    """
    # Filter with a positional boolean mask instead of dropping by index
    # label: dropping by label would also remove any *played* row that
    # happens to share an index label with a missing-``G`` row.
    played = data.loc[data["G"].notnull().values].copy()

    # ``n`` must be passed by keyword: pandas 2.0 made every ``str.split``
    # argument after ``pat`` keyword-only, so ``split('-', 1)`` raises there.
    played["Age"] = played["Age"].str.split("-", n=1).str[0].astype(int)

    # Keep only the minutes part of the playing time.
    played["MP"] = played["MP"].str.split(":").str[0].astype(int)

    played["PTS"] = played["PTS"].astype(int)
    return played

def read_and_clean_csv(file, columns=None):
    """Read the CSV at ``file``, clean it, and return the requested columns.

    Args:
        file: Path or file-like object handed to ``read_csv``.
        columns: Column selector applied to the cleaned frame (typically a
            list of column names). When omitted (``None``), every cleaned
            column is returned; the previous mutable ``[]`` default selected
            no columns at all, yielding a frame without any data.

    Returns:
        The cleaned data restricted to ``columns``.
    """
    clean_data = cleanup_data(read_csv(file))
    if columns is None:
        return clean_data
    return clean_data[columns]

# Column used as the regression target. The original note also mentions
# "+/-", presumably as an alternative label that was tried -- TODO confirm.
csv_label_name = "PTS"
# Columns fed to the model as inputs.
feature_names = ["G", "MP", "Age"]
# Every column kept from each season CSV: the features followed by the label.
csv_column_names = list(feature_names)
csv_column_names.append(csv_label_name)

# One numeric TensorFlow feature column per input feature name.
feature_columns = list(map(tf.feature_column.numeric_column, feature_names))

def read_seasons(seasons=range(2002, 2018)):
    """Load, clean and stack one ``<season>.csv`` file per entry in ``seasons``.

    Each file name is the string form of the season followed by ``.csv``; by
    default that is ``2002.csv`` through ``2017.csv``. Only the columns listed
    in ``csv_column_names`` are kept, and the per-season frames are
    concatenated in iteration order with their original index labels.
    """
    per_season = [
        read_and_clean_csv(str(season) + ".csv", columns=csv_column_names)
        for season in seasons
    ]
    return pd.concat(per_season)

# Load every season, then split the rows by position without shuffling:
# the leading ``train_factor`` share is the training set, the rest the test set.
stats = read_seasons()
train_factor = 0.8
train = stats.iloc[:int(len(stats) * train_factor)]
test = stats.iloc[int(len(stats) * train_factor):]


# In[88]:


# Report the (rows, columns) shape of each split and of the full data set.
print("test:{0} train: {1} stats: {2}".format(test.shape, train.shape, stats.shape))


# In[89]:


# Training input: features and labels are rebuilt from raw ``.values`` so both
# carry a fresh default index instead of the labels inherited from the
# concatenated season frames.
input_fn_train = tf.estimator.inputs.pandas_input_fn(
    x=pd.DataFrame({name: train[name].values for name in ("Age", "MP", "G")}),
    y=pd.Series(train["PTS"].values),
    num_epochs=20000,
    shuffle=False,
)


# In[90]:


# Regressor with two hidden layers of five units each over the numeric
# feature columns. The commented-out line below is the linear alternative.
estimator = tf.estimator.DNNRegressor(
    feature_columns=feature_columns,
    hidden_units=[5, 5],
)
# estimator = tf.estimator.LinearRegressor(feature_columns=feature_columns)


# In[91]:


# Fit the model on the training input, requesting 20000 training steps.
estimator.train(
    input_fn=input_fn_train,
    steps=20000,
)


# In[92]:


# Evaluation input: same construction as the training input, but built from
# the held-out rows and left at the default number of epochs.
input_fn_test = tf.estimator.inputs.pandas_input_fn(
    x=pd.DataFrame({name: test[name].values for name in ("Age", "MP", "G")}),
    y=pd.Series(test["PTS"].values),
    shuffle=False,
)

# Run the evaluation and report the "loss" entry of the returned metrics.
evaluation = estimator.evaluate(input_fn=input_fn_test)
loss_score = evaluation["loss"]
print("Loss: {0:f}".format(loss_score))


# In[93]:


import itertools

# Prediction input: held-out features only, no labels.
input_fn_predict = tf.estimator.inputs.pandas_input_fn(
    x=pd.DataFrame({name: test[name].values for name in ("Age", "MP", "G")}),
    y=None,
    shuffle=False,
)

# Each yielded item is a dict; keep the first element of its "predictions"
# entry so the result is a flat list with one value per test row.
predictions = [
    p["predictions"][0] for p in estimator.predict(input_fn=input_fn_predict)
]


# In[94]:


# The ``%matplotlib inline`` magic is only available inside IPython/Jupyter.
# Under a plain ``python`` interpreter ``get_ipython`` is undefined, so the
# NameError is tolerated to keep the script runnable there as well.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

import matplotlib.pyplot as plt

# Actual points per test row (first line) against the model's predictions
# (second line), both plotted over the row position.
plt.figure(figsize=(10,10))
plt.plot(test.PTS.values)
plt.plot(predictions)
# Render this figure now so later plotting calls do not draw onto it when the
# file is run as a script rather than cell by cell.
plt.show()


# In[95]:


# Signed error per test row: actual points minus predicted points.
delta=test.PTS-predictions
# Mean absolute error. Taking abs() of the *mean* instead would let over- and
# under-predictions cancel each other out and report the bias, not the error.
mean_abs_error = np.mean(np.abs(delta))
# Print explicitly: a bare expression is only displayed inside a notebook.
print("Mean absolute error: {0:.2f}".format(mean_abs_error))


# In[96]:


# Line plot of points against age over every loaded row, connected in row order.
plt.plot(stats["Age"], stats["PTS"])
plt.show()


# In[100]:


# Scatter plot of points against age: one marker per loaded row.
plt.scatter(stats.Age, stats.PTS)
plt.show()