from random import random

from IPython.display import SVG
import pygal

from pybrain.structure import SigmoidLayer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError

from utils import get_matches, get_team_stats, extract_samples, normalize, split_samples, graph_teams_stat_bars, graph_matches_results_scatter


# Names of the features fed to the classifier as input data.
# A name ending in '_2' is the second team's value of the same stat.
input_features = [
    'year',
    'matches_won_percent',
    'podium_score_yearly',
    'matches_won_percent_2',
    'podium_score_yearly_2',
]

# The feature holding the result the classifier must learn to predict
# (using 'winner' is always recommended).
output_feature = 'winner'

# Leave tied matches out of the learning process; this greatly improves the
# classifier's accuracy. Some real matches will be ties, but failing on those
# is an accepted trade-off for better accuracy on all the rest.
# For now the code breaks if this is set to False: the network uses a sigmoid
# output with a threshold, so it can only distinguish 2 kinds of results.
exclude_ties = True

# Duplicate the matches data with the teams reversed (team1 -> team2, and
# vice versa). This helps the visualizations and also improves the precision
# of the predictions, by avoiding a dependence on the order in which the
# teams appear in the input.
duplicate_with_reversed = True

def show(graph):
    '''Render a pygal graph and wrap the result in an IPython SVG object.'''
    rendered = graph.render()
    return SVG(rendered)

# Load the per-team statistics (indexed by team name, one column per stat, as
# used by predict() below). The bare `team_stats` expression has no effect in
# a plain script; presumably it is there to display the table when this code
# runs as a notebook cell.
team_stats = get_team_stats()
team_stats

# Graph the 'matches_won_percent' stat of the teams.
show(graph_teams_stat_bars(team_stats, 'matches_won_percent'))

# Graph the 'podium_score_yearly' stat of the teams.
show(graph_teams_stat_bars(team_stats, 'podium_score_yearly'))

# Load the matches, asking the helper to attach team stats and to apply the
# duplicate/tie settings configured at the top of the file.
matches = get_matches(with_team_stats=True,
                      duplicate_with_reversed=duplicate_with_reversed,
                      exclude_ties=exclude_ties)
        
# No effect in a plain script; presumably displays the matches in a notebook.
matches

# Graph match results against each team's 'matches_won_percent'.
show(graph_matches_results_scatter(matches, 'matches_won_percent', 'matches_won_percent_2'))

# Graph match results against each team's 'podium_score_yearly'.
show(graph_matches_results_scatter(matches, 'podium_score_yearly', 'podium_score_yearly_2'))

# Pull the configured input features and the output feature out of the matches.
inputs, outputs = extract_samples(matches,
                                  input_features,
                                  output_feature)

# Normalize the inputs; the returned normalizer is kept because predict()
# must apply the same transform to new samples.
normalizer, inputs = normalize(inputs)

# Split the samples into a training set and a test set.
train_inputs, train_outputs, test_inputs, test_outputs = split_samples(inputs, outputs)

# Build the network: one input per feature, two hidden layers each sized at
# 10x the number of features, and a single output unit using a sigmoid layer
# (neural_result() thresholds that output at 0.5).
n = buildNetwork(len(input_features),
                 10 * len(input_features),
                 10 * len(input_features),
                 1,
                 outclass=SigmoidLayer,
                 bias=True)


def neural_result(input):
    """Activate the network on one sample and map its output to a match result.

    Returns 2 when the network output is at least 0.5, and 1 otherwise.
    """
    return 2 if n.activate(input) >= 0.5 else 1
    
def test_network():
    """Print the network's accuracy (in percent) on the train and test sets.

    Each accuracy is ``100 - percentError(predictions, expected)``, where the
    predictions come from calling ``neural_result`` on every sample.
    """
    # Build real lists rather than passing map(...) along: under Python 3
    # map() returns a lazy iterator instead of a list, and the predictions
    # are compared against the expected outputs as a sequence.
    train_predictions = [neural_result(sample) for sample in train_inputs]
    train_accuracy = 100 - percentError(train_predictions, train_outputs)

    test_predictions = [neural_result(sample) for sample in test_inputs]
    test_accuracy = 100 - percentError(test_predictions, test_outputs)

    # Kept in the original `print (a, b)` form so the output stays the same
    # as before on whichever Python version runs this.
    print (train_accuracy, test_accuracy)

# Training dataset whose samples have one input value per feature.
train_set = ClassificationDataSet(len(input_features))

# Add every training sample. The target is the expected output minus 1;
# presumably this shifts results 1/2 (the values neural_result() returns)
# down to 0/1 to match the sigmoid output range -- confirm against the data.
# NOTE(review): `input_line` is unused; the samples are read by index instead.
for i, input_line in enumerate(train_inputs):
    train_set.addSample(train_inputs[i], [train_outputs[i] - 1])

# Backpropagation trainer bound to the network and the training set.
trainer = BackpropTrainer(n, dataset=train_set, momentum=0.5, weightdecay=0.0)

train_set.assignClasses()

# Accuracy of the still-untrained network, as a baseline.
test_network()

# Run 20 training passes, printing train/test accuracy after each one.
for i in range(20):
    trainer.train()
    test_network()

def predict(year, team1, team2):
    """Predict the winner of a match between two teams in a given year.

    One input sample is built in the order of ``input_features``: a feature
    that is a column of ``team_stats`` is looked up for ``team1``, or for
    ``team2`` when the feature name ends in ``'_2'``; the ``'year'`` feature
    takes the ``year`` argument. The sample is then transformed with
    ``normalizer`` and classified by ``neural_result``.

    Returns ``team1`` for a result of 1 and ``team2`` for a result of 2
    (``'tie'`` for 0, or an ``'Unknown result: ...'`` string for anything
    else).

    Raises ValueError when a feature is neither a ``team_stats`` column nor
    ``'year'``.
    """
    team2_suffix = '_2'
    sample = []

    for feature in input_features:
        # Only a trailing '_2' marks a second-team feature. Strip just that
        # suffix, so a '_2' appearing elsewhere in a stat name is neither
        # mistaken for the marker nor removed from the name.
        from_team_2 = feature.endswith(team2_suffix)
        if from_team_2:
            feature = feature[:-len(team2_suffix)]

        if feature in team_stats.columns.values:
            team = team2 if from_team_2 else team1
            value = team_stats.loc[team, feature]
        elif feature == 'year':
            value = year
        else:
            raise ValueError("Don't know where to get feature: " + feature)

        sample.append(value)

    # Apply the same normalization that was applied to the training inputs.
    sample = normalizer.transform(sample)
    result = neural_result(sample)

    if result == 0:
        return 'tie'
    elif result == 1:
        return team1
    elif result == 2:
        return team2
    else:
        return 'Unknown result: ' + str(result)

# Sample predictions for past matches; the real results are noted alongside
# for comparison. As bare expressions these calls have no visible effect in a
# plain script (presumably each one was a notebook cell showing its result).
predict(1950, 'Mexico', 'Brazil')  # real result: 4-0 wins Brazil

predict(1990, 'United Arab Emirates', 'Colombia')  # real result: 2-0 wins Colombia


predict(2002, 'South Africa', 'Spain')  # real result: 2-3 wins Spain

predict(2010, 'Japan', 'Cameroon')  # real result: 1-0 wins Japan

# Predictions for 2014 match-ups, with no real result noted.
predict(2014, 'Argentina', 'Brazil')

predict(2014, 'Spain', 'Haiti')

predict(2014, 'Russia', 'Germany')

# Same team on both sides of the match.
predict(2014, 'Russia', 'Russia')