#!/usr/bin/env python
# coding: utf-8

# # Project 1
# 
# # Used Vehicle Price Prediction

# ## Introduction
# 
# - 1.2 Million listings scraped from TrueCar.com - Price, Mileage, Make, Model dataset from Kaggle: [data](https://www.kaggle.com/jpayne/852k-used-car-listings)
# - Each observation represents the price of a used car

# In[1]:


# Enable inline matplotlib rendering when running under IPython/Jupyter.
# `get_ipython` is only available inside an IPython session; when this
# exported notebook is run as a plain Python script the name does not exist,
# so the magic is skipped instead of aborting the script with a NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
import pandas as pd


# In[2]:


# Download the training listings (a zipped CSV hosted in the course repo)
# into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer='https://github.com/albahnsen/PracticalMachineLearningClass/raw/master/datasets/dataTrain_carListings.zip',
)


# In[3]:


# First five rows of the training set.
data.head(n=5)


# In[4]:


# (rows, columns) of the training set.
data.shape


# In[5]:


# Summary statistics of the target column.
data['Price'].describe()


# In[6]:


# Price against model year.
data.plot.scatter(x='Year', y='Price')


# In[7]:


# Price against mileage.
data.plot.scatter(x='Mileage', y='Price')


# In[8]:


# Column names available in the training set.
data.columns


# # Exercise P1.1 (50%)
# 
# Develop a machine learning model that predicts the price of a car, using ['Year', 'Mileage', 'State', 'Make', 'Model'] as input
# 
# Submit the prediction of the testing set to Kaggle
# https://www.kaggle.com/c/miia4200-20191-p1-usedcarpriceprediction
# 
# #### Evaluation:
# - 25% - Performance of the model in the Kaggle Private Leaderboard
# - 25% - Notebook explaining the modeling process
# 

# In[2]:


# Download the test listings (a zipped CSV hosted in the course repo); the
# first CSV column is used as the DataFrame index.
data_test = pd.read_csv(
    filepath_or_buffer='https://github.com/albahnsen/PracticalMachineLearningClass/raw/master/datasets/dataTest_carListings.zip',
    index_col=0,
)


# In[3]:


# First five rows of the test set.
data_test.head(n=5)


# In[4]:


# (rows, columns) of the test set.
data_test.shape


# ### Submission example

# In[6]:


import numpy as np


# In[7]:


# Build a placeholder submission: one uniformly random price in
# [5000, 80000) per test listing, seeded so the output is reproducible.
np.random.seed(42)
random_prices = np.random.rand(len(data_test)) * 75000 + 5000
y_pred = pd.DataFrame(
    data=random_prices,
    index=data_test.index,
    columns=['Price'],
)


# In[8]:


# Write the predictions to disk, labelling the index column 'ID'.
y_pred.to_csv(path_or_buf='test_submission.csv', index_label='ID')


# In[9]:


# First five rows of the generated predictions.
y_pred.head(n=5)


# # Exercise P1.2 (50%)
# 
# Create an API of the model.
# 
# Example:
# ![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img015.PNG)
# 
# #### Evaluation:
# - 40% - API hosted on a cloud service
# - 10% - Show screenshots of the model doing the predictions on the local machine
#