#!/usr/bin/env python
# coding: utf-8

# Script export of the "Project 1" notebook: loads the used-car listing
# datasets, runs a short exploration, and writes an example Kaggle submission
# file filled with random prices.

# # Project 1
#
# # Used Vehicle Price Prediction

# ## Introduction
#
# - 1.2 Million listings scraped from TrueCar.com - Price, Mileage, Make, Model dataset from Kaggle: [data](https://www.kaggle.com/jpayne/852k-used-car-listings)
# - Each observation represents the price of a used car

import numpy as np
import pandas as pd

# In[1]:

# `get_ipython` is only defined inside an IPython/Jupyter session; guard the
# magic so the script also runs under a plain Python interpreter.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# In[2]:

# Training set; pandas reads the zipped CSV directly from the URL.
data = pd.read_csv('https://github.com/albahnsen/PracticalMachineLearningClass/raw/master/datasets/dataTrain_carListings.zip')

# NOTE: the bare expressions below (`data.head()`, `data.shape`, ...) only
# render output when evaluated as notebook cells; run as a script they are
# evaluated and discarded.

# In[3]:

data.head()

# In[4]:

data.shape

# In[5]:

data.Price.describe()

# In[6]:

data.plot(kind='scatter', y='Price', x='Year')

# In[7]:

data.plot(kind='scatter', y='Price', x='Mileage')

# In[8]:

data.columns

# # Exercise P1.1 (50%)
#
# Develop a machine learning model that predicts the price of the car using as input ['Year', 'Mileage', 'State', 'Make', 'Model']
#
# Submit the prediction of the testing set to Kaggle
# https://www.kaggle.com/c/miia4200-20191-p1-usedcarpriceprediction
#
# #### Evaluation:
# - 25% - Performance of the model in the Kaggle Private Leaderboard
# - 25% - Notebook explaining the modeling process
#

# In[2]:

# Test set; the first CSV column becomes the index, which is later written
# out as the submission's 'ID' column.
data_test = pd.read_csv('https://github.com/albahnsen/PracticalMachineLearningClass/raw/master/datasets/dataTest_carListings.zip', index_col=0)

# In[3]:

data_test.head()

# In[4]:

data_test.shape

# ### Submission example

# In[6]:

# (numpy is imported at the top of the file.)

# In[7]:

# Fixed seed so the example submission is reproducible.
np.random.seed(42)
# Placeholder predictions: one uniform random price in [5000, 80000) per test
# row, aligned to the test-set index. This is a format example, not a model.
y_pred = pd.DataFrame(
    np.random.rand(data_test.shape[0]) * 75000 + 5000,
    index=data_test.index,
    columns=['Price'],
)

# In[8]:

y_pred.to_csv('test_submission.csv', index_label='ID')

# In[9]:

y_pred.head()

# # Exercise P1.2 (50%)
#
# Create an API of the model.
#
# Example:
# ![](https://raw.githubusercontent.com/albahnsen/PracticalMachineLearningClass/master/notebooks/images/img015.PNG)
#
# #### Evaluation:
# - 40% - API hosted on a cloud service
# - 10% - Show screenshots of the model doing the predictions on the local machine
#