#!/usr/bin/env python
# coding: utf-8

# # Digit Recognizer using Random Forest
# * https://www.kaggle.com/c/digit-recognizer

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Input/output locations and the seed shared by the split and the model.
TRAIN_CSV = '../input/digit-recognizer/train.csv'
TEST_CSV = '../input/digit-recognizer/test.csv'
OUTPUT_CSV = "output.csv"
RANDOM_STATE = 0


def split_features_and_labels(train):
    """Separate a training frame into a feature matrix and a label vector.

    The first column of ``train`` is taken as the label and every remaining
    column as a feature.

    Args:
        train: ``pandas.DataFrame`` with the label in column 0.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays; ``images`` is 2-D and
        ``labels`` is 1-D.
    """
    images = train.iloc[:, 1:].values
    labels = train.iloc[:, 0].values
    return images, labels


def build_submission(predictions):
    """Build the submission frame from per-row predictions.

    Args:
        predictions: sequence of predicted labels, one per test row.

    Returns:
        ``pandas.DataFrame`` with columns ``ImageId`` (1-based row number)
        and ``Label`` (the prediction for that row).
    """
    return pd.DataFrame({
        "ImageId": list(range(1, len(predictions) + 1)),
        "Label": predictions,
    })


def main():
    """Train a random forest on the training CSV and write test predictions.

    Reads ``TRAIN_CSV`` and ``TEST_CSV``, holds out 20% of the training rows
    for validation, prints the validation accuracy, and writes the test-set
    predictions to ``OUTPUT_CSV``.
    """
    # ## Prepare Data
    train = pd.read_csv(TRAIN_CSV)
    test = pd.read_csv(TEST_CSV)
    print("train shape: {}, test shape: {}".format(train.shape, test.shape))

    images, labels = split_features_and_labels(train)
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images,
        labels,
        train_size=0.8,
        test_size=0.2,
        random_state=RANDOM_STATE,
    )

    # ## Build Model
    # The model is fitted, scored and queried with plain ndarrays throughout.
    # Mixing an ndarray fit with DataFrame predict/score inputs makes recent
    # scikit-learn versions warn about missing feature names.
    clf = RandomForestClassifier(random_state=RANDOM_STATE)
    clf.fit(train_images, train_labels)

    accuracy = clf.score(valid_images, valid_labels)
    print("validation accuracy: {:.4f}".format(accuracy))

    # ## Submit
    predictions = clf.predict(test.values)
    submissions = build_submission(predictions)
    submissions.to_csv(OUTPUT_CSV, index=False, header=True)


if __name__ == "__main__":
    main()