# First let's import the dataset, using Pandas.
import pandas as pd
# Both splits live next to the notebook; the paths are relative, so start
# IPython from the directory that holds the CSV files.
TRAIN_CSV = "train.csv"
TEST_CSV = "test.csv"
train = pd.read_csv(TRAIN_CSV)
test = pd.read_csv(TEST_CSV)
# Preview the first rows. Ignore the first column - it is left over from how
# the data was split.
train.head()
| | class | petal_length | petal_width | sepal_length | sepal_width |
---|---|---|---|---|---|
0 | Iris-virginica | 5.5 | 1.8 | 6.4 | 3.1 |
1 | Iris-virginica | 5.9 | 2.3 | 6.8 | 3.2 |
2 | Iris-virginica | 5.4 | 2.3 | 6.2 | 3.4 |
3 | Iris-virginica | 4.8 | 1.8 | 6.0 | 3.0 |
4 | Iris-virginica | 5.1 | 2.3 | 6.9 | 3.1 |
from sklearn.ensemble import RandomForestClassifier
# However, our data has to be in a NumPy array in order for the random forest
# algorithm to accept it!
cols = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']
colsRes = ['class']
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# selecting the columns and taking .values gives the same array on both old
# and new pandas versions.
trainArr = train[cols].values  # training array, one column per feature in cols
trainRes = train[colsRes].values  # training results, an (n_samples, 1) column
## Training!
rf = RandomForestClassifier(n_estimators=100)  # 100 decision trees is a good enough number
# sklearn expects the labels as a 1-d array of shape (n_samples,), so flatten
# the single label column with ravel() when fitting.
rf.fit(trainArr, trainRes.ravel())  # finally, we fit the data to the algorithm!!! :)
# note - passing the column vector to fit() directly makes sklearn emit a
# DataConversionWarning; the ravel() call above avoids it.
/Users/alexwoods/Downloads/ipython-3.2.0/IPython/kernel/__main__.py:16: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1, oob_score=False, random_state=None, verbose=0, warm_start=False)
## Testing!
# Put the test features in the same format as the training array.
# DataFrame.as_matrix() was removed in pandas 1.0, so select the feature
# columns and take .values instead.
testArr = test[cols].values
results = rf.predict(testArr)
# Something I like to do is add the predictions back to the DataFrame, so the
# true class and the predicted class can be compared side by side.
test['predictions'] = results
test.head()
| | class | petal_length | petal_width | sepal_length | sepal_width | predictions |
---|---|---|---|---|---|---|
0 | Iris-virginica | 6.6 | 2.1 | 7.6 | 3.0 | Iris-virginica |
1 | Iris-virginica | 6.3 | 1.8 | 7.3 | 2.9 | Iris-virginica |
2 | Iris-virginica | 5.5 | 2.1 | 6.8 | 3.0 | Iris-virginica |
3 | Iris-virginica | 5.1 | 2.4 | 5.8 | 2.8 | Iris-virginica |
4 | Iris-virginica | 5.3 | 2.3 | 6.4 | 3.2 | Iris-virginica |