In [2]:

# First let's import the dataset, using Pandas.
import pandas as pd

train = pd.read_csv("train.csv")    # make sure you're in the right directory if using iPython!
test = pd.read_csv("test.csv") 

train.head()             # ignore the first column, it's how I split the data.

Out[2]:

	class	petal_length	petal_width	sepal_length	sepal_width
0	Iris-virginica	5.5	1.8	6.4	3.1
1	Iris-virginica	5.9	2.3	6.8	3.2
2	Iris-virginica	5.4	2.3	6.2	3.4
3	Iris-virginica	4.8	1.8	6.0	3.0
4	Iris-virginica	5.1	2.3	6.9	3.1

In [3]:

from sklearn.ensemble import RandomForestClassifier

# however, are data has to be in a numpy array in order for the random forest algorithm to except it!
cols = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']
colsRes = ['class']
trainArr = train.as_matrix(cols)    # training array
trainRes = train.as_matrix(colsRes) # training results

## Training!

rf = RandomForestClassifier(n_estimators=100)    # 100 decision trees is a good enough number
rf.fit(trainArr, trainRes)          # finally, we fit the data to the algorithm!!! :)

# note - you might get an warning saying you entered a 2 column vector..ignore it.

/Users/alexwoods/Downloads/ipython-3.2.0/IPython/kernel/__main__.py:16: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().

Out[3]:

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [5]:

## Testing!

# put the test results in the same format!
testArr = test.as_matrix(cols)

results = rf.predict(testArr)

# something I like to do is to add it back to the dataframe, so I can compare side-by-side
test['predictions'] = results
test.head()

Out[5]:

	class	petal_length	petal_width	sepal_length	sepal_width	predictions
0	Iris-virginica	6.6	2.1	7.6	3.0	Iris-virginica
1	Iris-virginica	6.3	1.8	7.3	2.9	Iris-virginica
2	Iris-virginica	5.5	2.1	6.8	3.0	Iris-virginica
3	Iris-virginica	5.1	2.4	5.8	2.8	Iris-virginica
4	Iris-virginica	5.3	2.3	6.4	3.2	Iris-virginica