[RandomForestDemo]

Random Forest Classifier Demo

Import the digits dataset from the scikit-learn package.

In [2]:

from sklearn.datasets import load_digits

In [3]:

# Load the bundled digits dataset, then pull out the feature matrix (X)
# and the target vector (Y) via attribute access on the returned object.
digits_data = load_digits()
X, Y = digits_data.data, digits_data.target

In [4]:

# Dimensions of the feature matrix X as (rows, columns).
X.shape

Out[4]:

(1797, 64)

In [5]:

# Shape of the target vector Y (a cell only displays its final expression).
Y.shape

Out[5]:

(1797,)

In [6]:

# Use pyplot directly; Matplotlib discourages the pylab interface.
# The alias `pl` is kept because later cells call pl.matshow / pl.show.
import matplotlib.pyplot as pl

pl.gray()  # switch the default colormap to grayscale
pl.matshow(digits_data.images[0])  # draw the first image array as a matrix plot
pl.show()

<matplotlib.figure.Figure at 0xe483af0>

In [7]:

# First row of the feature matrix X.
X[0]

Out[7]:

array([  0.,   0.,   5.,  13.,   9.,   1.,   0.,   0.,   0.,   0.,  13.,
        15.,  10.,  15.,   5.,   0.,   0.,   3.,  15.,   2.,   0.,  11.,
         8.,   0.,   0.,   4.,  12.,   0.,   0.,   8.,   8.,   0.,   0.,
         5.,   8.,   0.,   0.,   9.,   8.,   0.,   0.,   4.,  11.,   0.,
         1.,  12.,   7.,   0.,   0.,   2.,  14.,   5.,  10.,  12.,   0.,
         0.,   0.,   0.,   6.,  13.,  10.,   0.,   0.,   0.])

In [8]:

# Target value paired with the first row of X.
Y[0]

Out[8]:

In [9]:

# Draw the second entry of digits_data.images as a matrix plot.
pl.matshow(digits_data.images[1])
pl.show()

In [10]:

# Second row of the feature matrix X.
X[1]

Out[10]:

array([  0.,   0.,   0.,  12.,  13.,   5.,   0.,   0.,   0.,   0.,   0.,
        11.,  16.,   9.,   0.,   0.,   0.,   0.,   3.,  15.,  16.,   6.,
         0.,   0.,   0.,   7.,  15.,  16.,  16.,   2.,   0.,   0.,   0.,
         0.,   1.,  16.,  16.,   3.,   0.,   0.,   0.,   0.,   1.,  16.,
        16.,   6.,   0.,   0.,   0.,   0.,   1.,  16.,  16.,   6.,   0.,
         0.,   0.,   0.,   0.,  11.,  16.,  10.,   0.,   0.])

In [11]:

# Target value paired with the second row of X.
Y[1]

Out[11]:

Next, let us split the dataset into training and test sets.

In [12]:

# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed in scikit-learn 0.20.
# Fall back to the legacy location only for very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

In [13]:

# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [14]:

# Dimensions of the training portion of the feature matrix.
x_train.shape

Out[14]:

(1437, 64)

In [15]:

# Dimensions of the held-out test portion of the feature matrix.
x_test.shape

Out[15]:

(360, 64)

In [24]:

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# n_estimators=1 builds a single tree; random_state pins the forest's
# randomness so re-running the cell reproduces the same model.
clf = RandomForestClassifier(n_estimators=1, criterion="entropy", random_state=42)

clf.fit(x_train, y_train)
# Predictions on the training rows themselves (used for the train accuracy).
predictions = clf.predict(x_train)

In [25]:

# Labels the fitted classifier predicted for the training rows.
predictions

Out[25]:

array([6, 0, 0, ..., 2, 7, 1])

In [26]:

# print(...) with a single parenthesised argument runs on Python 2 and 3 alike.
# Accuracy is scaled by 100 so it reads as a percentage.
print("Train Accuracy = %f " % (accuracy_score(y_train, predictions) * 100))

# Score the same fitted model on the held-out test rows.
predictions_test = clf.predict(x_test)

print("Test Accuracy = %f " % (accuracy_score(y_test, predictions_test) * 100))

Train Accuracy = 92.414753 
Test Accuracy = 79.722222

In [22]:

In []:

Vail Lab

Tuesday, December 2, 2014

Random Forest using scikit-learn in IPython

Random Forest Classifier Demo

No comments:

Post a Comment