import pandas as pd
import matplotlib.pyplot as plt
# Source of the dataset:
# https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits
def load(path_test, path_train):
    """Load the testing and training CSV files and split features from labels.

    Both files are parsed with ``pd.read_csv`` using its default settings.
    The last column is treated as the label; every column before it is
    treated as a feature.

    Args:
        path_test: Path to the testing CSV file.
        path_train: Path to the training CSV file.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``. The ``X_*`` values
        are DataFrames holding the feature columns and the ``y_*`` values
        are flat NumPy arrays holding the labels.
    """
    # NOTE(review): read_csv's default header inference consumes the first
    # row of each file as column names. If the data files have no header
    # row, that first sample is lost -- confirm whether header=None is wanted.
    with open(path_test, 'r') as f:
        testing = pd.read_csv(f)
    with open(path_train, 'r') as f:
        training = pd.read_csv(f)

    # The column count is taken from the testing set and applied to both
    # frames; the two files are expected to share the same layout.
    n_features = testing.shape[1]
    label_col = n_features - 1

    # Positional slicing with .iloc (the old .ix indexer no longer exists
    # in pandas): everything before the last column is a feature.
    X_test = testing.iloc[:, :label_col]
    X_train = training.iloc[:, :label_col]
    y_test = testing.iloc[:, label_col:].values.ravel()
    y_train = training.iloc[:, label_col:].values.ravel()
    return X_train, X_test, y_train, y_test
def peekData(X_train):
    """Display the leading samples of ``X_train`` as a grid of 8x8 images.

    Up to 50 samples are drawn in a 5-row by 10-column grid of subplots,
    each row of ``X_train`` being reshaped to 8x8 pixels. The figure is
    shown with ``plt.show()``.

    Args:
        X_train: DataFrame of samples; each row must hold exactly 64 values
            so it can be reshaped to 8x8.
    """
    # The 'targets' or labels are stored in y. The 'samples' or data is stored in X
    print ("Peeking your data...")
    fig = plt.figure()

    n_rows, n_cols = 5, 10
    # Never index past the end when fewer than 50 samples are available.
    n_shown = min(n_rows * n_cols, len(X_train))
    for i in range(n_shown):
        plt.subplot(n_rows, n_cols, i + 1)
        # .iloc selects the i-th row by position; .values yields a NumPy
        # array, which (unlike a pandas Series) supports reshape.
        pixels = X_train.iloc[i].values.reshape(8, 8)
        plt.imshow(pixels, cmap=plt.cm.gray_r, interpolation='nearest')
        plt.axis('off')

    fig.set_tight_layout(True)
    plt.show()
def drawPredictions(X_train, X_test, y_train, y_test, classifier=None):
    """Plot the leading test samples titled with their predicted labels.

    Up to 50 samples of ``X_test`` are drawn in a 5-row by 10-column grid
    as 8x8 images. Each title shows the predicted label, in green when it
    equals the corresponding entry of ``y_test`` and in red otherwise.

    Args:
        X_train: Unused; kept so existing callers keep working.
        X_test: DataFrame of test samples; each row must hold 64 values.
        y_train: Unused; kept so existing callers keep working.
        y_test: True labels, indexable by position, aligned with ``X_test``.
        classifier: Fitted estimator exposing ``predict``. When omitted,
            the module-level ``model`` is used.
    """
    if classifier is None:
        # Backward-compatible default: fall back to the module-level model.
        classifier = model

    fig = plt.figure()

    # Make some guesses
    y_guess = classifier.predict(X_test)

    num_rows = 10  # number of subplot columns passed to plt.subplot
    num_cols = 5   # number of subplot rows passed to plt.subplot
    # Never index past the end when fewer than 50 test samples exist.
    n_shown = min(num_rows * num_cols, len(X_test))

    for index in range(n_shown):
        plt.subplot(num_cols, num_rows, index + 1)

        # 8x8 is the size of the image, 64 pixels. .iloc picks the row by
        # position and .values gives a NumPy array that supports reshape.
        pixels = X_test.iloc[index].values.reshape(8, 8)
        plt.imshow(pixels, cmap=plt.cm.gray_r, interpolation='nearest')

        # Correct predictions are labelled in green, wrong ones in red.
        fontcolor = 'g' if y_test[index] == y_guess[index] else 'r'
        plt.title('Label: %i' % y_guess[index], fontsize=6, color=fontcolor)
        plt.axis('off')

    fig.set_tight_layout(True)
    plt.show()
#
# Script section: load the data, train an SVC, score it, and inspect the
# sample at index 1000 of the test set.

# File paths of the testing and training data files (test path first,
# training path second, matching load()'s parameter order).
X_train, X_test, y_train, y_test = load('C:\\Users\\Fatih\\Desktop\\Bitirme\\Optical-Recognition-of-Handwritten-Digits\\optdigits.test', 'C:\\Users\\Fatih\\Desktop\\Bitirme\\Optical-Recognition-of-Handwritten-Digits\\optdigits.train')

from sklearn import svm
from sklearn.svm import SVC

# Get to know your data. It is already organized in [n_samples, n_features]
# form; each sample is reshaped to 8x8 for display, i.e. 64 pixel features.
# The labels are already shaped as [n_samples].
peekData(X_train)

# Create an SVC classifier with C=1 and gamma=0.001, then train it on the
# training data / labels.
# NOTE(review): the original exercise text asked for a linear kernel; the
# code uses the RBF kernel -- confirm this is the intended setting.
print( "Training SVC Classifier...")
model = SVC(kernel='rbf', C=1, gamma=0.001)
model.fit(X_train, y_train)

# Calculate the score of the SVC against the testing data.
print ("Scoring SVC Classifier...")
score = model.score(X_test, y_test)
print( "Score:\n", score)

# Visual confirmation of accuracy
drawPredictions(X_train, X_test, y_train, y_test)

# Print the TRUE value of the 1000th digit in the test set, i.e. the label
# actually provided for that sample. Labels live in y_test, not in X_test.
true_1000th_test_value = y_test[1000]
print( "1000th test label: ", true_1000th_test_value)

# Predict the value of the same sample. predict() expects 2-D input, so the
# row is selected with a list index to keep it as a one-row DataFrame.
sample_1000th = X_test.iloc[[1000]]
guess_1000th_test_value = model.predict(sample_1000th)
print ("1000th test prediction: ", guess_1000th_test_value)

# Display the sample so it can be checked visually whether it was a hard
# or an easy image. .values gives a NumPy array that supports reshape.
image_1000th = sample_1000th.values.reshape(8, 8)
plt.imshow(image_1000th, cmap=plt.cm.gray_r, interpolation='nearest')
plt.show()