@nandunbandara
Created June 6, 2021 02:27
MCS2203 - Assignment 1
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
# Calculate sensitivity using the confusion matrix
# (rows = true labels, columns = predicted labels; the first label is treated as the positive class)
def calculate_sensitivity(c):
    return c[0, 0] / (c[0, 0] + c[0, 1])

# Calculate specificity using the confusion matrix
def calculate_specificity(c):
    return c[1, 1] / (c[1, 0] + c[1, 1])
# print output (scores)
def print_result(y_prediction, y_test):
    print("Accuracy: {0}".format(accuracy_score(y_test, y_prediction)))
    print("Precision: {0}\n".format(precision_score(y_test, y_prediction, average="macro")))
    print("Confusion Matrix:")
    # scikit-learn expects confusion_matrix(y_true, y_pred)
    cm = confusion_matrix(y_test, y_prediction)
    sensitivity = calculate_sensitivity(cm)
    specificity = calculate_specificity(cm)
    print(cm)
    print("\nSensitivity: {0}".format(sensitivity))
    print("Specificity: {0}".format(specificity))
# read datasets. use the openpyxl engine to read from the spreadsheet
training_data = pd.read_excel("./dataset.xlsx", engine="openpyxl", sheet_name="Training-Data")
test_data = pd.read_excel("./dataset.xlsx", engine="openpyxl", sheet_name="Test-Data")
# preprocess training data
# remove the ID column
training_data.drop("ID", axis="columns", inplace=True)
# remove the last three rows of the data set, which contain summary stats and NaN values
training_data.drop(training_data.tail(3).index, inplace=True)
# replace '?' placeholders with NaN so they are treated as missing values
training_data.replace('?', np.nan, inplace=True)
# fill missing values with the column means
training_data = training_data.fillna(value=training_data.mean())
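# Note (assumption about the data): columns that contained '?' may still have object dtype
# after the replace above, in which case .mean() skips them and their NaNs are not filled.
# A minimal sketch of one way to coerce every feature column to numeric first:
#   for col in training_data.columns:
#       training_data[col] = pd.to_numeric(training_data[col], errors="coerce")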
# preprocess test data
# drop ID column
test_data.drop("ID", axis="columns", inplace=True)
# drop the last 4 rows (summary stats and NaN)
test_data.drop(test_data.tail(4).index, inplace=True)
# replace '?' placeholders with NaN, then fill missing values with the column means
test_data.replace('?', np.nan, inplace=True)
test_data = test_data.fillna(value=test_data.mean())
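# Note: filling test-set gaps with the test set's own means is kept from the original;
# a common alternative is to reuse the training-set means instead, e.g.:
#   test_data = test_data.fillna(value=training_data.mean())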
# split x (features) and y from the training dataset
x_training = training_data.loc[:, 'A1':'A9']
y_training = training_data[["Class"]].values.flatten()
# split x (features) and y from the test dataset
x_test = test_data.loc[:, 'A1':'A9']
y_test = test_data[["Class"]].values.flatten()
# SVC Model
print("Training SVC Model")
SVC_model = SVC()
SVC_model.fit(x_training, y_training)
SVC_prediction = SVC_model.predict(x_test)
print_result(SVC_prediction, y_test)
# KNN model
print("\n\nTraining KNN Model")
KNN_model = KNeighborsClassifier(n_neighbors=5)
KNN_model.fit(x_training, y_training)
KNN_prediction = KNN_model.predict(x_test)
print_result(KNN_prediction, y_test)
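# --- Optional extension (a minimal sketch, not part of the original assignment) ---
# One way to sanity-check the choice of k for KNN is 5-fold cross-validation on the
# training data; cross_val_score is a standard scikit-learn utility.
from sklearn.model_selection import cross_val_score

for k in (3, 5, 7, 9):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), x_training, y_training, cv=5)
    print("k={0}: mean CV accuracy = {1:.3f}".format(k, scores.mean()))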