Created
June 6, 2021 02:27
-
-
Save nandunbandara/76f114232fd8d6687af3c9a0a0fe2625 to your computer and use it in GitHub Desktop.
MCS2203 - Assignment 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
from sklearn.metrics import confusion_matrix | |
from sklearn.metrics import accuracy_score, precision_score | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.svm import SVC | |
# Sensitivity derived from a 2x2 confusion matrix: row-0 recall,
# i.e. c[0,0] / (c[0,0] + c[0,1]).
# NOTE(review): assumes the positive class occupies row 0 of the matrix —
# confirm against the dataset's label encoding.
def calculate_sensitivity(c):
    true_positive = c[0, 0]
    false_negative = c[0, 1]
    return true_positive / (true_positive + false_negative)
# Specificity derived from a 2x2 confusion matrix: row-1 recall,
# i.e. c[1,1] / (c[1,0] + c[1,1]).
# NOTE(review): assumes the negative class occupies row 1 of the matrix —
# confirm against the dataset's label encoding.
def calculate_specificity(c):
    true_negative = c[1, 1]
    false_positive = c[1, 0]
    return true_negative / (false_positive + true_negative)
# Print evaluation scores for a set of predictions.
#
# Parameters:
#   y_prediction: predicted class labels
#   y_test: ground-truth class labels
#
# Prints accuracy, macro-averaged precision, the confusion matrix, and the
# sensitivity/specificity derived from that matrix.
def print_result(y_prediction, y_test):
    # sklearn metric functions take (y_true, y_pred) in that order.
    # accuracy_score is symmetric so its value is unaffected, but pass the
    # documented order for consistency with precision_score below.
    print("Accuracy: {0}".format(accuracy_score(y_test, y_prediction)))
    print("Precision: {0}\n".format(precision_score(y_test, y_prediction, average="macro")))
    print("Confusion Matrix:")
    # BUG FIX: the original called confusion_matrix(y_prediction, y_test),
    # which transposes the matrix (rows must be true labels, columns
    # predictions), corrupting the sensitivity/specificity read from it.
    cm = confusion_matrix(y_test, y_prediction)
    sensitivity = calculate_sensitivity(cm)
    specificity = calculate_specificity(cm)
    print(cm)
    print("\nSensitivity: {0}".format(sensitivity))
    print("Specificity: {0}".format(specificity))
# Read datasets. Use the openpyxl engine to read from the spreadsheet;
# each split lives on its own sheet.
training_data = pd.read_excel("./dataset.xlsx", engine="openpyxl", sheet_name="Training-Data")
test_data = pd.read_excel("./dataset.xlsx", engine="openpyxl", sheet_name="Test-Data")

# --- Preprocess training data ---
# Remove the ID column (an identifier, not a feature).
training_data.drop("ID", axis="columns", inplace=True)
# Remove the last three rows of the sheet, which hold summary stats / NaN.
training_data.drop(training_data.tail(3).index, inplace=True)
# Replace the '?' missing-value placeholder with NaN so it can be imputed.
training_data.replace('?', np.nan, inplace=True)
# BUG FIX: columns that contained '?' are object dtype after the replace,
# so DataFrame.mean() would skip them (or raise under pandas >= 2) and the
# NaNs would never be imputed, later crashing the classifiers' fit().
# Coerce the feature columns to numeric, then fill NaNs with column means.
training_data.loc[:, 'A1':'A9'] = training_data.loc[:, 'A1':'A9'].apply(pd.to_numeric, errors="coerce")
training_data = training_data.fillna(value=training_data.mean(numeric_only=True))

# --- Preprocess test data (same steps; this sheet has 4 trailing junk rows) ---
test_data.drop("ID", axis="columns", inplace=True)
test_data.drop(test_data.tail(4).index, inplace=True)
test_data.replace('?', np.nan, inplace=True)
test_data.loc[:, 'A1':'A9'] = test_data.loc[:, 'A1':'A9'].apply(pd.to_numeric, errors="coerce")
test_data = test_data.fillna(value=test_data.mean(numeric_only=True))

# Split features (columns A1..A9) and target ("Class") for both datasets.
x_training = training_data.loc[:, 'A1':'A9']
y_training = training_data[["Class"]].values.flatten()
x_test = test_data.loc[:, 'A1':'A9']
y_test = test_data[["Class"]].values.flatten()

# --- SVC model: train on the training split, evaluate on the test split ---
print("Training SVC Model")
SVC_model = SVC()
SVC_model.fit(x_training, y_training)
SVC_prediction = SVC_model.predict(x_test)
print_result(SVC_prediction, y_test)

# --- KNN model: same protocol, k = 5 neighbors ---
print("\n\nTraining KNN Model")
KNN_model = KNeighborsClassifier(n_neighbors=5)
KNN_model.fit(x_training, y_training)
KNN_prediction = KNN_model.predict(x_test)
print_result(KNN_prediction, y_test)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment