@nandunbandara
Created June 6, 2021 02:27
MCS2203 - Assignment 1
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
# Calculate sensitivity using the confusion matrix
# (rows = true labels, columns = predicted labels; the first label is treated as the positive class)
def calculate_sensitivity(c):
    return c[0, 0] / (c[0, 0] + c[0, 1])

# Calculate specificity using the confusion matrix
def calculate_specificity(c):
    return c[1, 1] / (c[1, 0] + c[1, 1])
# print output (scores)
def print_result(y_prediction, y_test):
    print("Accuracy: {0}".format(accuracy_score(y_test, y_prediction)))
    print("Precision: {0}\n".format(precision_score(y_test, y_prediction, average="macro")))
    print("Confusion Matrix:")
    # scikit-learn expects confusion_matrix(y_true, y_pred)
    cm = confusion_matrix(y_test, y_prediction)
    sensitivity = calculate_sensitivity(cm)
    specificity = calculate_specificity(cm)
    print(cm)
    print("\nSensitivity: {0}".format(sensitivity))
    print("Specificity: {0}".format(specificity))
# read datasets. use the openpyxl engine to read from the spreadsheet
training_data = pd.read_excel("./dataset.xlsx", engine="openpyxl", sheet_name="Training-Data")
test_data = pd.read_excel("./dataset.xlsx", engine="openpyxl", sheet_name="Test-Data")
# preprocess training data
# remove the ID column
training_data.drop("ID", axis="columns", inplace=True)
# remove the last three rows of the data set, which contain summary stats and NaN values
training_data.drop(training_data.tail(3).index, inplace=True)
# replace '?' placeholders with NaN so they are treated as missing values
training_data.replace('?', np.nan, inplace=True)
# fill missing values with the column means
training_data = training_data.fillna(value=training_data.mean())
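# Note (assumption about the data): columns that contained '?' may still have object dtype
# after the replace above, in which case .mean() skips them and their NaNs are not filled.
# A minimal sketch of one way to coerce every feature column to numeric first:
#   for col in training_data.columns:
#       training_data[col] = pd.to_numeric(training_data[col], errors="coerce")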
# preprocess test data
# drop ID column
test_data.drop("ID", axis="columns", inplace=True)
# drop the last 4 rows (summary stats and NaN)
test_data.drop(test_data.tail(4).index, inplace=True)
# replace '?' placeholders with NaN, then fill missing values with the column means
test_data.replace('?', np.nan, inplace=True)
test_data = test_data.fillna(value=test_data.mean())
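# Note: filling test-set gaps with the test set's own means is kept from the original;
# a common alternative is to reuse the training-set means instead, e.g.:
#   test_data = test_data.fillna(value=training_data.mean())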
# split x (features) and y from the training dataset
x_training = training_data.loc[:, 'A1':'A9']
y_training = training_data[["Class"]].values.flatten()
# split x (features) and y from the test dataset
x_test = test_data.loc[:, 'A1':'A9']
y_test = test_data[["Class"]].values.flatten()
# SVC Model
print("Training SVC Model")
SVC_model = SVC()
SVC_model.fit(x_training, y_training)
SVC_prediction = SVC_model.predict(x_test)
print_result(SVC_prediction, y_test)
# KNN model
print("\n\nTraining KNN Model")
KNN_model = KNeighborsClassifier(n_neighbors=5)
KNN_model.fit(x_training, y_training)
KNN_prediction = KNN_model.predict(x_test)
print_result(KNN_prediction, y_test)
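# --- Optional extension (a minimal sketch, not part of the original assignment) ---
# One way to sanity-check the choice of k for KNN is 5-fold cross-validation on the
# training data; cross_val_score is a standard scikit-learn utility.
from sklearn.model_selection import cross_val_score

for k in (3, 5, 7, 9):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), x_training, y_training, cv=5)
    print("k={0}: mean CV accuracy = {1:.3f}".format(k, scores.mean()))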