Last active
March 22, 2019 02:33
-
-
Save sparkydogX/651e21f3fd66cda2afc100e9f67d79b2 to your computer and use it in GitHub Desktop.
Extract the values of a confusion matrix image into an array via OCR, and save the result to an xlsx file for manual correction.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import cv2 | |
import pytesseract | |
import numpy as np | |
from os import listdir | |
from os.path import join | |
import pandas | |
import xlsxwriter | |
def getCFMatrix(imgpath, class_num=4, start_position=(40, 128), block_height=95, block_width=93):
    '''
    Convert a confusion matrix image to a numpy matrix via OCR.

    The image is cut into a class_num x class_num grid of equally sized cells
    and every cell is passed to pytesseract on its own.

    :param imgpath: Confusion matrix file path.
    :param class_num: Number of rows and columns of the matrix.
    :param start_position: Pixel offset (first image axis, second image axis)
        of the top-left corner of the first cell.
    :param block_height: Extent of one cell along the first image axis, in pixels.
    :param block_width: Extent of one cell along the second image axis, in pixels.
    :return: Tuple (CFMatrix, ErrorRows). CFMatrix is a class_num x class_num
        float matrix; cells that could not be parsed are left at 0.0.
        ErrorRows lists, in ascending order, the 0-based rows that contain an
        unparsable cell or whose sum differs from 1.0 by more than 0.00005.
    :raises IOError: If the image cannot be read.
    '''
    img = cv2.imread(imgpath)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising; fail here with a message that names the file.
        raise IOError("Cannot read confusion matrix image: {0}".format(imgpath))

    top = start_position[0]
    left = start_position[1]
    CFMatrix = np.zeros([class_num, class_num])
    unreadable_rows = set()
    for i in range(class_num):
        for j in range(class_num):
            cropped_img = img[top + block_height * i:top + block_height * (i + 1),
                              left + block_width * j:left + block_width * (j + 1)]
            text = pytesseract.image_to_string(cropped_img)
            try:
                value = float(text.strip())
            except ValueError:
                value = None
            if value is None or not np.isfinite(value):
                # OCR produced nothing usable for this cell. Keep 0.0 and flag
                # the row so it gets corrected by hand instead of aborting
                # the whole run.
                unreadable_rows.add(i)
                print("Cannot parse OCR result {0!r} at row {1}, column {2}".format(text, i + 1, j + 1))
                continue
            CFMatrix[i, j] = value

    # Each row is expected to sum to 1; a row that does not was most likely misread.
    ErrorRows = []
    RowSum = CFMatrix.sum(axis=1)
    for i in range(RowSum.shape[0]):
        if i in unreadable_rows or np.abs(RowSum[i] - 1.0) > 0.00005:
            ErrorRows.append(i)
            print('-'*20)
            print("Error occurs in row {0}".format(i+1))
            print(imgpath)
    return CFMatrix, ErrorRows
def func_for_sort_eatract_int_from_file_name(name):
    """Sort key: return the text before the first '.' of ``name`` as an int."""
    stem, _, _ = name.partition('.')
    return int(stem)
def handleDFentry(frame, class_num=4):
    """
    Locate and OCR the confusion matrix image that belongs to one experiment row.

    The image directory is built as
    ``<code_location>/logs/<model_location after its first '/'>/confusion_matrix``.
    Among the files whose name starts with an integer before the first '.',
    the one with the highest number is used.

    :param frame: Single-row DataFrame with 9 columns, including
        'code_location' and 'model_location'.
    :param class_num: Number of classes of the confusion matrix.
    :return: Tuple (CFMatrix, cfm_file_path, ErrIndex). When 'model_location'
        is null or no numbered image exists, CFMatrix is all zeros,
        cfm_file_path is None and every row index is reported in ErrIndex.
    :raises ValueError: If ``frame`` is not a single row with 9 columns.
    """
    # Explicit checks instead of assert, which is stripped under ``python -O``.
    if frame.shape[0] != 1:
        raise ValueError("Expected exactly one row, got {0}".format(frame.shape[0]))
    if frame.shape[1] != 9:
        raise ValueError("Expected 9 columns, got {0}".format(frame.shape[1]))

    code_location = frame['code_location'].values[0]
    model_location = frame['model_location'].values[0]
    if pandas.isnull(model_location):
        print("Nan value",frame)
        return np.zeros([class_num, class_num]), None, list(range(class_num))

    log_location = join(code_location, 'logs', model_location[model_location.find('/') + 1:], 'confusion_matrix')

    # Keep only files with a numeric prefix; stray entries (hidden files, notes)
    # would otherwise make the numeric sort raise.
    numbered = []
    for file_name in listdir(log_location):
        try:
            numbered.append((func_for_sort_eatract_int_from_file_name(file_name), file_name))
        except ValueError:
            continue
    if not numbered:
        print("No confusion matrix image found in", log_location)
        return np.zeros([class_num, class_num]), None, list(range(class_num))

    # Stable sort on the number only, so ties resolve as in a plain keyed sort.
    numbered.sort(key=lambda pair: pair[0])
    cfm_file_path = join(log_location, numbered[-1][1])
    CFMatrix, ErrIndex = getCFMatrix(cfm_file_path, class_num=class_num)
    return CFMatrix, cfm_file_path, ErrIndex
def _write_result_sheet(worksheet, CFMatrix, cfm_file_path, ErrIndex, error_format):
    """
    Fill one worksheet with a 4x4 confusion matrix, its labels and check formulas.

    Layout: labels in row 1 and column A, matrix values in B2:E5, per-row sums
    in column F, mean of the diagonal in B7, and the source image (if any)
    anchored at G2. Rows listed in ErrIndex are written with ``error_format``.
    """
    labels = ('Sit', 'Walk', 'Ride', 'Stand')
    flagged_rows = set(ErrIndex)

    for i in range(4):
        for j in range(4):
            if i in flagged_rows:
                worksheet.write(i + 1, j + 1, CFMatrix[i, j], error_format)
            else:
                worksheet.write(i + 1, j + 1, CFMatrix[i, j])
        # Live formula so the sum updates after manual correction; ``value``
        # is the cached result stored alongside the formula.
        worksheet.write_formula(i + 1, 5, '=SUM(B{0}:E{0})'.format(i + 2), value=np.sum(CFMatrix[i, :]))

    if cfm_file_path:
        worksheet.insert_image('G2', cfm_file_path, {'x_scale': 1.0, 'y_scale': 1.0})

    worksheet.write('A1', 'GT/Pred')
    for idx, label in enumerate(labels):
        worksheet.write(idx + 1, 0, label)  # A2..A5: ground-truth labels
        worksheet.write(0, idx + 1, label)  # B1..E1: prediction labels
    worksheet.write('A7', 'meanAcc')
    worksheet.write('F1', 'Sum')
    worksheet.write_formula(6, 1, '=AVERAGE(B2,C3,D4,E5)', value=np.trace(CFMatrix) / 4)
    worksheet.set_column('A:E', 10)


def main():
    """
    Read ./Experiments.xlsx and write one worksheet per experiment to Results.xlsx.

    Each worksheet is named after the row's 'version' value and holds the
    OCR'd confusion matrix returned by handleDFentry for that row.
    """
    df = pandas.read_excel('./Experiments.xlsx')
    # Rename dataframe header.
    df.columns = ['version', 'baseline', 'name', 'description', 'accuracy', 'commit', 'code_location',
                  'model_location', 'device']

    # Create a new Excel file; worksheets are added per experiment below.
    workbook = xlsxwriter.Workbook('Results.xlsx')
    try:
        # One shared format is enough; it does not depend on the row.
        error_format = workbook.add_format({'bold': True, 'font_color': 'red'})
        for row in range(df.shape[0]):
            frame = df[row:row + 1]
            CFMatrix, cfm_file_path, ErrIndex = handleDFentry(frame)
            # Sheet names must be strings; coerce in case the column was not read as text.
            worksheet = workbook.add_worksheet(str(frame['version'].values[0]))
            _write_result_sheet(worksheet, CFMatrix, cfm_file_path, ErrIndex, error_format)
    finally:
        # Close even when a row fails, so sheets finished so far are still saved.
        workbook.close()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment