Created
January 13, 2020 09:53
-
-
Save devarshi16/864790d9ee0a1cdfb6b91c5edb1dce6c to your computer and use it in GitHub Desktop.
Convert Form Labeller output JSON format to NAF data JSON format.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
import sys

import cv2
# Input labels whose boxes are stored in the output "textBBs" list.
textBB_items = ["Printed Key", "Written Key", "Check Box Key", "Comment/Info"]
# Input labels whose boxes are stored in the output "fieldBBs" list.
fieldBB_items = [None, "detectorPrediction", "None", "Printed Value",
                 "Written Value", "Check Box Value", "Logo", "Signature",
                 "Photograph"]
# Image extensions tried, in this order, to find the image matching a JSON file.
extensions = ['.JPEG', '.png', '.jpg', '.tif', '.tiff']
# Folders to convert; trim the list (e.g. to ['internal7']) to process a subset.
folder_names = ['internal28', 'internal17', 'internal7', 'internal8', 'internal9']

# Output "type" value for each group of input labels. A label that appears in
# none of the groups gets no "type" key at all.
_TYPE_GROUPS = (
    ((None, "None", "detectorPrediction", "Printed Key", "Check Box Key"), "text"),
    (("Written Key", "Printed Value", "Written Value", "Signature"), "field"),
    (("Comment/Info",), "comment"),
    (("Logo", "Photograph"), "graphic"),
    (("Check Box Value",), "fieldCheckBox"),
)


def find_image(folder, stem):
    """Return the path of the first existing ``folder/stem + ext`` image.

    Extensions are tried in the order given by ``extensions``. Returns
    ``None`` when no candidate exists on disk.
    """
    for ext in extensions:
        candidate = os.path.join(folder, stem + ext)
        if os.path.exists(candidate):
            return candidate
    return None


def convert_annotations(file_data, height, width):
    """Build the converted annotation dict from one loaded input JSON.

    Args:
        file_data: Parsed input JSON; its ``"textBBs"`` list is read, and each
            entry must provide ``"poly_points"`` and ``"type"``.
        height: Image height stored under ``"height"``.
        width: Image width stored under ``"width"``.

    Returns:
        A dict with ``"textBBs"``, ``"fieldBBs"``, empty ``"pairs"`` and
        ``"samePairs"`` lists, and the image ``"height"`` / ``"width"``.

    Raises:
        KeyError: If ``"textBBs"``, ``"poly_points"`` or ``"type"`` is missing.
    """
    data = {
        "textBBs": [],
        "fieldBBs": [],
        "pairs": [],
        "samePairs": [],
        "height": height,
        "width": width,
    }
    for i, item in enumerate(file_data["textBBs"]):
        points = item["poly_points"]
        # Only four-point boxes are converted.
        if len(points) != 4:
            continue
        # Skip boxes whose first three points share the same x coordinate.
        if points[0][0] == points[1][0] == points[2][0]:
            continue

        item_type = item["type"]
        new_data = {"poly_points": points}
        for labels, naf_type in _TYPE_GROUPS:
            if item_type in labels:
                new_data["type"] = naf_type
                break
        # The id is the index in the input list, so ids of skipped entries
        # are simply left unused.
        new_data["id"] = i

        if item_type in textBB_items:
            new_data["isBlank"] = 1 if item_type == "Written Key" else 0
            data["textBBs"].append(new_data)
        elif item_type in fieldBB_items:
            if item_type == "Signature":
                new_data["isBlank"] = 4
            elif item_type == "Logo":
                new_data["isBlank"] = 3
            else:
                new_data["isBlank"] = 1
            data["fieldBBs"].append(new_data)
        else:
            print("Belongs to no group", item_type)
    return data


def main():
    """Convert every JSON file found in the folders of ``folder_names``."""
    for folder in folder_names:
        json_files = [name for name in os.listdir(folder)
                      if os.path.splitext(name)[1] == '.json']
        # NOTE: the output goes to the input folder under the input file name,
        # so each source JSON is overwritten by its converted version.
        save_dir = os.path.join(folder)
        os.makedirs(save_dir, exist_ok=True)
        for jf in json_files:
            json_file_path = os.path.join(folder, jf)
            print("Reading json:", json_file_path)
            with open(json_file_path, 'r') as f:
                file_data = json.load(f)

            image_loc = find_image(folder, os.path.splitext(jf)[0])
            print("Image Loc", image_loc)
            # cv2.imread returns None when the file cannot be read, so both a
            # missing and an unreadable image end up in the same skip branch.
            img = cv2.imread(image_loc) if image_loc is not None else None
            if img is None:
                print("Skipping, no readable image for", json_file_path)
                continue
            h, w = img.shape[:2]

            data = convert_annotations(file_data, h, w)
            with open(os.path.join(save_dir, jf), 'w') as new_f:
                json.dump(data, new_f)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment