benjamindoron · March 4, 2022 17:22
diff --git a/guid_converter_base10.py b/guid_converter_base10.py
 #!/bin/env python
 """Convert the byte arrays of a GUID definition file from base 16 to base 10.

 Usage: guid_converter_base10.py <input> <output>

 The input text is normalised (quotation style, comments, whitespace), every
 space-delimited token that parses as hexadecimal is rewritten in decimal,
 suspicious lines are reported on stdout and the result is written to <output>.
 """

 import os
 import re
 import sys


 compliance_test = re.compile(r"^\t\"[\w\d]+\": \[ [\d, ]+\],$")

 # A token starts right after a space and ends right before a space or a comma
 _token = re.compile(r"(?<= )[^\s,]+(?=[ ,])")

 # Capitalisation heuristics: a run of two or more capitals inside a word,
 # or a quoted name made up of lowercase letters only
 _capital_run = re.compile(r"\w*[A-Z]{2,}\w*")
 _lowercase_name = re.compile(r"\"[a-z]*\"")


 def _normalise(guidfile_data):
 	"""Return `guidfile_data` rewritten into the layout `compliance_test` expects."""
 	# Normalise input for compliance, seems fairly durable against input
 	## Change quotation style
 	guidfile_data = guidfile_data.replace('\'', '\"')

 	## Strip comments
 	guidfile_data = re.sub(r"\s*# [^,\n]+", "", guidfile_data)

 	## Remove array name from update_edk2_guids.py python output
 	guidfile_data = re.sub(r".*([{}]).*", "\\1", guidfile_data)

 	## Normalise JSON definition whitespace
 	guidfile_data = re.sub(r"\"\s*([\w\d]+)\s*\"\s*:\s*\[", "\"\\1\": [", guidfile_data.replace('  ', ' '))

 	## Separate nybbles with whitespace
 	guidfile_data = re.sub(r"([\[,])0x", "\\1 0x", guidfile_data)

 	## Separate final nybble with whitespace
 	return guidfile_data.replace('],', ' ],').replace('  ', ' ')


 def _to_decimal(match):
 	"""Return the matched token in base 10, or unchanged if it is not hexadecimal."""
 	token = match.group(0)
 	try:
 		# `int(..., 16)` accepts the token with or without a `0x` prefix
 		return str(int(token, 16))
 	except ValueError:
 		# Brackets, names and other non-numeric tokens are left alone
 		return token


 def _convert_tokens(guidfile_data):
 	"""Return `guidfile_data` with every hexadecimal token rewritten in decimal.

 	The text is scanned once and each token is converted exactly once, so a
 	decimal result is never re-read as hexadecimal by a later replacement and
 	the cost stays linear in the size of the input.
 	"""
 	return _token.sub(_to_decimal, guidfile_data)


 def _report(guidfile_data):
 	"""Print every line that fails the compliance or capitalisation checks."""
 	for line in guidfile_data.splitlines():
 		# NOTE: `match()` operates from the beginning of a line
 		if compliance_test.match(line) is None:
 			print("The following line fails compliance testing, please validate!")
 			print(line)
 		if _capital_run.search(line) is not None or _lowercase_name.search(line) is not None:
 			print("The following line has potentially problematic capitalisation, please validate!")
 			print(line)


 def main():
 	"""Read the file named by argv[1], convert it and write the result to argv[2]."""
 	# TODO: Consider offering to parse a directory of inputs
 	if len(sys.argv) != 3:
 		print("Usage:", sys.argv[0], "<input> <output>")
 		# `sys.exit()` flushes stdout on the way out, `os._exit()` does not
 		sys.exit(1)

 	# Operating on separate files at present; TODO: add feature later
 	with open(sys.argv[1], 'r') as guidfile_input:
 		guidfile_data = guidfile_input.read()

 	guidfile_data = _convert_tokens(_normalise(guidfile_data))
 	_report(guidfile_data)

 	# The output is only opened (and truncated) once the conversion succeeded
 	with open(sys.argv[2], 'w') as guidfile_output:
 		guidfile_output.write(guidfile_data)


 if __name__ == "__main__":
 	main()
	#!/bin/env python
	"""Convert the byte arrays of a GUID definition file from base 16 to base 10.

	Usage: guid_converter_base10.py <input> <output>

	The input text is normalised (quotation style, comments, whitespace), every
	space-delimited token that parses as hexadecimal is rewritten in decimal,
	suspicious lines are reported on stdout and the result is written to <output>.
	"""

	import os
	import re
	import sys


	compliance_test = re.compile(r"^\t\"[\w\d]+\": \[ [\d, ]+\],$")

	# A token starts right after a space and ends right before a space or a comma
	_token = re.compile(r"(?<= )[^\s,]+(?=[ ,])")

	# Capitalisation heuristics: a run of two or more capitals inside a word,
	# or a quoted name made up of lowercase letters only
	_capital_run = re.compile(r"\w*[A-Z]{2,}\w*")
	_lowercase_name = re.compile(r"\"[a-z]*\"")


	def _normalise(guidfile_data):
		"""Return `guidfile_data` rewritten into the layout `compliance_test` expects."""
		# Normalise input for compliance, seems fairly durable against input
		## Change quotation style
		guidfile_data = guidfile_data.replace('\'', '\"')

		## Strip comments
		guidfile_data = re.sub(r"\s*# [^,\n]+", "", guidfile_data)

		## Remove array name from update_edk2_guids.py python output
		guidfile_data = re.sub(r".*([{}]).*", "\\1", guidfile_data)

		## Normalise JSON definition whitespace
		guidfile_data = re.sub(r"\"\s*([\w\d]+)\s*\"\s*:\s*\[", "\"\\1\": [", guidfile_data.replace('  ', ' '))

		## Separate nybbles with whitespace
		guidfile_data = re.sub(r"([\[,])0x", "\\1 0x", guidfile_data)

		## Separate final nybble with whitespace
		return guidfile_data.replace('],', ' ],').replace('  ', ' ')


	def _to_decimal(match):
		"""Return the matched token in base 10, or unchanged if it is not hexadecimal."""
		token = match.group(0)
		try:
			# `int(..., 16)` accepts the token with or without a `0x` prefix
			return str(int(token, 16))
		except ValueError:
			# Brackets, names and other non-numeric tokens are left alone
			return token


	def _convert_tokens(guidfile_data):
		"""Return `guidfile_data` with every hexadecimal token rewritten in decimal.

		The text is scanned once and each token is converted exactly once, so a
		decimal result is never re-read as hexadecimal by a later replacement and
		the cost stays linear in the size of the input.
		"""
		return _token.sub(_to_decimal, guidfile_data)


	def _report(guidfile_data):
		"""Print every line that fails the compliance or capitalisation checks."""
		for line in guidfile_data.splitlines():
			# NOTE: `match()` operates from the beginning of a line
			if compliance_test.match(line) is None:
				print("The following line fails compliance testing, please validate!")
				print(line)
			if _capital_run.search(line) is not None or _lowercase_name.search(line) is not None:
				print("The following line has potentially problematic capitalisation, please validate!")
				print(line)


	def main():
		"""Read the file named by argv[1], convert it and write the result to argv[2]."""
		# TODO: Consider offering to parse a directory of inputs
		if len(sys.argv) != 3:
			print("Usage:", sys.argv[0], "<input> <output>")
			# `sys.exit()` flushes stdout on the way out, `os._exit()` does not
			sys.exit(1)

		# Operating on separate files at present; TODO: add feature later
		with open(sys.argv[1], 'r') as guidfile_input:
			guidfile_data = guidfile_input.read()

		guidfile_data = _convert_tokens(_normalise(guidfile_data))
		_report(guidfile_data)

		# The output is only opened (and truncated) once the conversion succeeded
		with open(sys.argv[2], 'w') as guidfile_output:
			guidfile_output.write(guidfile_data)


	if __name__ == "__main__":
		main()