Created
March 28, 2021 15:06
-
-
Save physikerwelt/21c9ac51ac762a65ad8a32350452c6e6 to your computer and use it in GitHub Desktop.
Can minidom generate invalid XML?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyexpat import ExpatError | |
from xml.dom import minidom | |
def create_xml_sample(text: str) -> bytes:
    """Serialize *text* as the only content of a ``<root>`` element.

    A fresh minidom document is built with a single ``root`` element that
    holds one text node containing *text*, and the document is then
    pretty-printed with tab indentation.

    Args:
        text: The character data to place inside ``<root>``.

    Returns:
        The UTF-8 encoded document, including the XML declaration.  Because
        an ``encoding`` is passed to ``toprettyxml`` the result is ``bytes``,
        not ``str``.

    Note:
        The output is whatever minidom writes for the text node; nothing in
        this function checks that *text* only contains characters that are
        legal in XML, so the result is not guaranteed to be parseable.
    """
    doc = minidom.Document()
    root = doc.createElement("root")
    root.appendChild(doc.createTextNode(text))
    doc.appendChild(root)
    # Passing ``encoding`` makes toprettyxml() return encoded bytes.
    return doc.toprettyxml(indent="\t", encoding="utf-8")
if __name__ == '__main__':
    # Plain ASCII text: the generated document must parse back cleanly.
    simple_sample = create_xml_sample("Sample Text")
    minidom.parseString(simple_sample)

    # Non-ASCII characters and XML markup characters: still expected to
    # round-trip through the parser without raising.
    html_sample = create_xml_sample("Adää䛢&<> Text")
    minidom.parseString(html_sample)

    # A backspace control character (U+0008) inside the text: the document
    # is generated without complaint, but parsing it back is expected to fail.
    bib_ref = create_xml_sample("{\x08f 67}")
    try:
        minidom.parseString(bib_ref)
    except ExpatError as e:
        # Expat error code 4 is "not well-formed (invalid token)".
        assert e.code == 4
    else:
        # Without this branch the demonstration would pass silently even if
        # the parser accepted the document.
        raise AssertionError(
            "expected ExpatError when parsing generated XML containing U+0008"
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment