Skip to content

Instantly share code, notes, and snippets.

@maksverver
Created August 28, 2024 17:35
Show Gist options
  • Save maksverver/7ec9221f163070cbd98f4f38a3932036 to your computer and use it in GitHub Desktop.
Save maksverver/7ec9221f163070cbd98f4f38a3932036 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import io
import libxml2
import xml.sax.xmlreader
from drv_libxml2 import LibXml2Reader
def parse_binary():
    """Parse a tiny XML document handed to the reader as a byte stream.

    Builds an InputSource whose byte stream wraps an in-memory document
    (declared as UTF-8) and returns whatever ``LibXml2Reader.parse()``
    returns for it.
    """
    document = b'<?xml version="1.0" encoding="UTF-8"?>\n<root />\n'
    input_source = xml.sax.xmlreader.InputSource()
    input_source.setByteStream(io.BytesIO(document))
    reader = LibXml2Reader()
    return reader.parse(input_source)
def parse_text():
    """Parse a tiny XML document handed to the reader as a character stream.

    Only the character stream is set on the InputSource; no byte stream is
    provided. Returns whatever ``LibXml2Reader.parse()`` returns for it.
    """
    document = '<?xml version="1.0"?>\n<root />\n'
    input_source = xml.sax.xmlreader.InputSource()
    input_source.setCharacterStream(io.StringIO(document))
    reader = LibXml2Reader()
    return reader.parse(input_source)
# Byte-stream input: succeeds (prints "None").
binary_result = parse_binary()
print(binary_result)

# Character-stream input: fails, and prints:
#
#   xmlPythonFileRead: result is not a String
#   <unknown>:1:-1: Unknown IO error
#
#   None
#
text_result = parse_text()
print(text_result)
# Root cause is that LibXml2Reader.parse() does [1]:
#
# input = libxml2.inputBuffer(source.getByteStream())
#
# but since source.getByteStream() returns None, this creates an ioWrapper that
# only ever returns -1 [2], triggering the "result is not a String" error [3].
#
# 1. https://github.com/GNOME/libxml2/blob/master/python/drv_libxml2.py#L149
# 2. https://github.com/GNOME/libxml2/blob/67ff748c3eba93745a09157e11759d09f864492f/python/libxml.py#L73-L74
# 3. https://github.com/GNOME/libxml2/blob/67ff748c3eba93745a09157e11759d09f864492f/python/libxml.c#L333
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment