Created
November 18, 2018 11:28
-
-
Save myrddian/ea84d85e9d88511186a4c0bf4d36b874 to your computer and use it in GitHub Desktop.
Example conversion of XML to JSON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io
import json
import sys

from lxml import etree
def read_xml(file_name):
    """Parse the XML file at *file_name* and return the parsed tree.

    The file is opened in binary mode so the parser can honour the encoding
    declared in the XML prolog. The handle is closed as soon as parsing
    finishes; the returned tree does not need it any more.
    """
    with open(file_name, "rb") as in_file:
        return etree.parse(in_file)
def generate_rterms_dic(xml_element):
    """Convert the children of *xml_element* into a list of dictionaries.

    Each child element of *xml_element* becomes one dictionary that maps the
    tag of every grandchild element to that grandchild's text.

    Args:
        xml_element: An element whose children each wrap a group of
            tag/text leaf elements.

    Returns:
        A list with one ``{tag: text}`` dictionary per child element, in
        document order.
    """
    entries = []
    for rterm in xml_element:
        # Comments and processing instructions carry a non-string ``tag``;
        # they are not data and would break JSON serialisation later on.
        if not isinstance(rterm.tag, str):
            continue
        # Iterate the element directly: ``getchildren()`` is deprecated.
        entries.append({
            rentry.tag: rentry.text
            for rentry in rterm
            if isinstance(rentry.tag, str)
        })
    return entries
def generate_dic_entry(xml_element):
    """Convert one entry element into a dictionary keyed by child tag.

    A child tagged ``RelatedTerms`` is expanded into a list of dictionaries
    via ``generate_rterms_dic``; every other child contributes its text.

    Args:
        xml_element: The element representing a single entry.

    Returns:
        A dictionary mapping each child tag to its text, or to a list of
        dictionaries for the ``RelatedTerms`` child.
    """
    terms = {}
    # Iterate the element directly: ``getchildren()`` is deprecated.
    for rtags in xml_element:
        # Skip comments / processing instructions (their tag is not a string).
        if not isinstance(rtags.tag, str):
            continue
        if rtags.tag == "RelatedTerms":
            terms["RelatedTerms"] = generate_rterms_dic(rtags)
        else:
            terms[rtags.tag] = rtags.text
    return terms
def convertXml2Json(file_name, json_filename):
    """Convert the XML file *file_name* into a JSON file *json_filename*.

    Every child of the XML root is converted with ``generate_dic_entry`` and
    the resulting dictionaries are written, in document order, as the list
    stored under the top-level ``"thesaurus"`` key.

    Args:
        file_name: Path of the XML file to read.
        json_filename: Path of the JSON file to create or overwrite.
    """
    tree_root = read_xml(file_name).getroot()
    json_out = {
        "thesaurus": [generate_dic_entry(term) for term in tree_root],
    }
    # The ``with`` block closes the file itself; no explicit close() needed.
    with open(json_filename, "w") as write_file:
        json.dump(json_out, write_file, indent=2, sort_keys=False)
if __name__ == "__main__":
    # Usage: script.py [xml_path [json_path]]
    # Both arguments are optional; when omitted, the original hard-coded
    # input and output paths are used so existing invocations keep working.
    cli_args = sys.argv[1:]
    xml_path = (
        cli_args[0]
        if len(cli_args) > 0
        else '/Users/myrddian/Projects/MonashGDDS/datawrangling/xml_parsing/xml.xml'
    )
    json_path = cli_args[1] if len(cli_args) > 1 else "json.dat"
    convertXml2Json(xml_path, json_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment