Created
October 2, 2017 10:52
-
-
Save Pablo-R/e941e9e1d76c9ce51bd713615910018a to your computer and use it in GitHub Desktop.
GEDCOM to XML parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby
#
# Converts GEDCOM text into XML.
#
# Input is read from ARGF (the files named on the command line, or standard
# input when none are given). The result is pretty-printed with REXML and
# written to "gedcom.xml" in the current directory.
#
# Every GEDCOM line has the shape "LEVEL [@ID@] TAG [VALUE]". Each line becomes
# an element named after the lower-cased tag, nested according to its level,
# all wrapped in a single <gedcom> root element.
require "rexml/document"

# Matches one structurally valid GEDCOM line. Lines that do not match (blank
# lines, garbage, tags that could not be used as XML element names) are skipped.
GEDCOM_LINE =
  /\A\s*(?<level>\d+)\s+(?:@(?<id>[^@\s]+)@\s+)?(?<tag>[A-Za-z_]\w*)(?:\s+(?<value>.*))?\z/.freeze

# A value that consists solely of a cross-reference pointer such as "@F1@".
XREF_POINTER = /\A@([^@\s]+)@\s*\z/.freeze

# Separator inserted before the text of a continuation line.
# NOTE(review): the GEDCOM standard describes CONC as joining without any
# separator; the single space is what this script has always emitted and is
# kept so existing output does not change. Confirm which behaviour is wanted.
CONTINUATION_JOINERS = { "CONC" => " ", "CONT" => "\n" }.freeze

# Characters that must not appear literally in XML character data/attributes.
XML_ESCAPES = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", '"' => "&quot;" }.freeze

# Escapes text for use as XML element content.
#
# @param text [String]
# @return [String] a new string with &, < and > replaced by entities
def escape_xml_text(text)
  text.gsub(/[&<>]/, XML_ESCAPES)
end

# Escapes text for use inside a double-quoted XML attribute value.
#
# @param text [String]
# @return [String] a new string with &, <, > and " replaced by entities
def escape_xml_attribute(text)
  text.gsub(/[&<>"]/, XML_ESCAPES)
end

# Appends closing tags for every open element whose level is >= +level+.
#
# @param xml [String] the XML text built so far (mutated in place)
# @param open_tags [Array<Array(Integer, String)>] stack of [level, closing tag]
# @param level [Integer] level of the line about to be emitted
# @return [void]
def close_open_tags(xml, open_tags, level)
  until open_tags.empty? || open_tags.last.first < level
    depth, closing_tag = open_tags.pop
    # A closing tag that follows element text stays on the same line; one that
    # follows a newline is indented to line up with its opening tag.
    xml << "\t" * (depth + 1) if xml.end_with?("\n")
    xml << closing_tag << "\n"
  end
end

# Builds the raw (not yet pretty-printed) XML text for a sequence of GEDCOM
# lines.
#
# @param lines [#each] yields one GEDCOM line (String) at a time; a trailing
#   line terminator on each line is removed before parsing
# @return [String] XML text with a single <gedcom> root element
def gedcom_to_xml(lines)
  xml = String.new("<gedcom>")
  # The root sits at level -1 so that no GEDCOM line (level >= 0) can close it.
  open_tags = [[-1, "</gedcom>"]]
  previous_level = -1

  lines.each do |raw_line|
    fields = GEDCOM_LINE.match(raw_line.chomp)
    next unless fields

    level = fields[:level].to_i
    tag = fields[:tag]
    value = fields[:value].to_s

    close_open_tags(xml, open_tags, level)

    pointer = value[XREF_POINTER, 1]
    body = pointer ? "<xref>#{escape_xml_text(pointer)}</xref>" : escape_xml_text(value)

    if CONTINUATION_JOINERS.key?(tag)
      # Continuation text belongs to the element that is still open, so the
      # line is accounted for at its parent's level.
      xml << CONTINUATION_JOINERS[tag] << body
      previous_level = level - 1
    else
      # Descending into a child: finish the parent's opening line first.
      xml << "\n" if level > previous_level
      name = tag.downcase
      id_attribute = fields[:id] ? %( id="#{escape_xml_attribute(fields[:id])}") : ""
      xml << "\t" * (level + 1) << "<#{name}#{id_attribute}>#{body}"
      open_tags.push([level, "</#{name}>"])
      previous_level = level
    end
  end

  # Close whatever is still open, including the root.
  close_open_tags(xml, open_tags, -1)
  xml
end

# Parses +xml_string+ and writes it, pretty-printed, to +path+.
#
# @param xml_string [String] well-formed XML text
# @param path [String] destination file; overwritten if it exists
# @return [void]
def write_gedcom_xml(xml_string, path = "gedcom.xml")
  document = REXML::Document.new(xml_string)
  formatter = REXML::Formatters::Pretty.new(2)
  formatter.compact = true

  pretty = String.new
  formatter.write(document.root, pretty)
  File.open(path, "w") { |file| file.puts(pretty) }
end

write_gedcom_xml(gedcom_to_xml(ARGF.each_line)) if __FILE__ == $PROGRAM_NAME
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment