fire-eggs · June 5, 2017 23:18
diff --git a/gedcombine.py b/gedcombine.py
 import os, sys

 # NOTE: assuming 'clean' files: level/tag/etc separated by spaces
 # NOTE: assuming 'normal' ids: e.g. Ixxx, where xxx is a number

 # Per-letter id counters. file1_ids() stores numbers parsed from the first
 # file's level-0 ids under a one-character key, and newid() increments the
 # entry for an id's first character to produce a fresh id.
 # NOTE: this name shadows the builtin dict type for the whole module.
 dict = {}
 # Old id -> new id translations for the file currently being merged;
 # read_filen() resets it at the start of each file, fixids() fills it in.
 file2_ids = {}

 def file1_ids(line):
    """Record the largest numeric id found in a level-0 record line.

    `line` is one stripped line of the first file. Only lines that start
    with "0", have at least three space-separated tokens, and whose second
    token has the form '@<id>@' are considered; every other line is ignored.

    The digits of the id (so 'NI03' counts as 3) are folded into the
    module-level `dict` counter stored under the id's first character,
    which is the same key newid() uses when it hands out replacement ids.
    Returns None.
    """
    global dict

    if not line.startswith("0"):
        return

    vals = line.split(' ')
    if len(vals) < 3:  # HEAD, TRLR
        return

    # Expect exactly '@<id>@'. Doubled spaces or a non-id second token
    # would otherwise make the three-way unpacking raise.
    parts = vals[1].split('@')
    if len(parts) != 3 or not parts[1]:
        return
    realid = parts[1]

    # deal with ids like 'NI03'
    digits = ''.join(c for c in realid if c in '1234567890')
    if not digits:
        return

    # Keep the maximum rather than the latest value: records are not
    # guaranteed to appear in ascending id order.
    key = realid[0]
    dict[key] = max(dict.get(key, 0), int(digits))
        
 def read_file1(path):
    """Echo the first file to stdout without its trailer, recording ids.

    Every line is stripped of surrounding whitespace and handed to
    file1_ids(); it is then printed unless it is exactly "0 TRLR".
    """
    with open(path, "r") as source:
        for raw in source:
            record = raw.strip()
            file1_ids(record)
            if record == "0 TRLR":
                # the trailer is the only line of the first file not echoed
                continue
            print(record)

 def newid(oldid):
    """Return a fresh id that shares `oldid`'s first character.

    The module-level `dict` counter for that character is incremented
    (starting from 0 when the character has no entry yet) and the result
    is the character followed by the new counter value.
    """
    prefix = oldid[0]
    counter = dict.get(prefix, 0) + 1
    dict[prefix] = counter
    return "{}{}".format(prefix, counter)
    
 def fixids(line):
    """Rewrite the '@id@' reference in `line` to its id in the merged output.

    `line` is one stripped line of a second-or-later file. When it holds
    exactly one '@...@' pair, the text between the at-signs is looked up
    in the module-level `file2_ids` map; an id not seen before is given a
    new value by newid() and remembered, so every later reference to the
    same old id gets the same replacement.

    The line is returned unchanged when it has no '@', does not split
    into exactly three pieces on '@', has nothing between the at-signs
    ('@@'), or the enclosed text starts with '#' (a calendar escape such
    as '@#DJULIAN@', which is not a record id).
    """
    # no id in line: nothing to do
    if "@" not in line:
        return line

    pieces = line.split('@')
    if len(pieces) != 3:
        return line
    p1, oldid, p2 = pieces

    # '@@' and '@#...@' are escapes, not references to a record.
    if not oldid or oldid.startswith('#'):
        return line

    # have we already translated this id?
    if oldid not in file2_ids:
        file2_ids[oldid] = newid(oldid)
    return "{}@{}@{}".format(p1, file2_ids[oldid], p2)
        
 def read_filen(path):
    """Print the records of a second-or-later file with their ids rewritten.

    Resets the module-level `file2_ids` map, then reads `path` line by
    line. Everything before the first level-0 line containing "INDI" is
    dropped (HEAD/SUBM/SUBN), reading stops at the "0 TRLR" line, and each
    line in between is passed through fixids() and printed. Blank lines
    are skipped instead of raising IndexError.
    """
    global file2_ids
    file2_ids = {}

    # for files 2-n, need to:
    # a) ignore HEAD/SUBM/SUBN/TRLR
    # b) translate any id reference
    seen_indi = False
    with open(path, "r") as f:
        for line in f:
            line2 = line.strip()
            if not line2:
                # nothing to index or translate on an empty line
                continue

            # skip HEAD/SUBM/SUBN
            if not seen_indi:
                if not (line2.startswith('0') and "INDI" in line2):
                    continue
                seen_indi = True
            if line2 == "0 TRLR":
                return

            print(fixids(line2))
            
 # Command-line driver: the merged file is written to stdout, so all
 # diagnostics go to stderr and failures exit with a non-zero status.
 if len(sys.argv) == 1:
    sys.stderr.write("Usage: python gedcombine.py <first-file> [<more-files> ...]\n")
    sys.exit(1)

 # Check every input before emitting anything, so a bad path cannot leave
 # a half-written merge on stdout.
 for path in sys.argv[1:]:
    if not os.path.isfile(path):
        sys.stderr.write("Not a file! {}\n".format(path))
        sys.exit(1)

 # The first file is copied as-is (minus its trailer); the others get new ids.
 read_file1(sys.argv[1])
 for path in sys.argv[2:]:
    read_filen(path)

 # single trailer closing the combined output
 print("0 TRLR")
	import os, sys

	# NOTE: assuming 'clean' files: level/tag/etc separated by spaces
	# NOTE: assuming 'normal' ids: e.g. Ixxx, where xxx is a number

	# Per-letter id counters. file1_ids() stores numbers parsed from the first
	# file's level-0 ids under a one-character key, and newid() increments the
	# entry for an id's first character to produce a fresh id.
	# NOTE: this name shadows the builtin dict type for the whole module.
	dict = {}
	# Old id -> new id translations for the file currently being merged;
	# read_filen() resets it at the start of each file, fixids() fills it in.
	file2_ids = {}

	def file1_ids(line):
		"""Record the largest numeric id found in a level-0 record line.

		`line` is one stripped line of the first file. Only lines that start
		with "0", have at least three space-separated tokens, and whose second
		token has the form '@<id>@' are considered; every other line is ignored.

		The digits of the id (so 'NI03' counts as 3) are folded into the
		module-level `dict` counter stored under the id's first character,
		which is the same key newid() uses when it hands out replacement ids.
		Returns None.
		"""
		global dict

		if not line.startswith("0"):
			return

		vals = line.split(' ')
		if len(vals) < 3:  # HEAD, TRLR
			return

		# Expect exactly '@<id>@'. Doubled spaces or a non-id second token
		# would otherwise make the three-way unpacking raise.
		parts = vals[1].split('@')
		if len(parts) != 3 or not parts[1]:
			return
		realid = parts[1]

		# deal with ids like 'NI03'
		digits = ''.join(c for c in realid if c in '1234567890')
		if not digits:
			return

		# Keep the maximum rather than the latest value: records are not
		# guaranteed to appear in ascending id order.
		key = realid[0]
		dict[key] = max(dict.get(key, 0), int(digits))

	def read_file1(path):
		"""Echo the first file to stdout without its trailer, recording ids.

		Every line is stripped of surrounding whitespace and handed to
		file1_ids(); it is then printed unless it is exactly "0 TRLR".
		"""
		with open(path, "r") as source:
			for raw in source:
				record = raw.strip()
				file1_ids(record)
				if record == "0 TRLR":
					# the trailer is the only line of the first file not echoed
					continue
				print(record)

	def newid(oldid):
		"""Return a fresh id that shares `oldid`'s first character.

		The module-level `dict` counter for that character is incremented
		(starting from 0 when the character has no entry yet) and the result
		is the character followed by the new counter value.
		"""
		prefix = oldid[0]
		counter = dict.get(prefix, 0) + 1
		dict[prefix] = counter
		return "{}{}".format(prefix, counter)

	def fixids(line):
		"""Rewrite the '@id@' reference in `line` to its id in the merged output.

		`line` is one stripped line of a second-or-later file. When it holds
		exactly one '@...@' pair, the text between the at-signs is looked up
		in the module-level `file2_ids` map; an id not seen before is given a
		new value by newid() and remembered, so every later reference to the
		same old id gets the same replacement.

		The line is returned unchanged when it has no '@', does not split
		into exactly three pieces on '@', has nothing between the at-signs
		('@@'), or the enclosed text starts with '#' (a calendar escape such
		as '@#DJULIAN@', which is not a record id).
		"""
		# no id in line: nothing to do
		if "@" not in line:
			return line

		pieces = line.split('@')
		if len(pieces) != 3:
			return line
		p1, oldid, p2 = pieces

		# '@@' and '@#...@' are escapes, not references to a record.
		if not oldid or oldid.startswith('#'):
			return line

		# have we already translated this id?
		if oldid not in file2_ids:
			file2_ids[oldid] = newid(oldid)
		return "{}@{}@{}".format(p1, file2_ids[oldid], p2)

	def read_filen(path):
		"""Print the records of a second-or-later file with their ids rewritten.

		Resets the module-level `file2_ids` map, then reads `path` line by
		line. Everything before the first level-0 line containing "INDI" is
		dropped (HEAD/SUBM/SUBN), reading stops at the "0 TRLR" line, and each
		line in between is passed through fixids() and printed. Blank lines
		are skipped instead of raising IndexError.
		"""
		global file2_ids
		file2_ids = {}

		# for files 2-n, need to:
		# a) ignore HEAD/SUBM/SUBN/TRLR
		# b) translate any id reference
		seen_indi = False
		with open(path, "r") as f:
			for line in f:
				line2 = line.strip()
				if not line2:
					# nothing to index or translate on an empty line
					continue

				# skip HEAD/SUBM/SUBN
				if not seen_indi:
					if not (line2.startswith('0') and "INDI" in line2):
						continue
					seen_indi = True
				if line2 == "0 TRLR":
					return

				print(fixids(line2))

	# Command-line driver: the merged file is written to stdout, so all
	# diagnostics go to stderr and failures exit with a non-zero status.
	if len(sys.argv) == 1:
		sys.stderr.write("Usage: python gedcombine.py <first-file> [<more-files> ...]\n")
		sys.exit(1)

	# Check every input before emitting anything, so a bad path cannot leave
	# a half-written merge on stdout.
	for path in sys.argv[1:]:
		if not os.path.isfile(path):
			sys.stderr.write("Not a file! {}\n".format(path))
			sys.exit(1)

	# The first file is copied as-is (minus its trailer); the others get new ids.
	read_file1(sys.argv[1])
	for path in sys.argv[2:]:
		read_filen(path)

	# single trailer closing the combined output
	print("0 TRLR")