rpw · October 22, 2020 14:24
diff --git a/find_baseaddr.py b/find_baseaddr.py
 # Reverse engineering tricks: 
 # Determine load addresses using differences between string references 
 # (c) 2017 Comsecuris UG

 from idc import *
 from idautils import *
 from sets import Set

 ATTEMPTS = 10

 #############################################################################

 def memory_dwords_as_sorted_list(start, end):
    print("Gathering dwords in memory. This may take a while...")
    values = []
    for a in range(start, end, 4):
        values += [ Dword(a) ]
    values = list(Set(values))
    values.sort()
    return values

 def compute_differences(values):
    differences = [0] * (len(values)-1)
    for i in range(len(values)-1):
        differences[i] = values[i+1] - values[i] 
    return differences

 # Knuth Morris Pratt algorithm to find all subsequences
 # https://www.safaribooksonline.com/library/view/python-cookbook-2nd/0596007973/ch05s14.html
 def KMP(text, pattern):
    # ensure we can index into pattern, and also make a copy to protect
    # against changes to 'pattern' while we're suspended by `yield'
    pattern = list(pattern)
    length = len(pattern)
    # build the KMP "table of shift amounts" and name it 'shifts'
    shifts = [1] * (length + 1)
    shift = 1
    for pos, pat in enumerate(pattern):
        while shift <= pos and pat != pattern[pos-shift]:
                shift += shifts[pos-shift]
        shifts[pos+1] = shift
    # perform the actual search
    startPos = 0
    matchLen = 0
    for c in text:
        while matchLen == length or matchLen >= 0 and pattern[matchLen] != c:
                startPos += shifts[matchLen]
                matchLen -= shifts[matchLen]
        matchLen += 1
        if matchLen == length: yield startPos

 def identify_base_addresses(values, strs):
    string_differences = compute_differences(strs)
    differences = compute_differences(values)

    for v in string_differences:
        if v <= 0:
            print("Invalid sequence of strings. Memory addresses have to be in strictly increasing order.")
            return []

    matches = KMP(differences, string_differences)
    baseaddrs = []

    for pos in matches:
        baseaddrs += [(values[pos]-strs[0])]

    return baseaddrs

 def find_adjacent_strings(strs):
    from random import randint
    sample_nr = 6

    for attempt in xrange(0, ATTEMPTS):
        start_off = randint(0, len(strs) - sample_nr)
        matches = []
        print("Looking for adjacent strings (attempt %d at offset %d)" %(attempt + 1, start_off))
        for i in xrange(start_off, len(strs)-1):
            if len(matches) == sample_nr:
                return matches
            if strs[i][0] + strs[i][1] == strs[i+1][0]:
                matches.append(strs[i][0])
            else:
                del matches[:]

    print("No adjacent strings found...")
    return []


 #############################################################################

 strings = []
 found   = False
 ea      = ScreenEA()
 values  = memory_dwords_as_sorted_list(SegStart(ea), SegEnd(ea))
 for s in Strings():
    strings.append((s.ea, s.length))

 baseaddrs = []
 for attempt in xrange(0, ATTEMPTS):
    # lists of start addresses of strings that are consecutively placed in memory
    stringaddrs = find_adjacent_strings(strings)
    if stringaddrs == []: continue
    print("using adjacent strings %s" %(["%x" %(s) for s in stringaddrs]))

    baseaddrs = identify_base_addresses(values, stringaddrs) 
    for baseaddr in baseaddrs:
        print "Identified potential base address for image: %08x" % baseaddr
        found = True

    if found: break

 if len(baseaddrs) == 0:
    print("NO potential base address found, you may try again")
	# Reverse engineering tricks:
	# Determine load addresses using differences between string references
	# (c) 2017 Comsecuris UG

	from idc import *
	from idautils import *
	from sets import Set

	ATTEMPTS = 10

	#############################################################################

	def memory_dwords_as_sorted_list(start, end):
	print("Gathering dwords in memory. This may take a while...")
	values = []
	for a in range(start, end, 4):
	values += [ Dword(a) ]
	values = list(Set(values))
	values.sort()
	return values

	def compute_differences(values):
	differences = [0] * (len(values)-1)
	for i in range(len(values)-1):
	differences[i] = values[i+1] - values[i]
	return differences

	# Knuth Morris Pratt algorithm to find all subsequences
	# https://www.safaribooksonline.com/library/view/python-cookbook-2nd/0596007973/ch05s14.html
	def KMP(text, pattern):
	# ensure we can index into pattern, and also make a copy to protect
	# against changes to 'pattern' while we're suspended by `yield'
	pattern = list(pattern)
	length = len(pattern)
	# build the KMP "table of shift amounts" and name it 'shifts'
	shifts = [1] * (length + 1)
	shift = 1
	for pos, pat in enumerate(pattern):
	while shift <= pos and pat != pattern[pos-shift]:
	shift += shifts[pos-shift]
	shifts[pos+1] = shift
	# perform the actual search
	startPos = 0
	matchLen = 0
	for c in text:
	while matchLen == length or matchLen >= 0 and pattern[matchLen] != c:
	startPos += shifts[matchLen]
	matchLen -= shifts[matchLen]
	matchLen += 1
	if matchLen == length: yield startPos

	def identify_base_addresses(values, strs):
	string_differences = compute_differences(strs)
	differences = compute_differences(values)

	for v in string_differences:
	if v <= 0:
	print("Invalid sequence of strings. Memory addresses have to be in strictly increasing order.")
	return []

	matches = KMP(differences, string_differences)
	baseaddrs = []

	for pos in matches:
	baseaddrs += [(values[pos]-strs[0])]

	return baseaddrs

	def find_adjacent_strings(strs):
	from random import randint
	sample_nr = 6

	for attempt in xrange(0, ATTEMPTS):
	start_off = randint(0, len(strs) - sample_nr)
	matches = []
	print("Looking for adjacent strings (attempt %d at offset %d)" %(attempt + 1, start_off))
	for i in xrange(start_off, len(strs)-1):
	if len(matches) == sample_nr:
	return matches
	if strs[i][0] + strs[i][1] == strs[i+1][0]:
	matches.append(strs[i][0])
	else:
	del matches[:]

	print("No adjacent strings found...")
	return []


	#############################################################################

	strings = []
	found = False
	ea = ScreenEA()
	values = memory_dwords_as_sorted_list(SegStart(ea), SegEnd(ea))
	for s in Strings():
	strings.append((s.ea, s.length))

	baseaddrs = []
	for attempt in xrange(0, ATTEMPTS):
	# lists of start addresses of strings that are consecutively placed in memory
	stringaddrs = find_adjacent_strings(strings)
	if stringaddrs == []: continue
	print("using adjacent strings %s" %(["%x" %(s) for s in stringaddrs]))

	baseaddrs = identify_base_addresses(values, stringaddrs)
	for baseaddr in baseaddrs:
	print "Identified potential base address for image: %08x" % baseaddr
	found = True

	if found: break

	if len(baseaddrs) == 0:
	print("NO potential base address found, you may try again")