Created
October 22, 2020 14:24
-
-
Save rpw/2c4064712638bce602755a938991e5e9 to your computer and use it in GitHub Desktop.
Find base addresses of flat firmware binaries using differences between string references
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reverse engineering tricks: | |
# Determine load addresses using differences between string references | |
# (c) 2017 Comsecuris UG | |
from idc import * | |
from idautils import * | |
from sets import Set | |
ATTEMPTS = 10 | |
############################################################################# | |
def memory_dwords_as_sorted_list(start, end): | |
print("Gathering dwords in memory. This may take a while...") | |
values = [] | |
for a in range(start, end, 4): | |
values += [ Dword(a) ] | |
values = list(Set(values)) | |
values.sort() | |
return values | |
def compute_differences(values): | |
differences = [0] * (len(values)-1) | |
for i in range(len(values)-1): | |
differences[i] = values[i+1] - values[i] | |
return differences | |
# Knuth Morris Pratt algorithm to find all subsequences | |
# https://www.safaribooksonline.com/library/view/python-cookbook-2nd/0596007973/ch05s14.html | |
def KMP(text, pattern): | |
# ensure we can index into pattern, and also make a copy to protect | |
# against changes to 'pattern' while we're suspended by `yield' | |
pattern = list(pattern) | |
length = len(pattern) | |
# build the KMP "table of shift amounts" and name it 'shifts' | |
shifts = [1] * (length + 1) | |
shift = 1 | |
for pos, pat in enumerate(pattern): | |
while shift <= pos and pat != pattern[pos-shift]: | |
shift += shifts[pos-shift] | |
shifts[pos+1] = shift | |
# perform the actual search | |
startPos = 0 | |
matchLen = 0 | |
for c in text: | |
while matchLen == length or matchLen >= 0 and pattern[matchLen] != c: | |
startPos += shifts[matchLen] | |
matchLen -= shifts[matchLen] | |
matchLen += 1 | |
if matchLen == length: yield startPos | |
def identify_base_addresses(values, strs): | |
string_differences = compute_differences(strs) | |
differences = compute_differences(values) | |
for v in string_differences: | |
if v <= 0: | |
print("Invalid sequence of strings. Memory addresses have to be in strictly increasing order.") | |
return [] | |
matches = KMP(differences, string_differences) | |
baseaddrs = [] | |
for pos in matches: | |
baseaddrs += [(values[pos]-strs[0])] | |
return baseaddrs | |
def find_adjacent_strings(strs): | |
from random import randint | |
sample_nr = 6 | |
for attempt in xrange(0, ATTEMPTS): | |
start_off = randint(0, len(strs) - sample_nr) | |
matches = [] | |
print("Looking for adjacent strings (attempt %d at offset %d)" %(attempt + 1, start_off)) | |
for i in xrange(start_off, len(strs)-1): | |
if len(matches) == sample_nr: | |
return matches | |
if strs[i][0] + strs[i][1] == strs[i+1][0]: | |
matches.append(strs[i][0]) | |
else: | |
del matches[:] | |
print("No adjacent strings found...") | |
return [] | |
############################################################################# | |
strings = [] | |
found = False | |
ea = ScreenEA() | |
values = memory_dwords_as_sorted_list(SegStart(ea), SegEnd(ea)) | |
for s in Strings(): | |
strings.append((s.ea, s.length)) | |
baseaddrs = [] | |
for attempt in xrange(0, ATTEMPTS): | |
# lists of start addresses of strings that are consecutively placed in memory | |
stringaddrs = find_adjacent_strings(strings) | |
if stringaddrs == []: continue | |
print("using adjacent strings %s" %(["%x" %(s) for s in stringaddrs])) | |
baseaddrs = identify_base_addresses(values, stringaddrs) | |
for baseaddr in baseaddrs: | |
print "Identified potential base address for image: %08x" % baseaddr | |
found = True | |
if found: break | |
if len(baseaddrs) == 0: | |
print("NO potential base address found, you may try again") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment