-
-
Save simonsan/56cd88c674415a33c63265c904610d0f to your computer and use it in GitHub Desktop.
Ghidra script for demangling Rust symbols
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attempts to demangle all mangled symbols in the current program using the Rust
# mangling schemes, and replace the default symbol and function signature
# (if applicable) with the demangled symbol.
#
# License: MIT OR Apache-2.0
#@author Jack Grigg <thestr4d@gmail.com>
#@category Symbol
from ghidra.app.util.demangler import ( | |
DemanglerOptions, | |
DemangledType, | |
DemangledFunction, | |
DemangledUnknown, | |
) | |
from ghidra.program.model.symbol import SourceType | |
# Maps the text found between a pair of `$` characters in a legacy-mangled
# identifier to the character it stands for.
#
# See src/librustc_codegen_utils/symbol_names/legacy.rs for these mappings.
ESCAPE_MAP = dict((
    ('SP', '@'),
    ('BP', '*'),
    ('RF', '&'),
    ('LT', '<'),
    ('GT', '>'),
    ('LP', '('),
    ('RP', ')'),
    ('C', ','),
))
# Returns True if `s` looks like the hash element that ends a legacy-mangled
# path: the letter 'h' followed only by lowercase hexadecimal digits.
# A bare 'h' (no digits at all) is accepted as well.
def is_rust_hash(s):
    if not s.startswith('h'):
        return False
    lower_hex = '0123456789abcdef'
    for ch in s[1:]:
        if ch not in lower_hex:
            return False
    return True
# Adapted from the rustc-demangle crate, written by Alex Crichton.
#
# Source: https://github.com/rust-lang/rustc-demangle/tree/2811a1ad6f7c8bead2ef3671e4fdc10de1553e96
# License: MIT OR Apache-2.0
#
# Demangles a symbol written in the legacy Rust scheme
# (`_ZN<len><ident>...E`, also accepted with a `ZN` or `__ZN` prefix).
#
# Returns:
# - `None` if `mangled` does not have a recognised prefix, contains non-ASCII
#   text, or is malformed/truncated (no closing 'E', or a length prefix that
#   runs past the end of the symbol).
# - Otherwise a list with one `(mangled, demangled, demangled_name)` tuple per
#   path element, where `demangled` is the `::`-joined path up to and including
#   that element and `demangled_name` is the element on its own. A trailing
#   hash element is dropped, so the list may be empty.
def demangle_legacy(mangled):
    if mangled.startswith('_ZN'):
        inner = mangled[3:]
    elif mangled.startswith('ZN'):
        # On Windows, dbghelp strips leading underscores, so we accept 'ZN...E'
        # form too.
        inner = mangled[2:]
    elif mangled.startswith('__ZN'):
        # On OSX, symbols are prefixed with an extra _
        inner = mangled[4:]
    else:
        return None

    # Only works with ASCII text.
    if any(ord(c) >= 128 for c in inner):
        return None

    idents = _split_legacy_path(inner)
    if idents is None:
        return None

    # Skip the hash at the end.
    if idents and is_rust_hash(idents[-1]):
        idents = idents[:-1]

    parts = []
    demangled = ''
    for index, ident in enumerate(idents):
        if index != 0:
            demangled += '::'
        demangled_name = _unescape_legacy_ident(ident)
        demangled += demangled_name
        parts.append((mangled, demangled, demangled_name))
    return parts


# Splits the text after the `ZN` prefix into its length-prefixed identifiers.
#
# Returns the list of raw (still escaped) identifiers that precede the closing
# 'E', or `None` if the text is malformed. Every read is bounds-checked so a
# truncated symbol is rejected instead of raising.
def _split_legacy_path(inner):
    idents = []
    pos = 0
    end = len(inner)
    while pos < end and inner[pos] != 'E':
        # Decode an identifier element's length.
        if not inner[pos].isdigit():
            return None
        length = 0
        while pos < end and inner[pos].isdigit():
            length = 10 * length + int(inner[pos])
            pos += 1
        # The identifier must fit, and at least one more character (the next
        # length prefix or the closing 'E') must follow it.
        if pos + length >= end:
            return None
        idents.append(inner[pos:pos + length])
        pos += length
    if pos >= end:
        # Ran out of input without seeing the closing 'E'.
        return None
    return idents


# Rewrites one raw identifier into its readable form: `..` becomes `::`,
# `$NAME$` escapes are replaced via ESCAPE_MAP, and `$u<hex>$` escapes are
# replaced by the character with that code point. Decoding stops at the first
# invalid or unterminated escape and the remaining text is kept verbatim.
def _unescape_legacy_ident(rest):
    if rest.startswith('_$'):
        rest = rest[1:]
    lower_hex = '0123456789abcdef'
    pieces = []
    while True:
        if rest.startswith('.'):
            if rest.startswith('..'):
                pieces.append('::')
                rest = rest[2:]
            else:
                # Also covers an identifier that ends in a single '.'.
                pieces.append('.')
                rest = rest[1:]
        elif rest.startswith('$'):
            close = rest.find('$', 1)
            if close == -1:
                break
            escape = rest[1:close]
            after_escape = rest[close + 1:]
            unescaped = ESCAPE_MAP.get(escape)
            if unescaped is None:
                digits = escape[1:]
                if not escape.startswith('u') or not digits:
                    # Invalid escape sequence
                    break
                # int() alone would also accept signs, whitespace, a '0x'
                # prefix and uppercase digits, so validate explicitly.
                if any(d not in lower_hex for d in digits):
                    break
                try:
                    # TODO: Maybe filter out control codepoints?
                    unescaped = chr(int(digits, 16))
                except (ValueError, OverflowError):
                    # Out of range for chr().
                    break
            pieces.append(unescaped)
            rest = after_escape
        else:
            # Copy plain text up to the next '$' or '.', whichever is first.
            hits = [p for p in (rest.find('$'), rest.find('.')) if p != -1]
            if not hits:
                break
            cut = min(hits)
            pieces.append(rest[:cut])
            rest = rest[cut:]
    pieces.append(rest)
    return ''.join(pieces)
# Adapted from the rustc-demangle crate, written by Alex Crichton.
#
# Source: https://github.com/rust-lang/rustc-demangle/tree/2811a1ad6f7c8bead2ef3671e4fdc10de1553e96
# License: MIT OR Apache-2.0
#
# Placeholder for the v0 mangling scheme: no symbol is demangled yet, so every
# input yields `None`.
def demangle_v0(mangled):
    # TODO: Port from rustc-demangle
    return None
# Builds a `DemangledObject` out of its parts.
#
# `fn` is called as `fn(mangled, demangled, demangled_name)` and must be a
# subclass of `DemangledObject` with that constructor. When `namespace` is not
# `None` it is attached to the new object via `setNamespace` before returning.
def objectify(fn, namespace, mangled, demangled, demangled_name):
    obj = fn(mangled, demangled, demangled_name)
    if namespace is None:
        return obj
    obj.setNamespace(namespace)
    return obj
# Attempts to demangle the given symbol, trying the legacy scheme first and
# the v0 scheme second.
#
# Returns:
# - `None` if the symbol couldn't be demangled.
# - `(namespace, mangled, demangled, demangled_name)` otherwise, where:
#   - `namespace` is either a `DemangledObject` or `None`
#   - `mangled` is the original symbol.
#   - `demangled` is the fully-demangled symbol.
#   - `demangled_name` is the last part of the symbol.
def demangle(mangled):
    parts = demangle_legacy(mangled) or demangle_v0(mangled)
    if not parts:
        return None

    # Every part except the last becomes a nested namespace, each one parented
    # by the namespace built from the part before it.
    namespace = None
    for part_mangled, part_path, part_name in parts[:-1]:
        namespace = objectify(
            DemangledType, namespace, part_mangled, part_path, part_name)

    leaf = parts[-1]
    return (namespace,) + leaf
# Script body: visit every symbol in the global namespace whose source is not
# DEFAULT, and replace each one that demangles with its demangled form.
symbol_table = currentProgram.getSymbolTable()
namespace = currentProgram.getNamespaceManager().getGlobalNamespace()

num_demangled = 0
failures = []

for sym in symbol_table.getSymbols(namespace):
    if symbol_source_is_default(sym) if False else sym.getSource() == SourceType.DEFAULT:
        continue

    address = sym.getAddress()
    original_name = sym.getName()
    demangled_parts = demangle(original_name)
    if demangled_parts is None:
        continue

    # Delete the existing symbol, otherwise we get duplicates.
    sym.delete()

    try:
        # Try treating the symbol as a function.
        applied = objectify(DemangledFunction, *demangled_parts).applyTo(
            currentProgram, address, DemanglerOptions(), monitor)
    except java.lang.IllegalArgumentException:
        # Not a function. This is probably a static, but treat it as unknown.
        applied = objectify(DemangledUnknown, *demangled_parts).applyTo(
            currentProgram, address, DemanglerOptions(), monitor)

    if applied:
        num_demangled += 1
    else:
        print('Couldn\'t apply demangling for %s' % original_name)
        failures.append(original_name)

print('Demangled %d names' % num_demangled)
if failures:
    print('Failed to demangle (%d):' % len(failures))
    for failed_name in sorted(failures):
        print('- %s' % failed_name)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment