Skip to content

Instantly share code, notes, and snippets.

@simonsan
Forked from str4d/DemangleRust.py
Created December 29, 2021 18:52
Show Gist options
  • Save simonsan/56cd88c674415a33c63265c904610d0f to your computer and use it in GitHub Desktop.
Save simonsan/56cd88c674415a33c63265c904610d0f to your computer and use it in GitHub Desktop.
Ghidra script for demangling Rust symbols
# Attempts to demangle all mangled symbols in the current program using the Rust
# mangling schemes, and replace the default symbol and function signature
# (if applicable) with the demangled symbol.
#
# License: MIT OR Apache-2.0
#@author Jack Grigg <thestr4d@gmail.com>
#@category Symbol
from ghidra.app.util.demangler import (
DemanglerOptions,
DemangledType,
DemangledFunction,
DemangledUnknown,
)
from ghidra.program.model.symbol import SourceType
# Maps the text found between two `$` characters in a legacy-mangled
# identifier to the character it stands for.
#
# See src/librustc_codegen_utils/symbol_names/legacy.rs for these mappings.
ESCAPE_MAP = {
    'SP': '@',  # $SP$
    'BP': '*',  # $BP$
    'RF': '&',  # $RF$
    'LT': '<',  # $LT$
    'GT': '>',  # $GT$
    'LP': '(',  # $LP$
    'RP': ')',  # $RP$
    'C': ',',   # $C$
}
# Returns `True` if `s` is the letter `h` followed only by lowercase
# hexadecimal digits (possibly none), `False` otherwise.
def is_rust_hash(s):
    import string
    lower_hex = string.hexdigits[:16]  # '0123456789abcdef'
    if not s.startswith('h'):
        return False
    for ch in s[1:]:
        if ch not in lower_hex:
            return False
    return True
# Adapted from the rustc-demangle crate, written by Alex Crichton.
#
# Source: https://github.com/rust-lang/rustc-demangle/tree/2811a1ad6f7c8bead2ef3671e4fdc10de1553e96
# License: MIT OR Apache-2.0


# Splits the text after the `ZN` prefix into its length-prefixed elements.
#
# Returns the list of raw (still escaped) elements that precede the
# terminating `E`, or `None` if the input is malformed or ends before the `E`.
def _split_legacy_elements(inner):
    elements = []
    pos = 0
    end = len(inner)
    while True:
        if pos >= end:
            # Ran out of input before reaching the terminating 'E'.
            return None
        if inner[pos] == 'E':
            return elements
        if not inner[pos].isdigit():
            return None
        # Decode an identifier element's length.
        digits_start = pos
        while pos < end and inner[pos].isdigit():
            pos += 1
        length = int(inner[digits_start:pos])
        # The element must be followed by at least one more character (the
        # next length digit or the final 'E'), otherwise it is truncated.
        if length >= end - pos:
            return None
        elements.append(inner[pos:pos + length])
        pos += length


# Decodes the body of a `$u<hex>$` escape (without the `$` delimiters).
#
# Returns the decoded character, or `None` if `escape` is not a `u` followed
# by one or more lowercase hex digits naming a codepoint `chr` accepts.
def _decode_legacy_unicode_escape(escape):
    if not escape.startswith('u'):
        return None
    digits = escape[1:]
    # Require lowercase hex only; `int(..., 16)` alone would also accept
    # signs, whitespace and a `0x` prefix.
    if not digits or not all(ch in '0123456789abcdef' for ch in digits):
        return None
    try:
        # TODO: Maybe filter out control codepoints?
        return chr(int(digits, 16))
    except (ValueError, OverflowError):
        # Out of range for a codepoint.
        return None


# Expands the `$...$` and `.` escapes inside a single path element.
#
# Decoding stops at the first invalid or unterminated escape; whatever is left
# at that point is appended verbatim.
def _unescape_legacy_element(rest):
    if rest.startswith('_$'):
        rest = rest[1:]
    pieces = []
    while True:
        if rest.startswith('.'):
            if rest.startswith('..'):
                pieces.append('::')
                rest = rest[2:]
            else:
                pieces.append('.')
                rest = rest[1:]
        elif rest.startswith('$'):
            closing = rest.find('$', 1)
            if closing == -1:
                # Unterminated escape.
                break
            escape = rest[1:closing]
            unescaped = ESCAPE_MAP.get(escape)
            if unescaped is None:
                unescaped = _decode_legacy_unicode_escape(escape)
            if unescaped is None:
                # Invalid escape sequence.
                break
            pieces.append(unescaped)
            rest = rest[closing + 1:]
        else:
            # Copy literal text up to the next '$' or '.'.
            hits = [i for i in (rest.find('$'), rest.find('.')) if i != -1]
            if not hits:
                break
            cut = min(hits)
            pieces.append(rest[:cut])
            rest = rest[cut:]
    pieces.append(rest)
    return ''.join(pieces)


# Demangles a symbol that uses the legacy (`ZN...E`) Rust mangling scheme.
#
# Returns:
# - `None` if `mangled` lacks a recognised prefix, contains non-ASCII
#   characters, or is malformed or truncated.
# - Otherwise a list with one `(mangled, demangled, demangled_name)` tuple per
#   path element, where:
#   - `mangled` is the original symbol.
#   - `demangled` is the `::`-joined path up to and including this element.
#   - `demangled_name` is this element on its own.
#   A final element accepted by `is_rust_hash` is dropped, so the list may be
#   empty.
def demangle_legacy(mangled):
    if mangled.startswith('_ZN'):
        inner = mangled[3:]
    elif mangled.startswith('ZN'):
        # On Windows, dbghelp strips leading underscores, so we accept 'ZN...E'
        # form too.
        inner = mangled[2:]
    elif mangled.startswith('__ZN'):
        # On OSX, symbols are prefixed with an extra _
        inner = mangled[4:]
    else:
        return None

    # Only works with ASCII text.
    if any(ord(c) >= 128 for c in inner):
        return None

    elements = _split_legacy_elements(inner)
    if elements is None:
        return None

    demangled = ''
    parts = []
    last_index = len(elements) - 1
    for index, element in enumerate(elements):
        # Skip the hash at the end.
        if index == last_index and is_rust_hash(element):
            break
        if index != 0:
            demangled += '::'
        demangled_name = _unescape_legacy_element(element)
        demangled += demangled_name
        parts.append((mangled, demangled, demangled_name))
    return parts
# Placeholder for the second ("v0") Rust mangling scheme.
#
# Intended to be adapted from the rustc-demangle crate, written by
# Alex Crichton.
#
# Source: https://github.com/rust-lang/rustc-demangle/tree/2811a1ad6f7c8bead2ef3671e4fdc10de1553e96
# License: MIT OR Apache-2.0
#
# Not implemented yet: always returns `None`, whatever `mangled` is.
def demangle_v0(mangled):
    # TODO: Port from rustc-demangle
    return None
# Builds a `DemangledObject` out of one demangled part.
#
# `fn` is called as `fn(mangled, demangled, demangled_name)`; it must be a
# subclass of `DemangledObject` with the required constructor. When
# `namespace` is not `None` it is attached to the new object through
# `setNamespace`. The new object is returned.
def objectify(fn, namespace, mangled, demangled, demangled_name):
    obj = fn(mangled, demangled, demangled_name)
    if namespace is None:
        return obj
    obj.setNamespace(namespace)
    return obj
# Tries each supported Rust mangling scheme on the given symbol.
#
# The legacy scheme is tried first; the v0 scheme is the fallback.
#
# Returns:
# - `None` if neither scheme produced any parts.
# - `(namespace, mangled, demangled, demangled_name)` otherwise, where:
#   - `namespace` is a chain of `DemangledType` objects built from every part
#     except the last, or `None` if there is only one part.
#   - `mangled` is the original symbol.
#   - `demangled` is the fully-demangled symbol.
#   - `demangled_name` is the last part of the symbol.
def demangle(mangled):
    parts = demangle_legacy(mangled) or demangle_v0(mangled)
    if not parts:
        return None

    # Nest each leading part inside the namespace built from the ones before it.
    enclosing = None
    for ns_part in parts[:-1]:
        enclosing = objectify(DemangledType, enclosing, *ns_part)

    return (enclosing,) + parts[-1]
# Script body: walk every non-default symbol in the global namespace, demangle
# it, and replace it with the demangled function (or unknown object).

# The handler below used to name `java.lang.IllegalArgumentException` although
# `java` is never imported in this script, so import the class explicitly.
# NOTE(review): assumes the script runs under Ghidra's Jython, where
# `java.lang` is importable.
from java.lang import IllegalArgumentException

symbol_table = currentProgram.getSymbolTable()
namespace = currentProgram.getNamespaceManager().getGlobalNamespace()
num_demangled = 0
failures = []

for symbol in symbol_table.getSymbols(namespace):
    if symbol.getSource() == SourceType.DEFAULT:
        continue

    addr = symbol.getAddress()
    name = symbol.getName()
    demangled = demangle(name)
    if demangled is None:
        continue

    # Delete the existing symbol, otherwise we get duplicates.
    symbol.delete()

    try:
        # Try treating the symbol as a function.
        applied = objectify(DemangledFunction, *demangled).applyTo(
            currentProgram, addr, DemanglerOptions(), monitor)
    except IllegalArgumentException:
        # Not a function. This is probably a static, but treat it as unknown.
        applied = objectify(DemangledUnknown, *demangled).applyTo(
            currentProgram, addr, DemanglerOptions(), monitor)

    if applied:
        num_demangled += 1
    else:
        print('Couldn\'t apply demangling for %s' % name)
        failures.append(name)

print('Demangled %d names' % num_demangled)
if failures:
    print('Failed to demangle (%d):' % len(failures))
    for n in sorted(failures):
        print('- %s' % n)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment