Skip to content

Instantly share code, notes, and snippets.

Created March 15, 2022 02:18
Show Gist options
  • Save devanlai/bd2c6b2587e7ee802df8cb72d6e239e9 to your computer and use it in GitHub Desktop.
Save devanlai/bd2c6b2587e7ee802df8cb72d6e239e9 to your computer and use it in GitHub Desktop.
Sample script to parse Zephyr shell command trees from an ELF file
"""
Parse a Zephyr firmware ELF file to extract its statically registered
shell commands using the linker section data structure.

This script requires pyelftools, which can be installed using:

    pip install pyelftools
"""
import argparse
import struct
import sys
import elftools
import elftools.elf.elffile
import elftools.elf.sections
from collections import OrderedDict, namedtuple
def extract_symbol(sym_addr, sym_size, section_data, section_addr):
    """
    Extract the byte contents of a symbol object from a section image.

    Looks in the contents of `section_data`, assuming the data starts at
    address `section_addr`.

    Args:
        sym_addr: Address of the first byte to extract.
        sym_size: Number of bytes to extract.
        section_data: Bytes-like object holding the section contents.
        section_addr: Address corresponding to `section_data[0]`.

    Returns:
        The `sym_size` bytes of `section_data` starting at `sym_addr`.

    Raises:
        ValueError: If the requested range is not fully inside the section,
            or if `sym_size` is negative.
    """
    offset = sym_addr - section_addr
    # A negative size would slip past the upper-bound test below and make the
    # slice silently return the wrong bytes, so reject it explicitly.
    if offset < 0 or sym_size < 0 or (offset + sym_size) > len(section_data):
        raise ValueError("Symbol address 0x{:08X} out of bounds [0x{:08X}, 0x{:08X})".format(
            sym_addr, section_addr, section_addr + len(section_data)))
    return section_data[offset:offset + sym_size]
def read_string(ptr, lookup_func, max_len=1024, encoding="utf-8"):
    """
    Read a NUL-terminated string starting at address `ptr`.

    Args:
        ptr: Address of the first byte of the string.
        lookup_func: Callable `(address, size) -> bytes` used to fetch the
            raw bytes; it is called with a size of 1 for each byte read.
        max_len: Maximum number of bytes allowed before the terminator, or
            None for no limit.
        encoding: Text encoding used to decode the bytes, or None to return
            the raw bytes undecoded.

    Returns:
        The decoded `str`, or `bytes` when `encoding` is None. The
        terminating NUL is not included.

    Raises:
        ValueError: If more than `max_len` bytes precede the terminator.
            Any exception raised by `lookup_func` also propagates.
    """
    string_bytes = bytearray()
    b = lookup_func(ptr, 1)
    while b != b'\x00':
        # Refuse to grow past max_len so a missing terminator cannot make us
        # walk through the rest of the section.
        if max_len is not None and len(string_bytes) >= max_len:
            raise ValueError("String would exceed maximum expected length")
        string_bytes += b
        ptr += 1
        b = lookup_func(ptr, 1)

    if encoding is not None:
        return bytes(string_bytes).decode(encoding)
    return bytes(string_bytes)
if __name__ == "__main__":
    # Command-line entry point: open the ELF file, decode the statically
    # registered shell command tree and print it, one command per line,
    # indented by nesting depth.
    parser = argparse.ArgumentParser(description="Extract statically registered shell command info")
    # ELFFile needs a binary stream, so let argparse open the file in "rb".
    parser.add_argument("elf_file", type=argparse.FileType("rb"),
                        help="Firmware elf file to read")
    args = parser.parse_args()

    elf = elftools.elf.elffile.ELFFile(args.elf_file)

    # Read the symbol table so we can lookup symbols by name
    symtab_section = elf.get_section_by_name(".symtab")
    if symtab_section is None:
        sys.stderr.write('Failed to find symbol table section\n')
        sys.exit(1)

    def find_symbol(name):
        """Return the unique symbol called `name`.

        Raises ValueError when the symbol is missing or ambiguous.
        """
        try:
            # get_symbol_by_name() yields None (-> TypeError on unpacking)
            # when nothing matches, and a longer list (-> ValueError) when
            # the name is ambiguous.
            [symbol] = symtab_section.get_symbol_by_name(name)
        except (TypeError, ValueError) as e:
            raise ValueError("Symbol {!r} not found or not unique".format(name)) from e
        return symbol

    # Read the iterable root shell command list section
    root_cmds_section = elf.get_section_by_name("shell_root_cmds_sections")
    if root_cmds_section is None:
        sys.stderr.write('Failed to find shell root cmd section\n')
        sys.exit(1)
    root_cmds_data = root_cmds_section.data()
    root_cmds_addr = root_cmds_section["sh_addr"]

    # Read the general rodata section for everything else
    rodata_section = elf.get_section_by_name("rodata")
    if rodata_section is None:
        sys.stderr.write('Failed to find rodata section\n')
        sys.exit(1)
    rodata_data = rodata_section.data()
    rodata_addr = rodata_section["sh_addr"]

    def read_shell_root_cmd_bytes(ptr, size):
        "Retrieve the `size` bytes stored at `ptr`"
        return extract_symbol(ptr, size, root_cmds_data, root_cmds_addr)

    def read_rodata_bytes(ptr, size):
        "Retrieve the `size` bytes stored at `ptr`"
        return extract_symbol(ptr, size, rodata_data, rodata_addr)

    # Lookup the array of shell root commands
    try:
        shell_root_cmd_array_start_symbol = find_symbol("__shell_root_cmds_start")
        shell_root_cmd_array_end_symbol = find_symbol("__shell_root_cmds_end")
    except ValueError:
        sys.stderr.write('Failed to find shell root cmd array symbols\n')
        sys.exit(1)

    shell_root_cmd_array_start = shell_root_cmd_array_start_symbol["st_value"]
    shell_root_cmd_array_end = shell_root_cmd_array_end_symbol["st_value"]

    # Walk the array of root shell command entries. The format string reads
    # each 8-byte entry as a one-byte "is dynamic" flag, three padding bytes
    # and a little-endian 32-bit pointer.
    ptr = shell_root_cmd_array_start
    static_command_pointers = []
    while ptr < shell_root_cmd_array_end:
        shell_cmd_entry_bytes = read_shell_root_cmd_bytes(ptr, 8)
        is_dynamic, entry_ptr = struct.unpack("<BxxxI", shell_cmd_entry_bytes)
        # Only static entries are collected for decoding.
        if not is_dynamic:
            static_command_pointers.append(entry_ptr)
        ptr += 8

    # Recursively decode all commands, starting from the root commands
    # and traversing all reachable static sub commands
    root_commands = []
    command_registry = {}

    def decode_shell_static_entry(ptr):
        """Decode the 20-byte static command entry at `ptr`.

        Returns a tuple `(syntax, help, sub_commands, handler_ptr,
        num_req_args, num_opt_args)`, or None when `ptr` is outside rodata
        or the entry has a null syntax pointer (which ends the sub-command
        walk below). Results are memoized in `command_registry`.
        """
        if ptr in command_registry:
            return command_registry[ptr]

        try:
            shell_static_entry_bytes = read_rodata_bytes(ptr, 20)
        except ValueError:
            return None

        fields = struct.unpack("<IIIIBBxx", shell_static_entry_bytes)
        syntax_ptr, help_ptr, subcmd_ptr, handler_func_ptr, num_req_args, num_opt_args = fields
        if syntax_ptr == 0:
            return None

        syntax_string = read_string(syntax_ptr, read_rodata_bytes)
        help_string = read_string(help_ptr, read_rodata_bytes) if help_ptr != 0 else ""
        sub_commands = []
        command_entry = (syntax_string, help_string, sub_commands, handler_func_ptr, num_req_args, num_opt_args)
        # Register before descending so a repeated pointer resolves to this
        # entry instead of being decoded again.
        command_registry[ptr] = command_entry

        if subcmd_ptr != 0:
            shell_cmd_entry_bytes = read_rodata_bytes(subcmd_ptr, 8)
            is_dynamic, entry_ptr = struct.unpack("<BxxxI", shell_cmd_entry_bytes)
            if not is_dynamic and entry_ptr != 0:
                # Sub-commands are consecutive 20-byte entries; stop at the
                # first one that fails to decode.
                sub_command = decode_shell_static_entry(entry_ptr)
                while sub_command is not None:
                    sub_commands.append(sub_command)
                    entry_ptr += 20
                    sub_command = decode_shell_static_entry(entry_ptr)

        return command_entry

    for ptr in static_command_pointers:
        root_command = decode_shell_static_entry(ptr)
        # Skip roots that could not be decoded so the sort below never has
        # to index into None.
        if root_command is not None:
            root_commands.append(root_command)
    root_commands.sort(key=lambda x: x[0])

    # Convert to a flattened command list with nesting information
    # (depth-first, parents before children, siblings in ascending order).
    command_list = []
    stack = [(root_command, 0) for root_command in reversed(root_commands)]
    while stack:
        (command, depth) = stack.pop()
        command_list.append((command, depth))
        sub_commands = command[2]
        # Push children in descending order so they pop in ascending order,
        # matching the ordering of the root commands.
        for sub_command in sorted(sub_commands, key=lambda x: x[0], reverse=True):
            stack.append((sub_command, depth + 1))

    # Map each command handler to a symbol name if possible
    handlers_to_lookup = set()
    for (command, depth) in command_list:
        handler_func_ptr = command[3]
        if handler_func_ptr != 0:
            handlers_to_lookup.add(handler_func_ptr)

    handler_name_table = {}
    for symbol in symtab_section.iter_symbols():
        symbol_value = symbol.entry.get("st_value")
        if symbol_value in handlers_to_lookup:
            handler_name_table[symbol_value] = symbol.name

    # Look up the source file and line of each command handler if possible
    handler_source_table = {}
    dwarfinfo = elf.get_dwarf_info() if elf.has_dwarf_info() else None
    if dwarfinfo:
        for CU in dwarfinfo.iter_CUs():
            # First, look at line programs to find the file/line for the address
            lineprog = dwarfinfo.line_program_for_CU(CU)
            if lineprog is None:
                # This CU does not reference a line program.
                continue
            prevstate = None
            for entry in lineprog.get_entries():
                # We're interested in those entries where a new state is assigned
                if entry.state is None:
                    continue
                # Looking for a range of addresses in two consecutive states that
                # contain the required address.
                for handler_func_ptr in handlers_to_lookup:
                    if prevstate and prevstate.address <= handler_func_ptr < entry.state.address:
                        # NOTE(review): the `- 1` assumes 1-based file indices
                        # (DWARF < 5) - confirm for DWARF 5 input.
                        filename = lineprog['file_entry'][prevstate.file - 1].name.decode("utf-8")
                        line = prevstate.line
                        handler_source_table[handler_func_ptr] = (filename, line)
                if entry.state.end_sequence:
                    # For the state with `end_sequence`, `address` means the address
                    # of the first byte after the target machine instruction
                    # sequence and other information is meaningless. We clear
                    # prevstate so that it's not used in the next iteration. Address
                    # info is used in the above comparison to see if we need to use
                    # the line information for the prevstate.
                    prevstate = None
                else:
                    prevstate = entry.state

    # Display command tree
    for (command, depth) in command_list:
        (syntax_string, help_string, sub_commands, handler_func_ptr, num_req_args, num_opt_args) = command
        if handler_func_ptr != 0:
            handler_name = handler_name_table.get(handler_func_ptr)
            handler_source = handler_source_table.get(handler_func_ptr)
            if handler_name is not None and handler_source is not None:
                print("{:s}{:s} - {:s} [{} from {}:{}]".format(" "*depth, syntax_string, help_string, handler_name, handler_source[0], handler_source[1]))
            elif handler_name is not None:
                print("{:s}{:s} - {:s} [{}]".format(" "*depth, syntax_string, help_string, handler_name))
            else:
                print("{:s}{:s} - {:s}".format(" "*depth, syntax_string, help_string))
        else:
            print("{:s}{:s} - {:s}".format(" "*depth, syntax_string, help_string))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment