Skip to content

Instantly share code, notes, and snippets.

@tomilov
Forked from yyny/a_minimal_example.c
Created August 1, 2024 19:12
Show Gist options
  • Save tomilov/a1f8f1a495f793ef82b25770df74fee0 to your computer and use it in GitHub Desktop.
Save tomilov/a1f8f1a495f793ef82b25770df74fee0 to your computer and use it in GitHub Desktop.
GDB JIT Interface -- Minimal Example
/* I spent an entire evening getting JIT debug symbols to work in GDB.
* Here is a minimal example to get you started.
* Have fun!
*/
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <elf.h>
#include "jit.h"
#define BUFFER_IMPLEMENTATION
#include "buffer.h"
#define ARRAYSIZE(...) (sizeof(__VA_ARGS__) / sizeof(*(__VA_ARGS__)))
static size_t buf_append_sym(Buffer *buf, Elf64_Sym sym)
{
return buf_append(buf, &sym, sizeof(sym));
}
static Buffer buf_make_executable(Buffer buf)
{
uint8_t *executable = mmap(
NULL,
buf.num_bytes,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1,
0
);
if (executable == MAP_FAILED) {
fprintf(stderr, "Failed to mmap %zu bytes: %s\n", buf.num_bytes, strerror(errno));
exit(EXIT_FAILURE);
}
memcpy(executable, buf.bytes, buf.num_bytes);
if (mprotect(executable, buf.num_bytes, PROT_READ | PROT_EXEC)) {
fprintf(stderr, "Failed to mprotect(%p, %zu, PROT_READ | PROT_EXEC).\n", executable, buf.num_bytes);
exit(EXIT_FAILURE);
}
buf_free(buf);
Buffer result;
result.bytes = executable;
result.num_bytes = buf.num_bytes;
result.max_bytes = 0;
return result;
}
static void buf_free_executable(Buffer buf)
{
if (munmap(buf.bytes, buf.num_bytes) < 0) {
fprintf(stderr, "Failed to munmap(%p, %zu)\n", buf.bytes, buf.num_bytes);
exit(EXIT_FAILURE);
}
}
enum {
/* You can add more sections, like `.rodata` or debug sections */
SECTION_NULL,
SECTION_TEXT,
SECTION_DATA,
SECTION_SYMTAB,
SECTION_STRTAB,
SECTION_SHSTRTAB,
SECTION_COUNT
};
typedef struct JitObject {
/* The elf header. */
Elf64_Ehdr ehdr;
/* We don't need a program header.
* A program header is used to prepare a program for execution,
* but because we are JIT compiling, we prepare the program ourselves.
*/
Elf64_Phdr phdr[0];
/* The section headers that tell GDB about the memory we JIT compiled. */
Elf64_Shdr shdr[SECTION_COUNT];
/* NOTE: You could totally pre-calculate the sizes of these buffers,
* and allocate the entire object up front.
*/
Buffer symtab;
Buffer strtab;
Buffer shstrtab;
} JitObject;
/* Prepare a `JitObject` for adding symbols to. */
JitObject jit_begin(void)
{
JitObject object;
memset(&object, 0x00, sizeof(JitObject));
object.ehdr.e_ident[EI_MAG0] = ELFMAG0;
object.ehdr.e_ident[EI_MAG1] = ELFMAG1;
object.ehdr.e_ident[EI_MAG2] = ELFMAG2;
object.ehdr.e_ident[EI_MAG3] = ELFMAG3;
object.ehdr.e_ident[EI_CLASS] = ELFCLASS64;
object.ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
object.ehdr.e_ident[EI_VERSION] = EV_CURRENT;
object.ehdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
object.ehdr.e_ident[EI_ABIVERSION] = 0;
/* NOTE: `ET_EXEC` will work too, that makes GDB treat `.st_value`s as VMAs. */
object.ehdr.e_type = ET_REL;
object.ehdr.e_machine = EM_X86_64;
object.ehdr.e_version = EV_CURRENT;
/* NOTE: `.e_entry` is completely unused. */
object.ehdr.e_entry = 0x0;
/* NOTE: `readelf` gives a warning if `.e_phoff` is non-zero, but `.e_phnum` is zero.
* Setting this to `offsetof(...)` is otherwise harmless. */
object.ehdr.e_phoff = ARRAYSIZE(object.phdr) ? offsetof(JitObject, phdr) : 0;
/* NOTE: `readelf` gives a warning if `.e_shoff` is non-zero, but `.e_shnum` is zero.
* Setting this to `offsetof(...)` is otherwise harmless. */
object.ehdr.e_shoff = ARRAYSIZE(object.shdr) ? offsetof(JitObject, shdr) : 0;
/* EM_X86_64 doesn't have machine flags. */
object.ehdr.e_flags = 0;
object.ehdr.e_ehsize = sizeof(Elf64_Ehdr);
/* NOTE: `gcc` sets this to zero if `.e_phnum` is zero, so let's do the same. */
object.ehdr.e_phentsize = ARRAYSIZE(object.phdr) ? sizeof(Elf64_Phdr) : 0;
object.ehdr.e_phnum = ARRAYSIZE(object.phdr);
/* NOTE: `gcc` sets this to zero if `.e_shnum` is zero, so let's do the same. */
object.ehdr.e_shentsize = ARRAYSIZE(object.shdr) ? sizeof(Elf64_Shdr) : 0;
object.ehdr.e_shnum = ARRAYSIZE(object.shdr);
object.ehdr.e_shstrndx = SECTION_SHSTRTAB;
/* The NULL symbol, MUST exist as the first symbol. */
buf_append_sym(&object.symtab, (Elf64_Sym){
/* Can be any name. Most tools set this to 0 and place the empty string there. */
.st_name = buf_append_str(&object.strtab, ""),
.st_value = 0,
.st_size = 0,
.st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE), /* = 0 */
.st_other = STV_DEFAULT, /* = 0 */
.st_shndx = 0,
});
return object;
}
/* Finish adding symbols to a `JitObject`, and return the object as a continuous buffer. */
Buffer jit_complete(JitObject object, Buffer text, Buffer data)
{
size_t header_sizes = sizeof(object.ehdr) + sizeof(object.phdr) + sizeof(object.shdr);
size_t symtab_offset = header_sizes;
size_t strtab_offset = symtab_offset + object.symtab.num_bytes;
size_t shstrtab_offset = strtab_offset + object.strtab.num_bytes;
/* SHT_NULL, MUST exist as the first section.
* Can be any name. Most tools set this to 0 and place the empty string there. */
object.shdr[SECTION_NULL].sh_name = buf_append_str(&object.shstrtab, "");
object.shdr[SECTION_NULL].sh_type = SHT_NULL;
object.shdr[SECTION_NULL].sh_flags = 0;
object.shdr[SECTION_NULL].sh_addr = 0;
object.shdr[SECTION_NULL].sh_offset = 0;
object.shdr[SECTION_NULL].sh_size = 0;
object.shdr[SECTION_NULL].sh_link = 0;
object.shdr[SECTION_NULL].sh_info = 0;
object.shdr[SECTION_NULL].sh_addralign = 0;
object.shdr[SECTION_NULL].sh_entsize = 0;
/* .text */
object.shdr[SECTION_TEXT].sh_name = buf_append_str(&object.shstrtab, ".text");
object.shdr[SECTION_TEXT].sh_type = SHT_PROGBITS;
object.shdr[SECTION_TEXT].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
object.shdr[SECTION_TEXT].sh_addr = (uintptr_t)text.bytes;
object.shdr[SECTION_TEXT].sh_offset = 0;
object.shdr[SECTION_TEXT].sh_size = text.num_bytes;
object.shdr[SECTION_TEXT].sh_link = 0;
object.shdr[SECTION_TEXT].sh_info = 0;
object.shdr[SECTION_TEXT].sh_addralign = 1 << 0;
object.shdr[SECTION_TEXT].sh_entsize = 0;
/* .data */
object.shdr[SECTION_DATA].sh_name = buf_append_str(&object.shstrtab, ".data");
object.shdr[SECTION_DATA].sh_type = SHT_PROGBITS;
object.shdr[SECTION_DATA].sh_flags = SHF_ALLOC | SHF_WRITE;
object.shdr[SECTION_DATA].sh_addr = (uintptr_t)data.bytes;
object.shdr[SECTION_DATA].sh_offset = 0;
object.shdr[SECTION_DATA].sh_size = data.num_bytes;
object.shdr[SECTION_DATA].sh_link = 0;
object.shdr[SECTION_DATA].sh_info = 0;
object.shdr[SECTION_DATA].sh_addralign = 1 << 0;
object.shdr[SECTION_DATA].sh_entsize = 0;
/* .symtab */
object.shdr[SECTION_SYMTAB].sh_name = buf_append_str(&object.shstrtab, ".symtab");
object.shdr[SECTION_SYMTAB].sh_type = SHT_SYMTAB;
object.shdr[SECTION_SYMTAB].sh_flags = SHF_ALLOC;
object.shdr[SECTION_SYMTAB].sh_addr = (uintptr_t)object.symtab.bytes;
object.shdr[SECTION_SYMTAB].sh_offset = symtab_offset;
object.shdr[SECTION_SYMTAB].sh_size = object.symtab.num_bytes;
/* NOTE: This can be any `SHT_STRTAB` section. You could re-use `.shstrtab` to save space. Most tools don't. */
object.shdr[SECTION_SYMTAB].sh_link = SECTION_STRTAB;
object.shdr[SECTION_SYMTAB].sh_info = (object.symtab.num_bytes / sizeof(Elf64_Sym));
object.shdr[SECTION_SYMTAB].sh_addralign = 1 << 0;
object.shdr[SECTION_SYMTAB].sh_entsize = sizeof(Elf64_Sym);
/* .strtab */
object.shdr[SECTION_STRTAB].sh_name = buf_append_str(&object.shstrtab, ".strtab");
object.shdr[SECTION_STRTAB].sh_type = SHT_STRTAB;
object.shdr[SECTION_STRTAB].sh_flags = SHF_ALLOC | SHF_STRINGS; /* NOTE: `SHF_STRINGS` is optional. */
object.shdr[SECTION_STRTAB].sh_addr = (uintptr_t)object.strtab.bytes;
object.shdr[SECTION_STRTAB].sh_offset = strtab_offset;
object.shdr[SECTION_STRTAB].sh_size = object.strtab.num_bytes;
object.shdr[SECTION_STRTAB].sh_link = 0;
object.shdr[SECTION_STRTAB].sh_info = 0;
object.shdr[SECTION_STRTAB].sh_addralign = 1 << 0;
/* Because we set `SHF_STRINGS`, this is "the size of each character". */
object.shdr[SECTION_STRTAB].sh_entsize = 1;
/* .shstrtab */
object.shdr[SECTION_SHSTRTAB].sh_name = buf_append_str(&object.shstrtab, ".shstrtab");
object.shdr[SECTION_SHSTRTAB].sh_type = SHT_STRTAB;
object.shdr[SECTION_SHSTRTAB].sh_flags = SHF_ALLOC | SHF_STRINGS; /* NOTE: `SHF_STRINGS` is optional. */
object.shdr[SECTION_SHSTRTAB].sh_addr = (uintptr_t)object.shstrtab.bytes;
object.shdr[SECTION_SHSTRTAB].sh_offset = shstrtab_offset;
object.shdr[SECTION_SHSTRTAB].sh_size = object.shstrtab.num_bytes;
object.shdr[SECTION_SHSTRTAB].sh_link = 0;
object.shdr[SECTION_SHSTRTAB].sh_info = 0;
object.shdr[SECTION_SHSTRTAB].sh_addralign = 1 << 0;
/* Because we set `SHF_STRINGS`, this is "the size of each character". */
object.shdr[SECTION_SHSTRTAB].sh_entsize = 1;
Buffer result = buf_new_with_capacity(
header_sizes + object.symtab.num_bytes + object.strtab.num_bytes + object.shstrtab.num_bytes
);
buf_append(&result, &object.ehdr, sizeof(object.ehdr));
buf_append(&result, &object.phdr, sizeof(object.phdr));
buf_append(&result, &object.shdr, sizeof(object.shdr));
buf_append(&result, object.symtab.bytes, object.symtab.num_bytes);
buf_append(&result, object.strtab.bytes, object.strtab.num_bytes);
buf_append(&result, object.shstrtab.bytes, object.shstrtab.num_bytes);
buf_free(object.shstrtab);
buf_free(object.strtab);
buf_free(object.symtab);
return result;
}
/*
* You can break on this function to step into the JIT code,
* and then print a backtrace to see if the symbols are working.
*/
void jit_run(void (*func)(void))
{
func();
}
int main(void)
{
Buffer data = buf_new();
Buffer text = buf_new();
/* Add some code to run... */
buf_append_str(&data, "Hello, world!");
buf_append_hex(&text, "55"); /* push rbp */
buf_append_hex(&text, "4889e5"); /* mov rbp, rsp */
buf_append_hex(&text, "48bf"); /* mov rdi, msg */
buf_append_addr(&text, (uintptr_t)data.bytes);
buf_append_hex(&text, "48b8"); /* mov rax, puts */
buf_append_addr(&text, (uintptr_t)puts);
buf_append_hex(&text, "ffd0"); /* call rax */
buf_append_hex(&text, "31c0"); /* xor eax, eax */
buf_append_hex(&text, "5d"); /* pop rbp */
buf_append_hex(&text, "c3"); /* ret */
text = buf_make_executable(text);
JitObject object = jit_begin();
/* Add the symbols. */
buf_append_sym(&object.symtab, (Elf64_Sym){
.st_name = buf_append_str(&object.strtab, "jit_main"),
.st_value = 0, /* Offset into `.text` */
.st_size = text.num_bytes, /* Size of the function. MUST be non-zero, or symbol will be unusable. */
.st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC), /* A function. */
.st_other = STV_DEFAULT,
.st_shndx = SECTION_TEXT, /* The section index of this symbol (`.text`). */
});
buf_append_sym(&object.symtab, (Elf64_Sym){
.st_name = buf_append_str(&object.strtab, "msg"),
.st_value = 0, /* Offset into `.data`` */
.st_size = data.num_bytes, /* Size of the object. MUST be non-zero, or symbol will be unusable. */
.st_info = ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), /* An object. */
.st_other = STV_DEFAULT,
.st_shndx = SECTION_DATA, /* The section index of this symbol (`.data`). */
});
/* Create the object file in memory for GDB. */
Buffer buf = jit_complete(object, text, data);
{
/* Save the object file to disk.
* Useful for checking the content with `readelf -a jit.o`
* or `objdump -x jit.o` */
FILE *fp = fopen("jit.o", "wb");
if (!fp) {
fprintf(stderr, "Failed to open \"jit.o\": %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
fwrite(buf.bytes, 1, buf.num_bytes, fp);
fclose(fp);
}
static struct jit_code_entry entry;
{
/* Tell GDB about the object file we created. */
/* https://sourceware.org/gdb/current/onlinedocs/gdb.html/Registering-Code.html */
entry.next_entry = NULL;
entry.prev_entry = NULL;
entry.symfile_addr = (void*)buf.bytes;
entry.symfile_size = buf.num_bytes;
__jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
__jit_debug_descriptor.relevant_entry = &entry;
__jit_debug_descriptor.first_entry = &entry;
__jit_debug_register_code();
}
/* Run the code! */
jit_run((void(*)(void))text.bytes);
/* Cleanup */
{
/* https://sourceware.org/gdb/current/onlinedocs/gdb.html/Unregistering-Code.html */
if (entry.next_entry) entry.next_entry->prev_entry = entry.prev_entry;
if (entry.prev_entry) entry.prev_entry->next_entry = entry.next_entry;
entry.next_entry = NULL;
entry.prev_entry = NULL;
__jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
__jit_debug_descriptor.relevant_entry = &entry;
__jit_debug_descriptor.first_entry = NULL;
__jit_debug_register_code();
}
buf_free(buf);
buf_free_executable(text);
buf_free(data);
return 0;
}
#pragma once
#include <stddef.h>
#include <stdint.h>
typedef struct buffer {
uint8_t *bytes;
size_t num_bytes;
size_t max_bytes;
} Buffer;
Buffer buf_new(void);
Buffer buf_new_with_capacity(size_t num_bytes);
void buf_free(Buffer buf);
void buf_grow_to(Buffer *buf, size_t num_bytes);
void buf_grow_by(Buffer *buf, size_t num_bytes);
size_t buf_append(Buffer *buf, const void *bytes, size_t len);
size_t buf_append_byte(Buffer *buf, uint8_t value);
size_t buf_append_half(Buffer *buf, uint16_t value);
size_t buf_append_word(Buffer *buf, uint32_t value);
size_t buf_append_long(Buffer *buf, uint64_t value);
size_t buf_append_addr(Buffer *buf, uintptr_t value);
size_t buf_append_str(Buffer *buf, const char *str);
size_t buf_append_hex(Buffer *buf, const char *str);
#ifdef BUFFER_IMPLEMENTATION
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Buffer buf_new(void)
{
static Buffer result = { NULL, 0, 0 };
return result;
}
Buffer buf_new_with_capacity(size_t num_bytes)
{
Buffer result = buf_new();
buf_grow_to(&result, num_bytes);
return result;
}
void buf_free(Buffer buf)
{
free(buf.bytes);
}
void buf_grow_to(Buffer *buf, size_t num_bytes)
{
if (num_bytes <= buf->max_bytes) return;
size_t max_bytes = 8;
while (num_bytes > max_bytes) max_bytes *= 2;
uint8_t *new_bytes = realloc(buf->bytes, max_bytes);
if (!new_bytes) {
fprintf(stderr, "Failed to allocate %zu bytes.\n", max_bytes);
exit(EXIT_FAILURE);
}
buf->bytes = new_bytes;
buf->max_bytes = max_bytes;
}
void buf_grow_by(Buffer *buf, size_t num_bytes)
{
buf_grow_to(buf, buf->num_bytes + num_bytes);
}
size_t buf_append(Buffer *buf, const void *bytes, size_t len)
{
buf_grow_by(buf, len);
size_t off = buf->num_bytes;
memcpy(&buf->bytes[buf->num_bytes], bytes, len);
buf->num_bytes += len;
return off;
}
size_t buf_append_byte(Buffer *buf, uint8_t value)
{
return buf_append(buf, &value, sizeof(value));
}
size_t buf_append_half(Buffer *buf, uint16_t value)
{
return buf_append(buf, &value, sizeof(value));
}
size_t buf_append_word(Buffer *buf, uint32_t value)
{
return buf_append(buf, &value, sizeof(value));
}
size_t buf_append_long(Buffer *buf, uint64_t value)
{
return buf_append(buf, &value, sizeof(value));
}
size_t buf_append_addr(Buffer *buf, uintptr_t value)
{
return buf_append(buf, &value, sizeof(value));
}
size_t buf_append_str(Buffer *buf, const char *str)
{
return buf_append(buf, str, strlen(str) + 1);
}
size_t buf_append_hex(Buffer *buf, const char *str)
{
size_t off = buf->num_bytes;
while (*str) {
int hi = *str++;
int lo = *str++;
char hexval[3] = { hi, lo, 0 };
if (!isxdigit(hi)) lo = hi;
if (!isxdigit(lo)) {
if (isgraph(lo)) {
fprintf(stderr, "'%c' is not a valid hex digit.\n", lo);
} else {
fprintf(stderr, "'\\x%02x' is not a valid hex digit.\n", lo);
}
exit(EXIT_FAILURE);
}
buf_append_byte(buf, strtoul(hexval, NULL, 16));
}
return off;
}
#endif
/* https://sourceware.org/gdb/current/onlinedocs/gdb.html/Declarations.html#Declarations */
#pragma once
#include <stdint.h>
typedef enum
{
JIT_NOACTION = 0,
JIT_REGISTER_FN,
JIT_UNREGISTER_FN
} jit_actions_t;
struct jit_code_entry
{
struct jit_code_entry *next_entry;
struct jit_code_entry *prev_entry;
const char *symfile_addr;
uint64_t symfile_size;
};
struct jit_descriptor
{
uint32_t version;
/* This type should be jit_actions_t, but we use uint32_t
to be explicit about the bitwidth. */
uint32_t action_flag;
struct jit_code_entry *relevant_entry;
struct jit_code_entry *first_entry;
};
/* GDB puts a breakpoint in this function. */
void __attribute__((noinline)) __jit_debug_register_code() { };
/* Make sure to specify the version statically, because the
debugger may check the version before we can set it. */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment