Skip to main content

riscv_emulator/
elf.rs

1//! ELF32 parser and symbol table loader.
2//!
3//! Parses RISC-V 32-bit little-endian ELF files and extracts two things:
4//!
5//! 1. **Loadable segments** ([`ElfImage`]) — `PT_LOAD` program headers copied
6//!    into RAM by [`crate::emulator::Emulator::load_elf`].
7//! 2. **Symbol table** ([`SymbolTable`]) — `STT_FUNC` and `STT_OBJECT` symbols
8//!    used by the disassembler to resolve addresses to names.
9//!
10//! ## Usage
11//!
12//! ```ignore
13//! let data  = std::fs::read("freertos.elf")?;
14//! let image = elf::parse_elf(&data)?;
15//! let syms  = elf::parse_symbol_table(&data)?; // None if stripped
16//! ```
17//!
18//! ## Symbol lookup
19//!
20//! [`SymbolTable`] supports two lookup directions:
21//!
22//! - **Address → symbol**: [`SymbolTable::lookup_addr`] — used by the
23//!   disassembler to annotate branch targets and call destinations.
24//! - **Name → symbol**: [`SymbolTable::lookup_name`] — reserved for future
25//!   breakpoint-by-name support.
26//!
27//! Symbols with `size == 0` (common in hand-written assembly) only match on
28//! an exact address. Symbols with a known `size` match any address inside
29//! `[sym.addr, sym.addr + sym.size)`.
30
/// Reasons an input can be rejected by the ELF parser.
///
/// The enum is field-less, so it is cheap to copy and can be compared
/// directly (e.g. `assert_eq!(err, ElfError::BadMagic)` in tests).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ElfError {
    /// File is too short to contain an ELF header.
    TooShort,
    /// Magic bytes (`\x7fELF`) do not match.
    BadMagic,
    /// ELF class is not 32-bit.
    Not32Bit,
    /// ELF data encoding is not little-endian.
    WrongEndian,
    /// `e_machine` is not `EM_RISCV` (243).
    NotRiscV,
    /// A program header is out of bounds or overflows.
    InvalidProgramHeader,
    /// A section header is out of bounds or overflows.
    InvalidSectionHeader,
}
48
49impl std::fmt::Display for ElfError {
50    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51        match self {
52            ElfError::TooShort => write!(f, "file too short to be ELF"),
53            ElfError::BadMagic => write!(f, "bad ELF magic bytes"),
54            ElfError::Not32Bit => write!(f, "not a 32-bit ELF (EI_CLASS != 1)"),
55            ElfError::WrongEndian => write!(f, "not a little-endian ELF (EI_DATA != 1)"),
56            ElfError::NotRiscV => write!(f, "not a RISC-V ELF (e_machine != 243)"),
57            ElfError::InvalidProgramHeader => write!(f, "invalid or out-of-bounds program header"),
58            ElfError::InvalidSectionHeader => write!(f, "invalid or out-of-bounds section header"),
59        }
60    }
61}
62
// Marker impl: `Debug` (derived) and `Display` (above) provide everything the
// trait needs, and this lets `ElfError` be boxed as `Box<dyn std::error::Error>`.
impl std::error::Error for ElfError {}
64
65// ── ELF constants ────────────────────────────────────────────────────────────
66
/// `e_machine` value that identifies a RISC-V binary.
const EM_RISCV: u16 = 243;
/// Program header type of a loadable segment.
const PT_LOAD: u32 = 1;
/// Section header type of a symbol table section.
const SHT_SYMTAB: u32 = 2;

/// Symbol type: data object / global variable.
const STT_OBJECT: u8 = 1;
/// Symbol type: function.
const STT_FUNC: u8 = 2;

/// Symbol binding: local.
const STB_LOCAL: u8 = 0;
/// Symbol binding: global.
const STB_GLOBAL: u8 = 1;
/// Symbol binding: weak.
const STB_WEAK: u8 = 2;
77
78// ── ELF32 on-disk structures (repr(C) for zero-copy parsing) ─────────────────
79
/// ELF32 file header, laid out with `repr(C)` so it can be read straight from
/// the start of the file. Fields prefixed with `_` are not read by this module.
#[repr(C)]
struct Elf32Ehdr {
    /// Identification bytes: magic in `[0..4]`, class at `[4]`, data encoding at `[5]`.
    e_ident: [u8; 16],
    e_type: u16,
    /// Target machine; must equal `EM_RISCV`.
    e_machine: u16,
    e_version: u32,
    /// Entry point virtual address.
    e_entry: u32,
    /// File offset of the program header table.
    e_phoff: u32,
    /// File offset of the section header table.
    e_shoff: u32,
    _e_flags: u32,
    _e_ehsize: u16,
    /// Size in bytes of one program header table entry.
    e_phentsize: u16,
    /// Number of program header table entries.
    e_phnum: u16,
    /// Size in bytes of one section header table entry.
    e_shentsize: u16,
    /// Number of section header table entries.
    e_shnum: u16,
    /// Index of the section name string table.
    e_shstrndx: u16,
}
97
/// One ELF32 program header table entry (eight `u32` fields, `repr(C)`).
/// Fields prefixed with `_` are not read by this module.
#[repr(C)]
struct Elf32Phdr {
    /// Segment type; only `PT_LOAD` entries are extracted.
    p_type: u32,
    /// File offset of the segment's bytes.
    p_offset: u32,
    /// Virtual address the segment is loaded at.
    p_vaddr: u32,
    _p_paddr: u32,
    /// Number of bytes the segment occupies in the file.
    p_filesz: u32,
    /// Number of bytes the segment occupies in memory.
    p_memsz: u32,
    _p_flags: u32,
    _p_align: u32,
}
109
/// One ELF32 section header table entry (ten `u32` fields, `repr(C)`).
/// Fields prefixed with `_` are not read by this module.
#[repr(C)]
struct Elf32Shdr {
    /// Offset of the section's name in the string table.
    sh_name: u32,
    /// Section type (e.g. `SHT_SYMTAB`).
    sh_type: u32,
    _sh_flags: u32,
    _sh_addr: u32,
    /// Offset of the section's contents in the file.
    sh_offset: u32,
    /// Size of the section's contents in bytes.
    sh_size: u32,
    /// For `SHT_SYMTAB`: index of the associated string table section.
    sh_link: u32,
    _sh_info: u32,
    _sh_addralign: u32,
    /// Size of each entry (for sections that hold a table).
    sh_entsize: u32,
}
123
/// One entry in the ELF32 symbol table (`Elf32_Sym`).
#[repr(C)]
struct Elf32Sym {
    /// Offset of the symbol's name in the associated string table.
    st_name: u32,
    /// Symbol virtual address.
    st_value: u32,
    /// Size in bytes (0 if unknown).
    st_size: u32,
    /// Packed type (bits 3:0) and binding (bits 7:4).
    st_info: u8,
    _st_other: u8,
    _st_shndx: u16,
}

impl Elf32Sym {
    /// Mask selecting the type nibble of `st_info`.
    const TYPE_MASK: u8 = 0x0f;
    /// Right shift that moves the binding nibble of `st_info` into bits 3:0.
    const BINDING_SHIFT: u32 = 4;

    /// Symbol type: the low nibble of `st_info`.
    fn stt(&self) -> u8 {
        self.st_info & Self::TYPE_MASK
    }

    /// Symbol binding: the high nibble of `st_info`.
    fn stb(&self) -> u8 {
        // Shifting a `u8` right by four leaves at most four significant bits,
        // so no further masking is needed.
        self.st_info >> Self::BINDING_SHIFT
    }
}
143
144// ── Public types — segments ──────────────────────────────────────────────────
145
/// A loadable segment (`PT_LOAD`) from an ELF file.
///
/// Owns a copy of the segment's file bytes, so it does not borrow from the
/// buffer that was parsed.
#[derive(Debug, Clone)]
pub struct ElfSegment {
    /// Virtual address where this segment should be loaded (`p_vaddr`).
    pub vaddr: u32,
    /// Raw bytes copied from the ELF file (`p_filesz` bytes).
    pub data: Vec<u8>,
    /// Total size in memory (`p_memsz`). Bytes beyond `data.len()` are zero-filled (BSS).
    pub mem_size: u32,
}
156
/// A parsed ELF image ready to be loaded into emulator RAM.
///
/// Produced by [`parse_elf`].
#[derive(Debug, Clone)]
pub struct ElfImage {
    /// Entry point virtual address (`e_entry`).
    pub entry: u32,
    /// All `PT_LOAD` segments, in program-header-table order.
    pub segments: Vec<ElfSegment>,
}
165
166// ── Public types — symbol table ──────────────────────────────────────────────
167
/// The kind of an ELF symbol, derived from the type nibble (bits 3:0) of the
/// symbol's `st_info` byte.
///
/// Symbols of any other type are dropped while the symbol table is parsed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SymbolKind {
    /// Function symbol (`STT_FUNC`).
    Func,
    /// Data / global variable symbol (`STT_OBJECT`).
    Object,
}
176
/// A resolved symbol from the `.symtab` section.
#[derive(Debug, Clone)]
pub struct Symbol {
    /// Symbol name (e.g. `"vTaskStartScheduler"`).
    pub name: String,
    /// Virtual address (`st_value`).
    pub addr: u32,
    /// Size in bytes (`st_size`). `0` means unknown — common for hand-written assembly.
    pub size: u32,
    /// Whether this is a function or a data object.
    pub kind: SymbolKind,
}
189
190/// Symbol table extracted from an ELF `.symtab` section.
191///
192/// Internally maintained as two sorted indices for O(log n) lookup in both
193/// directions:
194/// - `by_addr` — sorted by address, used by [`lookup_addr`].
195/// - `by_name` — sorted by name, used by [`lookup_name`].
196///
197/// [`lookup_addr`]: SymbolTable::lookup_addr
198/// [`lookup_name`]: SymbolTable::lookup_name
199#[derive(Debug, Default)]
200pub struct SymbolTable {
201    by_addr: Vec<Symbol>,
202    by_name: Vec<(String, usize)>, // (name, index into by_addr)
203}
204
205impl SymbolTable {
206    /// Returns `true` if the table contains no symbols.
207    pub fn is_empty(&self) -> bool {
208        self.by_addr.is_empty()
209    }
210
211    /// Number of symbols in the table.
212    pub fn len(&self) -> usize {
213        self.by_addr.len()
214    }
215
216    /// Find the symbol that contains `addr`.
217    ///
218    /// Returns the symbol whose range `[sym.addr, sym.addr + sym.size)` covers
219    /// `addr`. If `sym.size == 0`, only an exact address match is accepted.
220    /// Returns `None` if no symbol covers the address.
221    pub fn lookup_addr(&self, addr: u32) -> Option<&Symbol> {
222        // Binary search: count = number of symbols with addr <= target.
223        // If none, there is no candidate below addr.
224        let count = self.by_addr.partition_point(|s| s.addr <= addr);
225        if count == 0 {
226            return None;
227        }
228
229        let sym = &self.by_addr[count - 1];
230
231        if sym.addr == addr {
232            return Some(sym); // exact match
233        }
234
235        // Range match: only valid when size is known and addr falls within it.
236        if sym.size > 0 && addr < sym.addr.saturating_add(sym.size) {
237            Some(sym)
238        } else {
239            None
240        }
241    }
242
243    /// Find a symbol by its exact name.
244    ///
245    /// Returns `None` if no symbol with that name exists.
246    pub fn lookup_name(&self, name: &str) -> Option<&Symbol> {
247        let idx = self
248            .by_name
249            .binary_search_by_key(&name, |(n, _)| n.as_str())
250            .ok()?;
251        let sym_idx = self.by_name[idx].1;
252        Some(&self.by_addr[sym_idx])
253    }
254
255    /// Iterate over all symbols in ascending address order.
256    pub fn iter(&self) -> impl Iterator<Item = &Symbol> {
257        self.by_addr.iter()
258    }
259
260    fn build(mut symbols: Vec<Symbol>) -> Self {
261        // Sort by address for binary search in lookup_addr.
262        symbols.sort_by_key(|s| s.addr);
263
264        // Build name index for binary search in lookup_name.
265        let mut by_name: Vec<(String, usize)> = symbols
266            .iter()
267            .enumerate()
268            .map(|(i, s)| (s.name.clone(), i))
269            .collect();
270        by_name.sort_by(|a, b| a.0.cmp(&b.0));
271
272        SymbolTable {
273            by_addr: symbols,
274            by_name,
275        }
276    }
277}
278
// ── Internal parsing helpers ─────────────────────────────────────────────────

/// Reinterpret part of a byte slice as a slice of `count` `repr(C)` structs,
/// starting at byte `offset`.
///
/// # Safety
/// The caller must ensure all of the following:
/// - `offset + count * size_of::<T>() <= data.len()`;
/// - the address of `data[offset]` is a multiple of `align_of::<T>()`. This
///   depends on where the buffer itself lives in memory as well as on
///   `offset`, so a well-formed ELF file alone does not guarantee it;
/// - every bit pattern is a valid `T` (true for structs made only of
///   integer fields).
///
/// Both numeric conditions are checked with `debug_assert!` in debug builds.
///
/// # Panics
/// Panics if `offset > data.len()`.
unsafe fn read_slice<T>(data: &[u8], offset: usize, count: usize) -> &[T] {
    let start = data[offset..].as_ptr();

    debug_assert!(
        count
            .checked_mul(std::mem::size_of::<T>())
            .and_then(|len| len.checked_add(offset))
            .map_or(false, |end| end <= data.len()),
        "read_slice: {} entries at offset {} exceed the {}-byte buffer",
        count,
        offset,
        data.len()
    );
    debug_assert!(
        (start as usize) % std::mem::align_of::<T>() == 0,
        "read_slice: address is not aligned to {} bytes",
        std::mem::align_of::<T>()
    );

    // SAFETY: the caller guarantees the bounds, alignment and validity
    // requirements listed above; the returned slice borrows from `data`.
    unsafe { std::slice::from_raw_parts(start as *const T, count) }
}
290
/// Read a null-terminated UTF-8 string from a string table at `offset`.
///
/// - If no terminator follows `offset`, the string runs to the end of the table.
/// - If `offset` lies at or past the end of the table, the empty string is
///   returned instead of panicking.
/// - If the bytes are not valid UTF-8, the placeholder `"<invalid utf8>"` is
///   returned.
fn read_cstr(strtab: &[u8], offset: usize) -> &str {
    // `get` keeps an out-of-range offset (e.g. from a corrupt `st_name`)
    // from panicking.
    let tail = strtab.get(offset..).unwrap_or_default();
    let len = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
    std::str::from_utf8(&tail[..len]).unwrap_or("<invalid utf8>")
}
297
298// ── Public API ───────────────────────────────────────────────────────────────
299
300/// Parse an ELF32 RV32 little-endian file and return its loadable segments.
301///
302/// Only `PT_LOAD` program headers are extracted. Section headers are ignored.
303pub fn parse_elf(data: &[u8]) -> Result<ElfImage, ElfError> {
304    let hdr = parse_header(data)?;
305
306    let segments = parse_segments(data, hdr)?;
307
308    Ok(ElfImage {
309        entry: hdr.e_entry,
310        segments,
311    })
312}
313
314/// Parse the `.symtab` section of an ELF32 RV32 file.
315///
316/// Extracts `STT_FUNC` and `STT_OBJECT` symbols with `global`, `weak`, or
317/// `local` binding. Linker-internal names (starting with `.` or `$`) are
318/// filtered out.
319///
320/// Returns `Ok(None)` if the ELF has no `.symtab` (i.e. it was stripped).
321/// Returns `Err` only if the file is not a valid ELF32 RV32 binary.
322pub fn parse_symbol_table(data: &[u8]) -> Result<Option<SymbolTable>, ElfError> {
323    let hdr = parse_header(data)?;
324
325    let shoff = hdr.e_shoff as usize;
326    let shnum = hdr.e_shnum as usize;
327    let shentsize = hdr.e_shentsize as usize;
328
329    // No section headers — the ELF is stripped or is a relocatable object.
330    if shoff == 0 || shnum == 0 {
331        return Ok(None);
332    }
333
334    let sh_end = shoff
335        .checked_add(
336            shnum
337                .checked_mul(shentsize)
338                .ok_or(ElfError::InvalidSectionHeader)?,
339        )
340        .ok_or(ElfError::InvalidSectionHeader)?;
341
342    if sh_end > data.len() {
343        return Err(ElfError::InvalidSectionHeader);
344    }
345
346    // SAFETY: bounds verificados acima.
347    let shdrs: &[Elf32Shdr] = unsafe { read_slice(data, shoff, shnum) };
348
349    // Find the SHT_SYMTAB section.
350    let symtab_shdr = shdrs.iter().find(|s| s.sh_type == SHT_SYMTAB);
351    let symtab_shdr = match symtab_shdr {
352        Some(s) => s,
353        None => return Ok(None), // ELF stripped
354    };
355
356    // The associated string table is pointed to by sh_link.
357    let strtab_idx = symtab_shdr.sh_link as usize;
358    if strtab_idx >= shnum {
359        return Err(ElfError::InvalidSectionHeader);
360    }
361    let strtab_shdr = &shdrs[strtab_idx];
362
363    // Validate and extract the section data.
364    let sym_offset = symtab_shdr.sh_offset as usize;
365    let sym_size = symtab_shdr.sh_size as usize;
366    let sym_entsize = symtab_shdr.sh_entsize as usize;
367
368    if sym_entsize == 0 {
369        return Err(ElfError::InvalidSectionHeader);
370    }
371
372    let sym_end = sym_offset
373        .checked_add(sym_size)
374        .ok_or(ElfError::InvalidSectionHeader)?;
375    if sym_end > data.len() {
376        return Err(ElfError::InvalidSectionHeader);
377    }
378
379    let str_offset = strtab_shdr.sh_offset as usize;
380    let str_end = str_offset
381        .checked_add(strtab_shdr.sh_size as usize)
382        .ok_or(ElfError::InvalidSectionHeader)?;
383    if str_end > data.len() {
384        return Err(ElfError::InvalidSectionHeader);
385    }
386
387    let strtab = &data[str_offset..str_end];
388
389    let sym_count = sym_size / sym_entsize;
390
391    // SAFETY: bounds verificados acima.
392    let raw_syms: &[Elf32Sym] = unsafe { read_slice(data, sym_offset, sym_count) };
393
394    let mut symbols = Vec::new();
395    for sym in raw_syms {
396        // Filtra por tipo
397        let kind = match sym.stt() {
398            STT_FUNC => SymbolKind::Func,
399            STT_OBJECT => SymbolKind::Object,
400            _ => continue,
401        };
402
403        // Include global, weak, and local bindings. Local functions like
404        // uart_putc are useful for the disassembler.
405        match sym.stb() {
406            STB_GLOBAL | STB_WEAK | STB_LOCAL => {}
407            _ => continue,
408        }
409
410        // Skip undefined symbols (address 0).
411        if sym.st_value == 0 {
412            continue;
413        }
414
415        let name_offset = sym.st_name as usize;
416        if name_offset >= strtab.len() {
417            continue;
418        }
419
420        let name = read_cstr(strtab, name_offset).to_owned();
421        if name.is_empty() {
422            continue;
423        }
424
425        // Filter out linker/compiler noise:
426        //   .Lxxx — local assembler labels
427        //   $x    — ARM/RISC-V mapping symbols
428        if name.starts_with('.') || name.starts_with('$') {
429            continue;
430        }
431
432        symbols.push(Symbol {
433            name,
434            addr: sym.st_value,
435            size: sym.st_size,
436            kind,
437        });
438    }
439
440    if symbols.is_empty() {
441        return Ok(None);
442    }
443
444    Ok(Some(SymbolTable::build(symbols)))
445}
446
447// ── Internal helpers ─────────────────────────────────────────────────────────
448
449/// Validate the ELF header and return a typed reference into the data slice.
450fn parse_header(data: &[u8]) -> Result<&Elf32Ehdr, ElfError> {
451    if data.len() < std::mem::size_of::<Elf32Ehdr>() {
452        return Err(ElfError::TooShort);
453    }
454
455    // SAFETY: tamanho verificado acima.
456    let hdr = unsafe { &*(data.as_ptr() as *const Elf32Ehdr) };
457
458    if &hdr.e_ident[0..4] != b"\x7fELF" {
459        return Err(ElfError::BadMagic);
460    }
461    if hdr.e_ident[4] != 1 {
462        return Err(ElfError::Not32Bit);
463    }
464    if hdr.e_ident[5] != 1 {
465        return Err(ElfError::WrongEndian);
466    }
467    if hdr.e_machine != EM_RISCV {
468        return Err(ElfError::NotRiscV);
469    }
470
471    Ok(hdr)
472}
473
474/// Extract all `PT_LOAD` segments from the ELF file.
475fn parse_segments(data: &[u8], hdr: &Elf32Ehdr) -> Result<Vec<ElfSegment>, ElfError> {
476    let phoff = hdr.e_phoff as usize;
477    let phnum = hdr.e_phnum as usize;
478    let phentsize = hdr.e_phentsize as usize;
479
480    let ph_end = phoff
481        .checked_add(
482            phnum
483                .checked_mul(phentsize)
484                .ok_or(ElfError::InvalidProgramHeader)?,
485        )
486        .ok_or(ElfError::InvalidProgramHeader)?;
487
488    if ph_end > data.len() {
489        return Err(ElfError::InvalidProgramHeader);
490    }
491
492    // SAFETY: bounds verificados acima.
493    let phdrs: &[Elf32Phdr] = unsafe { read_slice(data, phoff, phnum) };
494
495    let mut segments = Vec::new();
496    for ph in phdrs {
497        if ph.p_type != PT_LOAD {
498            continue;
499        }
500
501        let start = ph.p_offset as usize;
502        let end = start
503            .checked_add(ph.p_filesz as usize)
504            .ok_or(ElfError::InvalidProgramHeader)?;
505
506        if end > data.len() {
507            return Err(ElfError::InvalidProgramHeader);
508        }
509
510        segments.push(ElfSegment {
511            vaddr: ph.p_vaddr,
512            data: data[start..end].to_vec(),
513            mem_size: ph.p_memsz,
514        });
515    }
516
517    Ok(segments)
518}