Skip to main content

riscv_emulator/
disasm.rs

1//! RV32IMA disassembler — objdump-compatible output.
2//!
3//! Converts a 32-bit instruction word into a human-readable string using
4//! ABI register names and common pseudo-instructions. When a [`SymbolTable`]
5//! is provided, branch and jump targets are annotated with symbol names.
6//!
7//! ## Output format
8//!
9//! The format matches GNU objdump (`-M no-aliases` disabled, i.e. pseudos on):
10//!
11//! ```text
12//! 80000014  510010ef  jal     ra, <main>
13//! 80001524  ff010113  addi    sp, sp, -16
14//! 80001528  00112623  sw      ra, 12(sp)
15//! ```
16//!
17//! ## Pseudo-instructions
18//!
19//! The following common pseudos are emitted when the encoding matches:
20//!
21//! | Pseudo | Canonical form |
22//! |--------|---------------|
23//! | `nop` | `addi zero, zero, 0` |
24//! | `mv rd, rs` | `addi rd, rs, 0` |
25//! | `li rd, imm` | `addi rd, zero, imm` |
26//! | `not rd, rs` | `xori rd, rs, -1` |
27//! | `neg rd, rs` | `sub rd, zero, rs` |
28//! | `snez rd, rs` | `sltu rd, zero, rs` |
29//! | `ret` | `jalr zero, 0(ra)` |
30//! | `jr rs` | `jalr zero, 0(rs)` |
31//! | `j offset` | `jal zero, offset` |
//! | `beqz/bnez/bltz/bgez/blez/bgtz` | branch with `zero` as one operand |
33//!
34//! ## Static disassembly
35//!
36//! [`disasm_elf`] disassembles an entire ELF image segment by segment, with
37//! symbol labels printed before each function — equivalent to `objdump -d`.
38
39use crate::elf::{ElfImage, SymbolTable};
40
41// ── ABI register names ───────────────────────────────────────────────────────
42
/// ABI names of the 32 integer registers, indexed by register number (`x0` first).
#[rustfmt::skip]
const REG_NAMES: [&str; 32] = [
    "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2",
    "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
    "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7",
    "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6",
];

/// Look up the ABI name for register number `r`.
///
/// Only the low five bits of `r` are considered, so every `u32` maps to a
/// valid table entry and the lookup cannot go out of bounds.
#[inline]
fn reg(r: u32) -> &'static str {
    let index = (r % 32) as usize;
    REG_NAMES[index]
}
54
55// ── Immediate decoders (pure functions, same logic as emulator.rs) ────────────
56
/// Decode the sign-extended 12-bit I-type immediate (instruction bits 31:20).
#[inline]
fn imm_i(ir: u32) -> i32 {
    // Shifting the signed view right is arithmetic, so bit 31 (the
    // immediate's sign bit) is replicated into the upper 20 bits.
    (ir as i32) >> 20
}
/// Decode the sign-extended 12-bit S-type immediate.
///
/// The immediate is split across the word: `imm[11:5]` lives in bits 31:25
/// and `imm[4:0]` in bits 11:7.
#[inline]
fn imm_s(ir: u32) -> i32 {
    // Arithmetic shift keeps the sign while isolating imm[11:5].
    let upper = ((ir as i32) >> 25) << 5;
    let lower = ((ir >> 7) & 0x1f) as i32;
    upper | lower
}
/// Decode the sign-extended 13-bit B-type (branch) offset; bit 0 is always zero.
///
/// Field layout in the instruction word: `imm[12]` = bit 31,
/// `imm[10:5]` = bits 30:25, `imm[4:1]` = bits 11:8, `imm[11]` = bit 7.
#[inline]
fn imm_b(ir: u32) -> i32 {
    // Bit 31 supplies imm[12] and every bit above it.
    let sign = ((ir as i32) >> 31) << 12;
    let bit_11 = ((ir >> 7) & 0x1) << 11;
    let bits_10_5 = ((ir >> 25) & 0x3f) << 5;
    let bits_4_1 = ((ir >> 8) & 0xf) << 1;
    sign | (bit_11 | bits_10_5 | bits_4_1) as i32
}
/// Decode the sign-extended 21-bit J-type (`jal`) offset; bit 0 is always zero.
///
/// Field layout in the instruction word: `imm[20]` = bit 31,
/// `imm[10:1]` = bits 30:21, `imm[11]` = bit 20, `imm[19:12]` = bits 19:12.
#[inline]
fn imm_j(ir: u32) -> i32 {
    // Bit 31 supplies imm[20] and every bit above it.
    let sign = ((ir as i32) >> 31) << 20;
    // imm[19:12] already sits at its final position.
    let bits_19_12 = ir & 0x000f_f000;
    let bit_11 = ((ir >> 20) & 0x1) << 11;
    let bits_10_1 = ((ir >> 21) & 0x3ff) << 1;
    sign | (bits_19_12 | bit_11 | bits_10_1) as i32
}
/// Decode the U-type immediate: instruction bits 31:12 kept in place, with
/// the low 12 bits cleared.
#[inline]
fn imm_u(ir: u32) -> i32 {
    (ir as i32) & !0xfff
}
89
90// ── Formatting helpers ───────────────────────────────────────────────────────
91
92/// Format a branch/jump target address, annotating it with a symbol name if available.
93///
94/// - Exact match (`addr == sym.addr`): `<name>`
95/// - Inside a symbol's range (`size > 0`): `<name+0xoffset>`
96/// - No match or `size == 0` without exact match: `0x80001234`
97fn fmt_target(addr: u32, syms: Option<&SymbolTable>) -> String {
98    if let Some(sym) = syms.and_then(|t| t.lookup_addr(addr)) {
99        if sym.addr == addr {
100            // Exact match — always show the symbol name.
101            format!("<{}>", sym.name)
102        } else if sym.size > 0 {
103            // Inside a known range — offset is reliable.
104            format!("<{}+0x{:x}>", sym.name, addr - sym.addr)
105        } else {
106            // size == 0: we do not know where the symbol ends;
107            // a numeric address is more honest than a made-up offset.
108            format!("0x{:08x}", addr)
109        }
110    } else {
111        format!("0x{:08x}", addr)
112    }
113}
114
115/// Format a memory operand as `imm(rs)` for load/store instructions.
116#[inline]
117fn mem_operand(rs: u32, imm: i32) -> String {
118    format!("{}({})", imm, reg(rs))
119}
120
121// ── Main disassembler ────────────────────────────────────────────────────────
122
123/// Disassemble one RV32IMA instruction into an objdump-compatible string.
124///
125/// - `ir`   — the 32-bit instruction word.
126/// - `pc`   — virtual address of the instruction, used to compute branch and
127///   jump targets relative to PC.
128/// - `syms` — optional symbol table for resolving target addresses to names.
129///
130/// Returns a string containing the mnemonic and operands, with no leading
131/// address and no trailing newline. Example: `"jal     ra, <vTaskStartScheduler>"`.
132pub fn disassemble(ir: u32, pc: u32, syms: Option<&SymbolTable>) -> String {
133    let rd = (ir >> 7) & 0x1f;
134    let rs1 = (ir >> 15) & 0x1f;
135    let rs2 = (ir >> 20) & 0x1f;
136    let funct3 = (ir >> 12) & 0x7;
137    let funct7 = (ir >> 25) & 0x7f;
138
139    match ir & 0x7f {
140        // ── LUI ──────────────────────────────────────────────────────────────
141        0x37 => {
142            format!("lui     {}, 0x{:x}", reg(rd), (imm_u(ir) as u32) >> 12)
143        }
144
145        // ── AUIPC ────────────────────────────────────────────────────────────
146        0x17 => {
147            format!("auipc   {}, 0x{:x}", reg(rd), (imm_u(ir) as u32) >> 12)
148        }
149
150        // ── JAL ──────────────────────────────────────────────────────────────
151        0x6f => {
152            let target = pc.wrapping_add(imm_j(ir) as u32);
153            let sym = fmt_target(target, syms);
154            if rd == 0 {
155                format!("j       {}", sym) // pseudo: j offset
156            } else if rd == 1 {
157                format!("jal     ra, {}", sym) // pseudo: call / jal ra
158            } else {
159                format!("jal     {}, {}", reg(rd), sym)
160            }
161        }
162
163        // ── JALR ─────────────────────────────────────────────────────────────
164        0x67 => {
165            let imm = imm_i(ir);
166            // ret: jalr zero, 0(ra)
167            if rd == 0 && rs1 == 1 && imm == 0 {
168                return "ret".to_string();
169            }
170            // jr rs: jalr zero, 0(rs)
171            if rd == 0 && imm == 0 {
172                return format!("jr      {}", reg(rs1));
173            }
174            format!("jalr    {}, {}({})", reg(rd), imm, reg(rs1))
175        }
176
177        // ── Branches ─────────────────────────────────────────────────────────
178        0x63 => {
179            let target = pc.wrapping_add(imm_b(ir) as u32);
180            let sym = fmt_target(target, syms);
181            let mnem = match funct3 {
182                0 => "beq",
183                1 => "bne",
184                4 => "blt",
185                5 => "bge",
186                6 => "bltu",
187                7 => "bgeu",
188                _ => return format!("unknown.branch funct3={}", funct3),
189            };
190            // Pseudos: beqz / bnez / bltz / bgez / blez / bgtz
191            if rs2 == 0 {
192                let pseudo = match funct3 {
193                    0 => Some("beqz"),
194                    1 => Some("bnez"),
195                    4 => Some("bltz"),
196                    5 => Some("bgez"),
197                    _ => None,
198                };
199                if let Some(p) = pseudo {
200                    return format!("{:<8}{}, {}", p, reg(rs1), sym);
201                }
202            }
203            if rs1 == 0 {
204                let pseudo = match funct3 {
205                    4 => Some("bgtz"), // blt zero, rs2 → bgtz rs2
206                    5 => Some("blez"), // bge zero, rs2 → blez rs2
207                    _ => None,
208                };
209                if let Some(p) = pseudo {
210                    return format!("{:<8}{}, {}", p, reg(rs2), sym);
211                }
212            }
213            format!("{:<8}{}, {}, {}", mnem, reg(rs1), reg(rs2), sym)
214        }
215
216        // ── Loads ────────────────────────────────────────────────────────────
217        0x03 => {
218            let imm = imm_i(ir);
219            let mnem = match funct3 {
220                0 => "lb",
221                1 => "lh",
222                2 => "lw",
223                4 => "lbu",
224                5 => "lhu",
225                _ => return format!("unknown.load funct3={}", funct3),
226            };
227            format!("{:<8}{}, {}", mnem, reg(rd), mem_operand(rs1, imm))
228        }
229
230        // ── Stores ───────────────────────────────────────────────────────────
231        0x23 => {
232            let imm = imm_s(ir);
233            let mnem = match funct3 {
234                0 => "sb",
235                1 => "sh",
236                2 => "sw",
237                _ => return format!("unknown.store funct3={}", funct3),
238            };
239            format!("{:<8}{}, {}", mnem, reg(rs2), mem_operand(rs1, imm))
240        }
241
242        // ── OP-IMM (0x13) ────────────────────────────────────────────────────
243        0x13 => {
244            let imm = imm_i(ir);
245            let shamt = (ir >> 20) & 0x1f;
246            match funct3 {
247                0 => {
248                    // nop: addi zero, zero, 0
249                    if rd == 0 && rs1 == 0 && imm == 0 {
250                        return "nop".to_string();
251                    }
252                    // mv rd, rs: addi rd, rs, 0
253                    if imm == 0 {
254                        return format!("mv      {}, {}", reg(rd), reg(rs1));
255                    }
256                    // li rd, imm: addi rd, zero, imm
257                    if rs1 == 0 {
258                        return format!("li      {}, {}", reg(rd), imm);
259                    }
260                    format!("addi    {}, {}, {}", reg(rd), reg(rs1), imm)
261                }
262                1 => format!("slli    {}, {}, {}", reg(rd), reg(rs1), shamt),
263                2 => format!("slti    {}, {}, {}", reg(rd), reg(rs1), imm),
264                3 => format!("sltiu   {}, {}, {}", reg(rd), reg(rs1), imm as u32),
265                4 => {
266                    // not rd, rs: xori rd, rs, -1
267                    if imm == -1 {
268                        return format!("not     {}, {}", reg(rd), reg(rs1));
269                    }
270                    format!("xori    {}, {}, {}", reg(rd), reg(rs1), imm)
271                }
272                5 => {
273                    if funct7 == 0x20 {
274                        format!("srai    {}, {}, {}", reg(rd), reg(rs1), shamt)
275                    } else {
276                        format!("srli    {}, {}, {}", reg(rd), reg(rs1), shamt)
277                    }
278                }
279                6 => format!("ori     {}, {}, {}", reg(rd), reg(rs1), imm),
280                7 => format!("andi    {}, {}, {}", reg(rd), reg(rs1), imm),
281                _ => format!("unknown.op-imm funct3={}", funct3),
282            }
283        }
284
285        // ── OP (0x33) — RV32I + RV32M ────────────────────────────────────────
286        0x33 => {
287            if funct7 == 0x01 {
288                // RV32M
289                let mnem = match funct3 {
290                    0 => "mul",
291                    1 => "mulh",
292                    2 => "mulhsu",
293                    3 => "mulhu",
294                    4 => "div",
295                    5 => "divu",
296                    6 => "rem",
297                    7 => "remu",
298                    _ => return format!("unknown.m funct3={}", funct3),
299                };
300                return format!("{:<8}{}, {}, {}", mnem, reg(rd), reg(rs1), reg(rs2));
301            }
302            match funct3 {
303                0 => {
304                    if funct7 == 0x20 {
305                        // neg rd, rs: sub rd, zero, rs
306                        if rs1 == 0 {
307                            return format!("neg     {}, {}", reg(rd), reg(rs2));
308                        }
309                        format!("sub     {}, {}, {}", reg(rd), reg(rs1), reg(rs2))
310                    } else {
311                        format!("add     {}, {}, {}", reg(rd), reg(rs1), reg(rs2))
312                    }
313                }
314                1 => format!("sll     {}, {}, {}", reg(rd), reg(rs1), reg(rs2)),
315                2 => {
316                    // seqz is not a standard pseudo here; emit canonical form.
317                    format!("slt     {}, {}, {}", reg(rd), reg(rs1), reg(rs2))
318                }
319                3 => {
320                    // snez rd, rs: sltu rd, zero, rs
321                    if rs1 == 0 {
322                        return format!("snez    {}, {}", reg(rd), reg(rs2));
323                    }
324                    format!("sltu    {}, {}, {}", reg(rd), reg(rs1), reg(rs2))
325                }
326                4 => format!("xor     {}, {}, {}", reg(rd), reg(rs1), reg(rs2)),
327                5 => {
328                    if funct7 == 0x20 {
329                        format!("sra     {}, {}, {}", reg(rd), reg(rs1), reg(rs2))
330                    } else {
331                        format!("srl     {}, {}, {}", reg(rd), reg(rs1), reg(rs2))
332                    }
333                }
334                6 => format!("or      {}, {}, {}", reg(rd), reg(rs1), reg(rs2)),
335                7 => format!("and     {}, {}, {}", reg(rd), reg(rs1), reg(rs2)),
336                _ => format!("unknown.op funct3={}", funct3),
337            }
338        }
339
340        // ── FENCE ────────────────────────────────────────────────────────────
341        0x0f => "fence".to_string(),
342
343        // ── SYSTEM ───────────────────────────────────────────────────────────
344        0x73 => {
345            let csrno = ir >> 20;
346            let microop = funct3;
347            if microop == 0 {
348                return match csrno {
349                    0x000 => "ecall".to_string(),
350                    0x001 => "ebreak".to_string(),
351                    0x302 => "mret".to_string(),
352                    0x105 => "wfi".to_string(),
353                    _ => format!("system  0x{:03x}", csrno),
354                };
355            }
356            // Zicsr
357            let csr_name = csr_name(csrno);
358            let zimm = rs1; // rs1 field used as imm on I variants
359            match microop {
360                1 => {
361                    // csrw csr, rs: csrrw zero, csr, rs (rd == zero, discards reading)
362                    if rd == 0 {
363                        return format!("csrw    {}, {}", csr_name, reg(rs1));
364                    }
365                    format!("csrrw   {}, {}, {}", reg(rd), csr_name, reg(rs1))
366                }
367                2 => {
368                    // csrs csr, rs: csrrs zero, csr, rs
369                    if rd == 0 {
370                        return format!("csrs    {}, {}", csr_name, reg(rs1));
371                    }
372                    // csrr rd, csr: csrrs rd, csr, zero (rs1 == zero)
373                    if rs1 == 0 {
374                        return format!("csrr    {}, {}", reg(rd), csr_name);
375                    }
376                    format!("csrrs   {}, {}, {}", reg(rd), csr_name, reg(rs1))
377                }
378                3 => {
379                    if rd == 0 {
380                        return format!("csrc    {}, {}", csr_name, reg(rs1));
381                    }
382                    format!("csrrc   {}, {}, {}", reg(rd), csr_name, reg(rs1))
383                }
384                5 => {
385                    if rd == 0 {
386                        return format!("csrwi   {}, {}", csr_name, zimm);
387                    }
388                    format!("csrrwi  {}, {}, {}", reg(rd), csr_name, zimm)
389                }
390                6 => {
391                    if rd == 0 {
392                        return format!("csrsi   {}, {}", csr_name, zimm);
393                    }
394                    format!("csrrsi  {}, {}, {}", reg(rd), csr_name, zimm)
395                }
396                7 => {
397                    if rd == 0 {
398                        return format!("csrci   {}, {}", csr_name, zimm);
399                    }
400                    format!("csrrci  {}, {}, {}", reg(rd), csr_name, zimm)
401                }
402                _ => format!("unknown.csr microop={}", microop),
403            }
404        }
405
406        // ── RV32A ────────────────────────────────────────────────────────────
407        0x2f => {
408            let funct5 = (ir >> 27) & 0x1f;
409            let aq = (ir >> 26) & 1;
410            let rl = (ir >> 25) & 1;
411            let order = match (aq, rl) {
412                (1, 1) => ".aqrl",
413                (1, 0) => ".aq",
414                (0, 1) => ".rl",
415                _ => "",
416            };
417            match funct5 {
418                0b00010 => format!("lr.w{}   {}, ({})", order, reg(rd), reg(rs1)),
419                0b00011 => format!("sc.w{}   {}, {}, ({})", order, reg(rd), reg(rs2), reg(rs1)),
420                0b00001 => format!(
421                    "amoswap.w{} {}, {}, ({})",
422                    order,
423                    reg(rd),
424                    reg(rs2),
425                    reg(rs1)
426                ),
427                0b00000 => format!(
428                    "amoadd.w{} {}, {}, ({})",
429                    order,
430                    reg(rd),
431                    reg(rs2),
432                    reg(rs1)
433                ),
434                0b00100 => format!(
435                    "amoxor.w{} {}, {}, ({})",
436                    order,
437                    reg(rd),
438                    reg(rs2),
439                    reg(rs1)
440                ),
441                0b01100 => format!(
442                    "amoand.w{} {}, {}, ({})",
443                    order,
444                    reg(rd),
445                    reg(rs2),
446                    reg(rs1)
447                ),
448                0b01000 => format!(
449                    "amoor.w{}  {}, {}, ({})",
450                    order,
451                    reg(rd),
452                    reg(rs2),
453                    reg(rs1)
454                ),
455                0b10000 => format!(
456                    "amomin.w{} {}, {}, ({})",
457                    order,
458                    reg(rd),
459                    reg(rs2),
460                    reg(rs1)
461                ),
462                0b10100 => format!(
463                    "amomax.w{} {}, {}, ({})",
464                    order,
465                    reg(rd),
466                    reg(rs2),
467                    reg(rs1)
468                ),
469                0b11000 => format!(
470                    "amominu.w{} {}, {}, ({})",
471                    order,
472                    reg(rd),
473                    reg(rs2),
474                    reg(rs1)
475                ),
476                0b11100 => format!(
477                    "amomaxu.w{} {}, {}, ({})",
478                    order,
479                    reg(rd),
480                    reg(rs2),
481                    reg(rs1)
482                ),
483                _ => format!("unknown.amo funct5=0x{:02x}", funct5),
484            }
485        }
486
487        _ => format!("unknown  0x{:08x}", ir),
488    }
489}
490
491// ── CSR names ────────────────────────────────────────────────────────────────
492
/// Map a CSR address to its conventional name.
///
/// Addresses that are not in the built-in table are rendered as hex with at
/// least three digits, e.g. `0x7c0`.
fn csr_name(csr: u32) -> String {
    const KNOWN_CSRS: [(u32, &str); 16] = [
        (0x300, "mstatus"),
        (0x301, "misa"),
        (0x304, "mie"),
        (0x305, "mtvec"),
        (0x340, "mscratch"),
        (0x341, "mepc"),
        (0x342, "mcause"),
        (0x343, "mtval"),
        (0x344, "mip"),
        (0xc00, "cycle"),
        (0xc01, "time"),
        (0xc02, "instret"),
        (0xf11, "mvendorid"),
        (0xf12, "marchid"),
        (0xf13, "mimpid"),
        (0xf14, "mhartid"),
    ];

    KNOWN_CSRS
        .iter()
        .find(|&&(address, _)| address == csr)
        .map_or_else(|| format!("0x{:03x}", csr), |&(_, name)| name.to_owned())
}
515
516// ── Static disassembly ───────────────────────────────────────────────────────
517
518/// Disassemble an entire ELF image, segment by segment, like `objdump -d`.
519///
520/// Prints to stdout in the following format:
521///
522/// ```text
523/// Disassembly of section .text:
524///
525/// 80000000 <_start>:
526///   80000000  02000117  auipc   sp, 0x2000
527/// ```
528///
529/// Segments shorter than 4 bytes are skipped.
530pub fn disasm_elf(_data: &[u8], image: &ElfImage, syms: Option<&SymbolTable>) {
531    // Use PT_LOAD segments as a proxy for code sections.
532    // A true per-section disassembly would require parsing section headers,
533    // but segments are sufficient for the typical use case.
534    for (seg_idx, seg) in image.segments.iter().enumerate() {
535        let base = seg.vaddr;
536        let data_slice = &seg.data;
537
538        // Skip segments too short to contain even one instruction.
539        if data_slice.len() < 4 {
540            continue;
541        }
542
543        // Section label — use a generic name; we parse segments, not sections.
544        let section_label = if seg_idx == 0 {
545            ".text"
546        } else {
547            &format!(".text.{}", seg_idx)
548        };
549        println!("Disassembly of section {}:\n", section_label);
550
551        let mut i = 0usize;
552        while i + 3 < data_slice.len() {
553            let pc = base + i as u32;
554
555            // Print a symbol label if this address is the start of a known symbol.
556            if let Some(sym) = syms.and_then(|t| t.lookup_addr(pc)) {
557                if sym.addr == pc {
558                    println!("{:08x} <{}>:", pc, sym.name);
559                }
560            }
561
562            let ir = u32::from_le_bytes([
563                data_slice[i],
564                data_slice[i + 1],
565                data_slice[i + 2],
566                data_slice[i + 3],
567            ]);
568
569            let mnem = disassemble(ir, pc, syms);
570            println!("  {:08x}  {:08x}  {}", pc, ir, mnem);
571
572            i += 4;
573        }
574        println!();
575    }
576}