Skip to main content

riscv_emulator/
emulator.rs

1//! The emulator core — CPU execution loop, image loading, and run control.
2//!
3//! [`Emulator`] is the central type. It owns the [`CpuState`], the RAM buffer,
4//! and optional debug state (symbol table, output buffer). Everything else —
5//! time, keyboard, sleep — is delegated to a [`Platform`] implementation.
6//!
7//! ## Execution flow
8//!
9//! ```text
10//! Emulator::run()
11//!   └─ loop {
12//!        tick_timer(elapsed_us)       // advance mtime, check MTIP
13//!        check WFI / pending IRQ
14//!        'iloop: for 0..count {
15//!            fetch instruction
16//!            decode opcode
17//!            execute → update regs / pc / trap
18//!        }
19//!        commit_trap() if any trap fired
20//!        update cycle counter
21//!      }
22//! ```
23//!
24//! ## Loading images
25//!
26//! | Method | Use case |
27//! |--------|---------|
28//! | [`Emulator::load_raw`] | Linux kernel raw image + DTB |
29//! | [`Emulator::load_raw_from_bytes`] | Same, but from an in-memory slice (WASM) |
30//! | [`Emulator::load_elf`] | Bare-metal ELF or FreeRTOS |
31//!
32//! ## ISA coverage
33//!
34//! | Extension | Instructions |
35//! |-----------|-------------|
36//! | RV32I | All base integer instructions |
37//! | RV32M | MUL, MULH, MULHSU, MULHU, DIV, DIVU, REM, REMU |
38//! | RV32A | LR.W, SC.W, AMOSWAP/ADD/XOR/AND/OR/MIN/MAX/MINU/MAXU.W |
39//! | Zicsr | CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, CSRRCI |
40//! | Privileged | MRET, WFI, ECALL, EBREAK |
41//! | FENCE | Treated as no-op (no cache model) |
42
43use crate::cpu::{CpuState, Csr, StepResult, Trap};
44use crate::elf::SymbolTable;
45use crate::mmio;
46use crate::platform::Platform;
47use crate::{dtb, elf};
48
/// Guest-physical address at which the emulated RAM window begins.
///
/// Byte `i` of the RAM buffer lives at guest address `RAM_BASE + i`; the
/// loaders and the execution loop subtract this constant to turn a guest
/// address into a buffer index.
pub const RAM_BASE: u32 = 0x8000_0000;
53
54/// A complete RV32IMA emulator instance.
55///
56/// Owns the CPU state, the RAM buffer, and optional debug/WASM state.
57/// Peripheral I/O (time, keyboard, sleep) is delegated to a [`Platform`].
58pub struct Emulator {
59    /// The CPU register file and CSR set.
60    pub cpu: CpuState,
61    /// Flat emulated RAM buffer. Indexed as `ram[addr - RAM_BASE]`.
62    pub ram: Vec<u8>,
63    /// Size of `ram` in bytes.
64    pub ram_size: u32,
65    /// When `true`, any fault halts immediately instead of invoking the trap
66    /// handler. Enabled by the `-d` CLI flag for debugging.
67    pub fail_on_all_faults: bool,
68    /// Symbol table from the loaded ELF, if available.
69    /// Populated by [`load_elf`] when the image is not stripped.
70    ///
71    /// [`load_elf`]: Emulator::load_elf
72    pub symbols: Option<SymbolTable>,
73    /// UART output buffer. In WASM mode, bytes written to the UART go here
74    /// instead of stdout. Drained by [`drain_output`] after each batch.
75    ///
76    /// [`drain_output`]: Emulator::drain_output
77    pub output_buf: Vec<u8>,
78    /// When `true`, UART bytes go to `output_buf` instead of stdout.
79    /// Set automatically by [`load_raw_from_bytes`].
80    ///
81    /// [`load_raw_from_bytes`]: Emulator::load_raw_from_bytes
82    pub wasm_mode: bool,
83}
84
85impl Emulator {
86    /// Create a new emulator with `ram_size` bytes of RAM, all zeroed.
87    pub fn new(ram_size: u32) -> Self {
88        Emulator {
89            cpu: CpuState::default(),
90            ram: vec![0u8; ram_size as usize],
91            ram_size,
92            fail_on_all_faults: false,
93            symbols: None,
94            output_buf: Vec::new(),
95            wasm_mode: false,
96        }
97    }
98
99    /// Drain and return the accumulated UART output buffer.
100    ///
101    /// After this call, `output_buf` is empty and ready for the next batch.
102    /// Only meaningful in WASM mode; always returns an empty vec otherwise.
103    pub fn drain_output(&mut self) -> Vec<u8> {
104        std::mem::take(&mut self.output_buf)
105    }
106
107    /// Load a raw kernel image from an in-memory byte slice.
108    ///
109    /// Equivalent to [`load_raw`] but without disk I/O. Used by the WASM
110    /// target where the kernel image is embedded with `include_bytes!`.
111    ///
112    /// [`load_raw`]: Emulator::load_raw
113    pub fn load_raw_from_bytes(
114        &mut self,
115        image_data: &[u8],
116        kernel_cmdline: Option<&str>,
117    ) -> Result<(), i32> {
118        if image_data.len() as u32 > self.ram_size {
119            return Err(-6);
120        }
121        self.ram.fill(0);
122        self.ram[..image_data.len()].copy_from_slice(image_data);
123
124        // Use the embedded default DTB.
125        let dtb_blob = crate::dtb::DEFAULT64MB_DTB;
126        let dtb_ptr = self.ram_size - dtb_blob.len() as u32;
127        self.ram[dtb_ptr as usize..dtb_ptr as usize + dtb_blob.len()].copy_from_slice(dtb_blob);
128
129        if let Some(cmdline) = kernel_cmdline {
130            const CMDLINE_OFFSET: u32 = 0xc0;
131            const CMDLINE_MAX_LEN: usize = 54;
132            let dst = &mut self.ram[dtb_ptr as usize + CMDLINE_OFFSET as usize
133                ..dtb_ptr as usize + CMDLINE_OFFSET as usize + CMDLINE_MAX_LEN];
134            let src = cmdline.as_bytes();
135            let n = src.len().min(CMDLINE_MAX_LEN - 1);
136            dst[..n].copy_from_slice(&src[..n]);
137            dst[n] = 0;
138        }
139
140        // patch_dtb_ram_size is defined as a local function below.
141        fn patch(ram: &mut [u8], dtb_ptr: u32) {
142            const OFFSET: u32 = 0x13c;
143            const SENTINEL: u32 = 0x00c0ff03;
144            if crate::mmio::mem_load4(ram, dtb_ptr + OFFSET) == SENTINEL {
145                crate::mmio::mem_store4(ram, dtb_ptr + OFFSET, dtb_ptr.to_be());
146            }
147        }
148        patch(&mut self.ram, dtb_ptr);
149
150        self.cpu = CpuState::default();
151        self.cpu.pc = RAM_BASE;
152        self.cpu.regs[10] = 0;
153        self.cpu.regs[11] = dtb_ptr + RAM_BASE;
154        self.cpu.set_privilege(3);
155        self.wasm_mode = true;
156        Ok(())
157    }
158}
159
/// Knobs for the [`Emulator::run`] loop.
pub struct RunConfig {
    /// Instruction budget for the run loop. Any negative value (the default
    /// is `-1`) disables the limit.
    pub instct: i64,
    /// Divisor applied to the time base before it is fed to the timer. The
    /// time base is wall-clock microseconds, or the cycle counter when
    /// `fixed_update` is set. `1` leaves the time base unscaled.
    pub time_divisor: u32,
    /// Use the CPU cycle counter instead of the host clock as the time base,
    /// so that timing no longer depends on how fast the host runs.
    pub fixed_update: bool,
    /// Call [`Platform::mini_sleep`] whenever the CPU reports WFI, so an idle
    /// guest does not spin the host CPU. The `-p` flag turns this off.
    pub do_sleep: bool,
    /// Execute a single instruction per loop iteration and call
    /// [`Emulator::dump_state`] on every iteration. Enabled by `-s`.
    pub single_step: bool,
    /// Destination for the execution trace (`--trace <FILE>`). When `Some`,
    /// the loop also runs one instruction per iteration and writes one trace
    /// line for each.
    pub trace: Option<Box<dyn std::io::Write>>,
}
181
182impl Default for RunConfig {
183    fn default() -> Self {
184        RunConfig {
185            instct: -1,
186            time_divisor: 1,
187            fixed_update: false,
188            do_sleep: true,
189            single_step: false,
190            trace: None,
191        }
192    }
193}
194
195// ── Immediate decoders ───────────────────────────────────────────────────────
196
/// Extract the sign-extended 12-bit I-type immediate held in `ir[31:20]`.
#[inline]
fn decode_imm_i(ir: u32) -> i32 {
    // An arithmetic right shift of the signed word copies bit 31 into the
    // upper 20 bits, which is exactly the required sign extension.
    (ir as i32) >> 20
}
/// Extract the sign-extended 12-bit S-type (store) immediate.
///
/// The immediate is split across the word: `imm[11:5]` in `ir[31:25]` and
/// `imm[4:0]` in `ir[11:7]`.
#[inline]
fn decode_imm_s(ir: u32) -> i32 {
    // Upper part: isolate ir[31:25], then shift arithmetically so bit 31
    // fills every bit above imm[11].
    let upper = ((ir & 0xfe00_0000) as i32) >> 20;
    // Lower part: ir[11:7] moved down to imm[4:0].
    let lower = ((ir >> 7) & 0x1f) as i32;
    upper | lower
}
/// Extract the sign-extended 13-bit B-type (branch) offset.
///
/// Bit layout: `imm[12]` = `ir[31]`, `imm[11]` = `ir[7]`,
/// `imm[10:5]` = `ir[30:25]`, `imm[4:1]` = `ir[11:8]`; `imm[0]` is always 0.
#[inline]
fn decode_imm_b(ir: u32) -> i32 {
    // Bits 31..12 of the result are all copies of ir[31].
    let sign = ((ir as i32) >> 19) & !0xfff;
    let low = ((ir >> 7) & 0x1e) // imm[4:1]
        | ((ir >> 20) & 0x7e0) // imm[10:5]
        | ((ir << 4) & 0x800); // imm[11]
    sign | low as i32
}
/// Extract the sign-extended 21-bit J-type (JAL) offset.
///
/// Bit layout: `imm[20]` = `ir[31]`, `imm[19:12]` = `ir[19:12]`,
/// `imm[11]` = `ir[20]`, `imm[10:1]` = `ir[30:21]`; `imm[0]` is always 0.
#[inline]
fn decode_imm_j(ir: u32) -> i32 {
    // Bits 31..20 of the result are all copies of ir[31].
    let sign = ((ir as i32) >> 11) & !0x000f_ffff;
    let low = (ir & 0x000f_f000) // imm[19:12], already in place
        | ((ir >> 9) & 0x800) // imm[11]
        | ((ir >> 20) & 0x7fe); // imm[10:1]
    sign | low as i32
}
/// Extract the U-type immediate: `ir[31:12]` kept in place, low 12 bits zero.
#[inline]
fn decode_imm_u(ir: u32) -> i32 {
    // Shifting down and back up clears the opcode and rd fields.
    ((ir >> 12) << 12) as i32
}
229
230// ── DTB helpers ──────────────────────────────────────────────────────────────
231
232/// Patch the RAM size field in the embedded DTB.
233///
234/// The default DTB contains the sentinel value `0x00c0ff03` at offset `0x13c`.
235/// This function replaces it with the actual DTB pointer (= RAM size − DTB
236/// size) in big-endian, which the kernel uses to determine how much memory
237/// is available.
238fn patch_dtb_ram_size(ram: &mut [u8], dtb_ptr: u32) {
239    const OFFSET: u32 = 0x13c;
240    const SENTINEL: u32 = 0x00c0ff03;
241    if mmio::mem_load4(ram, dtb_ptr + OFFSET) == SENTINEL {
242        mmio::mem_store4(ram, dtb_ptr + OFFSET, dtb_ptr.to_be());
243    }
244}
245
246impl Emulator {
247    /// Load a raw Linux kernel image and a Device Tree Blob into RAM.
248    ///
249    /// - `dtb_file = None` — use the embedded 64 MB DTB from [`crate::dtb`].
250    /// - `dtb_file = Some("disable")` — skip DTB entirely (`a1 = 0`).
251    /// - `dtb_file = Some(path)` — load DTB from the given file.
252    ///
253    /// On success the CPU is reset with:
254    /// - `pc = RAM_BASE` (kernel entry point)
255    /// - `a0 = 0` (hart ID)
256    /// - `a1 = dtb_ptr + RAM_BASE` (DTB physical address)
257    /// - privilege = M-mode
258    pub fn load_raw(
259        &mut self,
260        image_file: &str,
261        dtb_file: Option<&str>,
262        kernel_cmdline: Option<&str>,
263    ) -> Result<(), i32> {
264        let image_data = std::fs::read(image_file).map_err(|_| {
265            eprintln!("Error: \"{}\" not found", image_file);
266            -5i32
267        })?;
268
269        if image_data.len() as u32 > self.ram_size {
270            eprintln!(
271                "Error: image ({} bytes) does not fit in {} bytes of RAM",
272                image_data.len(),
273                self.ram_size
274            );
275            return Err(-6);
276        }
277
278        self.ram.fill(0);
279        self.ram[..image_data.len()].copy_from_slice(&image_data);
280
281        let dtb_ptr: u32 = match dtb_file {
282            Some("disable") => 0,
283
284            Some(path) => {
285                let dtb_data = std::fs::read(path).map_err(|_| {
286                    eprintln!("Error: \"{}\" not found", path);
287                    -5i32
288                })?;
289                let ptr = self.ram_size - dtb_data.len() as u32;
290                self.ram[ptr as usize..ptr as usize + dtb_data.len()].copy_from_slice(&dtb_data);
291                ptr
292            }
293
294            None => {
295                let blob = dtb::DEFAULT64MB_DTB;
296                let ptr = self.ram_size - blob.len() as u32;
297                self.ram[ptr as usize..ptr as usize + blob.len()].copy_from_slice(blob);
298
299                if let Some(cmdline) = kernel_cmdline {
300                    const CMDLINE_OFFSET: u32 = 0xc0;
301                    const CMDLINE_MAX_LEN: usize = 54;
302                    let dst = &mut self.ram[ptr as usize + CMDLINE_OFFSET as usize
303                        ..ptr as usize + CMDLINE_OFFSET as usize + CMDLINE_MAX_LEN];
304                    let src = cmdline.as_bytes();
305                    let n = src.len().min(CMDLINE_MAX_LEN - 1);
306                    dst[..n].copy_from_slice(&src[..n]);
307                    dst[n] = 0;
308                }
309
310                patch_dtb_ram_size(&mut self.ram, ptr);
311                ptr
312            }
313        };
314
315        // RV32 Linux boot convention:
316        //   a0 = hart ID (0)
317        //   a1 = physical address of the DTB
318        self.cpu = CpuState::default();
319        self.cpu.pc = RAM_BASE;
320        self.cpu.regs[10] = 0;
321        self.cpu.regs[11] = if dtb_ptr != 0 { dtb_ptr + RAM_BASE } else { 0 };
322        self.cpu.set_privilege(3);
323        Ok(())
324    }
325
326    /// Load an RV32 bare-metal ELF or FreeRTOS image.
327    ///
328    /// Only `PT_LOAD` segments with `vaddr >= RAM_BASE` are copied into RAM.
329    /// Bytes between `p_filesz` and `p_memsz` are zero-filled (BSS). The
330    /// symbol table is loaded automatically if the ELF is not stripped.
331    ///
332    /// On success the CPU is reset with:
333    /// - `pc = elf.entry`
334    /// - `sp = RAM_BASE + ram_size - 16` (top of RAM, 16-byte aligned per ABI)
335    /// - `a0 = a1 = 0` (no DTB — FreeRTOS does not use it)
336    /// - privilege = M-mode
337    pub fn load_elf(&mut self, elf_file: &str) -> Result<(), i32> {
338        let data = std::fs::read(elf_file).map_err(|_| {
339            eprintln!("Error: \"{}\" not found", elf_file);
340            -5i32
341        })?;
342
343        let image = elf::parse_elf(&data).map_err(|e| {
344            eprintln!("Error parsing ELF \"{}\": {}", elf_file, e);
345            -7i32
346        })?;
347
348        // Attempt to load the symbol table; silently skip if the ELF is stripped.
349        self.symbols = elf::parse_symbol_table(&data).unwrap_or(None);
350        if let Some(ref syms) = self.symbols {
351            eprintln!("Symbol table loaded: {} symbols", syms.len());
352        }
353
354        self.ram.fill(0);
355
356        for seg in &image.segments {
357            if (seg.vaddr as usize) < RAM_BASE as usize {
358                eprintln!(
359                    "Warning: ELF segment vaddr=0x{:08x} below RAM base — skipped",
360                    seg.vaddr
361                );
362                continue;
363            }
364
365            let ofs = (seg.vaddr as usize) - RAM_BASE as usize;
366            let end = ofs + seg.data.len();
367            let zend = ofs + seg.mem_size as usize;
368
369            if end > self.ram_size as usize {
370                eprintln!(
371                    "Error: ELF segment [0x{:08x}..+{}] overflows RAM ({} bytes)",
372                    seg.vaddr,
373                    seg.data.len(),
374                    self.ram_size
375                );
376                return Err(-8);
377            }
378
379            self.ram[ofs..end].copy_from_slice(&seg.data);
380
381            // Zero-fill the BSS region (mem_size > file_size).
382            if zend > end && zend <= self.ram_size as usize {
383                self.ram[end..zend].fill(0);
384            }
385        }
386
387        self.cpu = CpuState::default();
388        self.cpu.pc = image.entry;
389        self.cpu.regs[2] = (RAM_BASE + self.ram_size - 16) & !0xf; // sp
390        self.cpu.regs[10] = 0; // a0
391        self.cpu.regs[11] = 0; // a1
392        self.cpu.set_privilege(3);
393
394        eprintln!(
395            "ELF loaded: entry=0x{:08x} sp=0x{:08x}",
396            self.cpu.pc, self.cpu.regs[2]
397        );
398        Ok(())
399    }
400
401    /// Run the emulator until it stops or the instruction limit is reached.
402    ///
403    /// Calls [`step`] in a loop, advancing the time base between calls.
404    /// Returns the reason the loop exited as a [`StepResult`].
405    ///
406    /// [`step`]: Emulator::step
407    pub fn run(&mut self, cfg: &mut RunConfig, plat: &mut dyn Platform) -> StepResult {
408        // trace and single_step both require 1 instruction per flip to capture every step.
409        let instrs_per_flip: i32 = if cfg.single_step || cfg.trace.is_some() {
410            1
411        } else {
412            1024
413        };
414
415        let mut last_time: u64 = if cfg.fixed_update {
416            0
417        } else {
418            plat.get_time_microseconds() / cfg.time_divisor as u64
419        };
420
421        // Previous register state for single-step diff highlighting.
422        // Zeroed so the first instruction shows no changes.
423        let mut prev_regs: [u32; 32] = [0; 32];
424
425        let mut rt: u64 = 0;
426        loop {
427            if cfg.instct >= 0 && rt > cfg.instct as u64 {
428                break;
429            }
430
431            let cycle = self.cpu.get_cycle64();
432
433            let elapsed_us: u32 = if cfg.fixed_update {
434                (cycle / cfg.time_divisor as u64).wrapping_sub(last_time) as u32
435            } else {
436                (plat.get_time_microseconds() / cfg.time_divisor as u64).wrapping_sub(last_time)
437                    as u32
438            };
439            last_time = last_time.wrapping_add(elapsed_us as u64);
440
441            if cfg.single_step {
442                // Print state *before* this step, diff against previous step.
443                self.dump_state(Some(&prev_regs));
444                // Save current state for the next dump.
445                prev_regs = self.cpu.regs;
446            }
447
448            // Capture PC and regs before the step so the trace line shows
449            // what the instruction at trace_pc did.
450            let trace_pc = self.cpu.pc;
451            if cfg.trace.is_some() {
452                prev_regs = self.cpu.regs;
453            }
454
455            let step_result = self.step(elapsed_us, instrs_per_flip, plat);
456
457            // Write trace line using the pre-step PC and register snapshot.
458            if let Some(ref mut writer) = cfg.trace {
459                self.write_trace_line(writer, trace_pc, &prev_regs);
460            }
461
462            match step_result {
463                StepResult::Ok => {}
464                StepResult::Wfi => {
465                    if cfg.do_sleep {
466                        plat.mini_sleep();
467                    }
468                    self.cpu.set_cycle64(cycle + instrs_per_flip as u64);
469                }
470                other => return other,
471            }
472
473            rt += instrs_per_flip as u64;
474        }
475        StepResult::Ok
476    }
477
478    /// Execute up to `count` instructions, advancing the timer by `elapsed_us` µs.
479    ///
480    /// This is the hot path of the emulator. It:
481    /// 1. Calls [`CpuState::tick_timer`] to update `mtime` and check MTIP.
482    /// 2. Checks for a pending timer interrupt before the instruction loop.
483    /// 3. Fetches, decodes, and executes up to `count` instructions.
484    /// 4. Commits any trap that fired via [`CpuState::commit_trap`].
485    /// 5. Updates the cycle counter.
486    ///
487    /// Returns [`StepResult::Ok`] in the normal case. Returns
488    /// [`StepResult::Wfi`] if the CPU is sleeping. Returns
489    /// [`StepResult::Restart`] or [`StepResult::Poweroff`] if the kernel
490    /// wrote to SYSCON.
491    pub fn step(&mut self, elapsed_us: u32, count: i32, plat: &mut dyn Platform) -> StepResult {
492        let ram_size = self.ram_size;
493
494        self.cpu.tick_timer(elapsed_us);
495
496        // Early exit if the CPU is sleeping.
497        if self.cpu.get_wfi() {
498            return StepResult::Wfi;
499        }
500
501        let mut trap = Trap::None;
502        let mut rval: u32 = 0;
503        let mut pc = self.cpu.pc;
504        let cycle_start = self.cpu.cyclel;
505        let mut cycle_counter = cycle_start;
506
507        // ── 2. Check for a pending timer interrupt ──────────────────────────────
508        if (self.cpu.mip & (1 << 7)) != 0
509            && (self.cpu.mie & (1 << 7)) != 0
510            && (self.cpu.mstatus & 0x8) != 0
511        {
512            trap = Trap::IntTimer;
513            pc = pc.wrapping_sub(4);
514        } else {
515            // ── Instruction dispatch loop ────────────────────────────────
516            'iloop: for _ in 0..count {
517                rval = 0;
518                cycle_counter = cycle_counter.wrapping_add(1);
519
520                let ofs_pc = pc.wrapping_sub(RAM_BASE);
521                if ofs_pc >= ram_size {
522                    trap = Trap::ExcInsnAccessFault;
523                    break;
524                } else if ofs_pc & 3 != 0 {
525                    trap = Trap::ExcInsnMisaligned;
526                    break;
527                }
528
529                let ir = mmio::mem_load4(&self.ram, ofs_pc);
530                let mut rdid = (ir >> 7) & 0x1f;
531
532                match ir & 0x7f {
533                    // ── LUI ──────────────────────────────────────────────────
534                    0x37 => {
535                        rval = decode_imm_u(ir) as u32;
536                    }
537
538                    // ── AUIPC ────────────────────────────────────────────────
539                    0x17 => {
540                        rval = pc.wrapping_add(decode_imm_u(ir) as u32);
541                    }
542
543                    // ── JAL ──────────────────────────────────────────────────
544                    0x6f => {
545                        rval = pc.wrapping_add(4);
546                        pc = pc.wrapping_add(decode_imm_j(ir) as u32).wrapping_sub(4);
547                    }
548
549                    // ── JALR ─────────────────────────────────────────────────
550                    0x67 => {
551                        rval = pc.wrapping_add(4);
552                        pc = (self.cpu.regs[((ir >> 15) & 0x1f) as usize]
553                            .wrapping_add(decode_imm_i(ir) as u32)
554                            & !1)
555                            .wrapping_sub(4);
556                    }
557
558                    // ── Branches (BEQ BNE BLT BGE BLTU BGEU) ─────────────────
559                    0x63 => {
560                        let rs1 = self.cpu.regs[((ir >> 15) & 0x1f) as usize] as i32;
561                        let rs2 = self.cpu.regs[((ir >> 20) & 0x1f) as usize] as i32;
562                        let target = pc.wrapping_add(decode_imm_b(ir) as u32).wrapping_sub(4);
563                        rdid = 0;
564                        let taken = match (ir >> 12) & 0x7 {
565                            0 => rs1 == rs2,                   // beq
566                            1 => rs1 != rs2,                   // bne
567                            4 => rs1 < rs2,                    // blt
568                            5 => rs1 >= rs2,                   // bge
569                            6 => (rs1 as u32) < (rs2 as u32),  // bltu
570                            7 => (rs1 as u32) >= (rs2 as u32), // bgeu
571                            _ => {
572                                trap = Trap::ExcIllegalInsn;
573                                false
574                            }
575                        };
576                        if taken {
577                            pc = target;
578                        }
579                    }
580
581                    // ── Loads (LB LH LW LBU LHU) ─────────────────────────────
582                    0x03 => {
583                        let addr = self.cpu.regs[((ir >> 15) & 0x1f) as usize]
584                            .wrapping_add(decode_imm_i(ir) as u32);
585                        let ofs = addr.wrapping_sub(RAM_BASE);
586                        if ofs >= ram_size - 3 {
587                            if mmio::is_mmio(addr) {
588                                rval = mmio::handle_load(&self.cpu, addr, plat);
589                            } else {
590                                trap = Trap::ExcLoadAccessFault;
591                                rval = addr;
592                            }
593                        } else {
594                            rval = match (ir >> 12) & 0x7 {
595                                0 => mmio::mem_load1s(&self.ram, ofs), // lb
596                                1 => mmio::mem_load2s(&self.ram, ofs), // lh
597                                2 => mmio::mem_load4(&self.ram, ofs),  // lw
598                                4 => mmio::mem_load1(&self.ram, ofs),  // lbu
599                                5 => mmio::mem_load2(&self.ram, ofs),  // lhu
600                                _ => {
601                                    trap = Trap::ExcIllegalInsn;
602                                    0
603                                }
604                            };
605                        }
606                    }
607
608                    // ── Stores (SB SH SW) ─────────────────────────────────────
609                    0x23 => {
610                        let rs1 = self.cpu.regs[((ir >> 15) & 0x1f) as usize];
611                        let rs2 = self.cpu.regs[((ir >> 20) & 0x1f) as usize];
612                        let addr = rs1.wrapping_add(decode_imm_s(ir) as u32);
613                        let ofs = addr.wrapping_sub(RAM_BASE);
614                        rdid = 0;
615                        if ofs >= ram_size - 3 {
616                            if mmio::is_mmio(addr) {
617                                let buf = if self.wasm_mode {
618                                    Some(&mut self.output_buf)
619                                } else {
620                                    None
621                                };
622                                let sr = mmio::handle_store(&mut self.cpu, addr, rs2, buf);
623                                if sr != StepResult::Ok {
624                                    return sr;
625                                }
626                            } else {
627                                trap = Trap::ExcStoreAccessFault;
628                                rval = addr;
629                            }
630                        } else {
631                            match (ir >> 12) & 0x7 {
632                                0 => mmio::mem_store1(&mut self.ram, ofs, rs2), // sb
633                                1 => mmio::mem_store2(&mut self.ram, ofs, rs2), // sh
634                                2 => mmio::mem_store4(&mut self.ram, ofs, rs2), // sw
635                                _ => {
636                                    trap = Trap::ExcIllegalInsn;
637                                }
638                            }
639                        }
640                    }
641
642                    // ── OP-IMM and OP (RV32I + RV32M) ──────────────────────────
643                    0x13 | 0x33 => {
644                        let imm = decode_imm_i(ir);
645                        let rs1 = self.cpu.regs[((ir >> 15) & 0x1f) as usize];
646                        let is_reg = (ir & 0x20) != 0;
647                        let rs2: u32 = if is_reg {
648                            self.cpu.regs[(imm & 0x1f) as usize]
649                        } else {
650                            imm as u32
651                        };
652
653                        if is_reg && (ir & 0x0200_0000) != 0 {
654                            // RV32M
655                            rval = match (ir >> 12) & 0x7 {
656                                0 => rs1.wrapping_mul(rs2),                                      // mul
657                                1 => (((rs1 as i32 as i64) * (rs2 as i32 as i64)) >> 32) as u32, // mulh
658                                2 => (((rs1 as i32 as i64) * (rs2 as u64 as i64)) >> 32) as u32, // mulhsu
659                                3 => (((rs1 as u64) * (rs2 as u64)) >> 32) as u32, // mulhu
660                                4 => {
661                                    // div
662                                    if rs2 == 0 {
663                                        u32::MAX
664                                    } else if rs1 as i32 == i32::MIN && rs2 as i32 == -1 {
665                                        rs1
666                                    } else {
667                                        ((rs1 as i32) / (rs2 as i32)) as u32
668                                    }
669                                }
670                                5 => {
671                                    // divu
672                                    if rs2 == 0 {
673                                        u32::MAX
674                                    } else {
675                                        rs1.checked_div(rs2).unwrap()
676                                    }
677                                }
678                                6 => {
679                                    // rem
680                                    if rs2 == 0 {
681                                        rs1
682                                    } else if rs1 as i32 == i32::MIN && rs2 as i32 == -1 {
683                                        0
684                                    } else {
685                                        ((rs1 as i32) % (rs2 as i32)) as u32
686                                    }
687                                }
688                                7 => {
689                                    // remu
690                                    if rs2 == 0 {
691                                        rs1
692                                    } else {
693                                        rs1 % rs2
694                                    }
695                                }
696                                _ => 0,
697                            };
698                        } else {
699                            // RV32I
700                            rval = match (ir >> 12) & 0x7 {
701                                0 => {
702                                    if is_reg && (ir & 0x4000_0000) != 0 {
703                                        rs1.wrapping_sub(rs2) // SUB
704                                    } else {
705                                        rs1.wrapping_add(rs2) // ADD / ADDI
706                                    }
707                                }
708                                1 => rs1 << (rs2 & 0x1f), // SLL / SLLI
709                                2 => ((rs1 as i32) < (rs2 as i32)) as u32, // SLT / SLTI
710                                3 => (rs1 < rs2) as u32,  // SLTU / SLTIU
711                                4 => rs1 ^ rs2,           // XOR / XORI
712                                5 => {
713                                    if ir & 0x4000_0000 != 0 {
714                                        ((rs1 as i32) >> (rs2 & 0x1f)) as u32 // SRA / SRAI
715                                    } else {
716                                        rs1 >> (rs2 & 0x1f) // SRL / SRLI
717                                    }
718                                }
719                                6 => rs1 | rs2, // OR / ORI
720                                7 => rs1 & rs2, // AND / ANDI
721                                _ => 0,
722                            };
723                        }
724                    }
725
726                    // ── FENCE — no-op on this emulator (without real cache) ─────────
727                    0x0f => {
728                        rdid = 0;
729                    }
730
731                    // ── SYSTEM (Zicsr + privileged instructions) ─────────────
732                    0x73 => {
733                        let csrno = ir >> 20;
734                        let microop = (ir >> 12) & 0x7;
735
736                        if microop & 3 != 0 {
737                            // ── Zicsr ─────────────────────────────────────────
738                            let rs1imm = (ir >> 15) & 0x1f;
739                            let rs1 = self.cpu.regs[rs1imm as usize];
740
741                            let rval_csr = match csrno {
742                                x if x == Csr::Mscratch as u32 => self.cpu.mscratch,
743                                x if x == Csr::Mtvec as u32 => self.cpu.mtvec,
744                                x if x == Csr::Mie as u32 => self.cpu.mie,
745                                x if x == Csr::Cycle as u32 => cycle_counter,
746                                x if x == Csr::Mip as u32 => self.cpu.mip,
747                                x if x == Csr::Mepc as u32 => self.cpu.mepc,
748                                x if x == Csr::Mstatus as u32 => self.cpu.mstatus,
749                                x if x == Csr::Mcause as u32 => self.cpu.mcause,
750                                x if x == Csr::Mtval as u32 => self.cpu.mtval,
751                                x if x == Csr::Mvendorid as u32 => 0xff0f_f0ff,
752                                x if x == Csr::Misa as u32 => 0x4040_1101,
753                                _ => mmio::handle_csr_read(csrno, plat) as u32,
754                            };
755                            rval = rval_csr;
756
757                            let writeval = match microop {
758                                1 => rs1,
759                                2 => rval_csr | rs1,
760                                3 => rval_csr & !rs1,
761                                5 => rs1imm,
762                                6 => rval_csr | rs1imm,
763                                7 => rval_csr & !rs1imm,
764                                _ => rs1,
765                            };
766
767                            match csrno {
768                                x if x == Csr::Mscratch as u32 => {
769                                    self.cpu.mscratch = writeval;
770                                }
771                                x if x == Csr::Mtvec as u32 => {
772                                    self.cpu.mtvec = writeval;
773                                }
774                                x if x == Csr::Mie as u32 => {
775                                    self.cpu.mie = writeval;
776                                }
777                                x if x == Csr::Mip as u32 => {
778                                    self.cpu.mip = writeval;
779                                }
780                                x if x == Csr::Mepc as u32 => {
781                                    self.cpu.mepc = writeval;
782                                }
783                                x if x == Csr::Mstatus as u32 => {
784                                    self.cpu.mstatus = writeval;
785                                }
786                                x if x == Csr::Mcause as u32 => {
787                                    self.cpu.mcause = writeval;
788                                }
789                                x if x == Csr::Mtval as u32 => {
790                                    self.cpu.mtval = writeval;
791                                }
792                                _ => {
793                                    mmio::handle_csr_write(
794                                        &self.ram,
795                                        self.ram_size,
796                                        csrno,
797                                        writeval,
798                                    );
799                                }
800                            }
801                        } else if microop == 0 {
802                            // ── Privileged instructions ──────────────────────
803                            rdid = 0;
804                            if (csrno & 0xff) == 0x02 {
805                                // MRET — return from trap handler
806                                let ms = self.cpu.mstatus;
807                                let prev = (ms >> 11) & 3;
808                                let cur_priv = self.cpu.get_privilege();
809                                // MIE = MPIE, MPIE = 1, MPP = privilege atual
810                                self.cpu.mstatus = ((ms & 0x80) >> 4) | (cur_priv << 11) | 0x80;
811                                self.cpu.set_privilege(prev);
812                                pc = self.cpu.mepc.wrapping_sub(4);
813                            } else {
814                                match csrno {
815                                    0x000 => {
816                                        trap = if self.cpu.get_privilege() != 0 {
817                                            Trap::ExcEcallM
818                                        } else {
819                                            Trap::ExcEcallU
820                                        };
821                                    }
822                                    0x001 => {
823                                        trap = Trap::ExcBreakpoint;
824                                    }
825                                    0x105 => {
826                                        // WFI — suspend until next interrupt
827                                        self.cpu.mstatus |= 8; // enable MIE
828                                        self.cpu.set_wfi(true);
829                                        if self.cpu.cyclel > cycle_counter {
830                                            self.cpu.cycleh = self.cpu.cycleh.wrapping_add(1);
831                                        }
832                                        self.cpu.cyclel = cycle_counter;
833                                        self.cpu.pc = pc.wrapping_add(4);
834                                        return StepResult::Wfi;
835                                    }
836                                    _ => {
837                                        trap = Trap::ExcIllegalInsn;
838                                    }
839                                }
840                            }
841                        } else {
842                            trap = Trap::ExcIllegalInsn;
843                        }
844                    }
845
846                    // ── RV32A  ──────────────────────────────
847                    0x2f => {
848                        let rs1 = self.cpu.regs[((ir >> 15) & 0x1f) as usize];
849                        let mut rs2 = self.cpu.regs[((ir >> 20) & 0x1f) as usize];
850                        let irmid = (ir >> 27) & 0x1f;
851                        let ofs = rs1.wrapping_sub(RAM_BASE);
852
853                        if ofs >= ram_size - 3 {
854                            trap = Trap::ExcStoreAccessFault;
855                            rval = rs1;
856                        } else {
857                            rval = mmio::mem_load4(&self.ram, ofs);
858                            let mut dowrite = true;
859                            match irmid {
860                                2 => {
861                                    // LR.W
862                                    dowrite = false;
863                                    self.cpu.set_reservation(ofs);
864                                }
865                                3 => {
866                                    // SC.W
867                                    rval =
868                                        (self.cpu.get_reservation() != (ofs & 0x1fff_ffff)) as u32;
869                                    dowrite = rval == 0;
870                                }
871                                1 => {
872                                    // AMOSWAP.W
873                                }
874                                0 => {
875                                    // AMOADD.W
876                                    rs2 = rs2.wrapping_add(rval);
877                                }
878                                4 => {
879                                    // AMOXOR.W
880                                    rs2 ^= rval;
881                                }
882                                12 => {
883                                    // AMOAND.W
884                                    rs2 &= rval;
885                                }
886                                8 => {
887                                    // AMOOR.W
888                                    rs2 |= rval;
889                                }
890                                16 => {
891                                    // AMOMIN.W
892                                    rs2 = if (rs2 as i32) < (rval as i32) {
893                                        rs2
894                                    } else {
895                                        rval
896                                    };
897                                }
898                                20 => {
899                                    // AMOMAX.W
900                                    rs2 = if (rs2 as i32) > (rval as i32) {
901                                        rs2
902                                    } else {
903                                        rval
904                                    };
905                                }
906                                24 => {
907                                    // AMOMINU.W
908                                    rs2 = rs2.min(rval);
909                                }
910                                28 => {
911                                    // AMOMAXU.W
912                                    rs2 = rs2.max(rval);
913                                }
914                                _ => {
915                                    trap = Trap::ExcIllegalInsn;
916                                    dowrite = false;
917                                }
918                            }
919                            if dowrite {
920                                mmio::mem_store4(&mut self.ram, ofs, rs2);
921                            }
922                        }
923                    }
924
925                    _ => {
926                        trap = Trap::ExcIllegalInsn;
927                    }
928                } // end opcode dispatch
929
930                if !trap.is_none() {
931                    self.cpu.pc = pc;
932                    if self.fail_on_all_faults {
933                        eprintln!("FAULT");
934                        return StepResult::Fault;
935                    }
936                    break 'iloop;
937                }
938
939                if rdid != 0 {
940                    self.cpu.regs[rdid as usize] = rval;
941                }
942                pc = pc.wrapping_add(4);
943            } // end instruction loop
944        }
945
946        // ── 4. Commit trap ─────────────────────────────────────────────────────
947        if !trap.is_none() {
948            pc = self.cpu.commit_trap(trap, rval, pc);
949        }
950
951        if self.cpu.cyclel > cycle_counter {
952            self.cpu.cycleh = self.cpu.cycleh.wrapping_add(1);
953        }
954        self.cpu.cyclel = cycle_counter;
955        self.cpu.pc = pc;
956        StepResult::Ok
957    }
958
959    // ─────────────────────────────────────────────────────────────────────────
960    // Debug
961    // ─────────────────────────────────────────────────────────────────────────
962
963    /// Write one line of compact execution trace to `writer`.
964    ///
965    /// Format: `<pc>  <ir>  <mnem padded to 36 chars>  [reg=val ...]`
966    ///
967    /// Only registers that changed relative to `prev_regs` are listed.
968    /// Instructions with no register writes (branches, stores) produce a line
969    /// that ends after the mnemonic.
970    ///
971    /// ## Example output
972    ///
973    /// ```text
974    /// 80000014  510010ef  jal     ra, <main>                    ra=80000018
975    /// 80001524  ff010113  addi    sp, sp, -16                   sp=81fffff0
976    /// 80001528  00112623  sw      ra, 12(sp)
977    /// ```
978    pub fn write_trace_line(
979        &self,
980        writer: &mut dyn std::io::Write,
981        pc: u32,
982        prev_regs: &[u32; 32],
983    ) {
984        use crate::disasm;
985
986        const NAMES: [&str; 32] = [
987            "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3",
988            "a4", "a5", "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
989            "t3", "t4", "t5", "t6",
990        ];
991
992        let pc_ofs = pc.wrapping_sub(RAM_BASE);
993
994        // Disassemble the instruction at the captured PC.
995        let (ir, mnem) = if pc_ofs < self.ram_size - 3 {
996            let ir = mmio::mem_load4(&self.ram, pc_ofs);
997            let mnem = disasm::disassemble(ir, pc, self.symbols.as_ref());
998            (ir, mnem)
999        } else {
1000            (0, "[out of RAM]".to_string())
1001        };
1002
1003        // Registers that have changed
1004        let changes: Vec<String> = (0..32)
1005            .filter(|&i| self.cpu.regs[i] != prev_regs[i])
1006            .map(|i| format!("{}={:08x}", NAMES[i], self.cpu.regs[i]))
1007            .collect();
1008
1009        // Compact line: pc  ir  mnem  [changed registers...]
1010        // mnem padded to 36 characters to simplify fixed-column parsing
1011        let _ = writeln!(
1012            writer,
1013            "{:08x}  {:08x}  {:<36}{}",
1014            pc,
1015            ir,
1016            mnem,
1017            changes.join("  ")
1018        );
1019    }
1020
1021    /// Print the PC, the disassembled instruction, and the full register grid.
1022    ///
1023    /// `prev_regs` — register state *before* the last step.
1024    /// Registers that changed are prefixed with `*`.
1025    ///
1026    /// Format:
1027    /// ```text
1028    /// 80001234  510010ef  jal     ra, <main>
1029    ///   zero=00000000 *ra=80000018   sp=83fffff0  gp=00000000
1030    ///   ...
1031    /// ```
1032    pub fn dump_state(&self, prev_regs: Option<&[u32; 32]>) {
1033        use crate::disasm;
1034
1035        let pc = self.cpu.pc;
1036        let pc_ofs = pc.wrapping_sub(RAM_BASE);
1037
1038        // ── Instruction line ────────────────────────────────────────────────────
1039        if pc_ofs < self.ram_size - 3 {
1040            let ir = mmio::mem_load4(&self.ram, pc_ofs);
1041            let mnem = disasm::disassemble(ir, pc, self.symbols.as_ref());
1042            eprintln!("{:08x}  {:08x}  {}", pc, ir, mnem);
1043        } else {
1044            eprintln!("{:08x}  [out of RAM]", pc);
1045        }
1046
1047        // ── Register grid — 4 per row ──────────────────────────────────────────
1048        // ABI names in the same order as the classic mini-rv32ima dump.
1049        const NAMES: [&str; 32] = [
1050            "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3",
1051            "a4", "a5", "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
1052            "t3", "t4", "t5", "t6",
1053        ];
1054
1055        let r = &self.cpu.regs;
1056        let prev = prev_regs.unwrap_or(r); // sem prev → nada marcado
1057
1058        // Each cell has a fixed width: marker (1) + name (4) + "=" (1) + value (8) = 14
1059        // The longest name is "zero" (4 chars), so {:>4} right-aligns them all.
1060        // The marker is part of the label: "*ra " or " ra " — always 5 chars total.
1061        let mut line = String::with_capacity(80);
1062        for i in 0..32 {
1063            let changed = r[i] != prev[i];
1064            // Label = marker + name, left-aligned in a 5-character field:
1065            //   " zero", "  ra ", "  sp ", " *a3 " etc.
1066            let label = format!("{}{}", if changed { "*" } else { " " }, NAMES[i]);
1067            let cell = format!("{:>5}={:08x}", label, r[i]);
1068            line.push_str(&cell);
1069
1070            if (i + 1) % 4 == 0 {
1071                eprintln!("  {}", line.trim_end());
1072                line.clear();
1073            } else {
1074                line.push_str("  ");
1075            }
1076        }
1077    }
1078}