riscv_emulator/emulator.rs
1//! The emulator core — CPU execution loop, image loading, and run control.
2//!
3//! [`Emulator`] is the central type. It owns the [`CpuState`], the RAM buffer,
4//! and optional debug state (symbol table, output buffer). Everything else —
5//! time, keyboard, sleep — is delegated to a [`Platform`] implementation.
6//!
7//! ## Execution flow
8//!
9//! ```text
10//! Emulator::run()
11//! └─ loop {
12//! tick_timer(elapsed_us) // advance mtime, check MTIP
13//! check WFI / pending IRQ
14//! 'iloop: for 0..count {
15//! fetch instruction
16//! decode opcode
17//! execute → update regs / pc / trap
18//! }
19//! commit_trap() if any trap fired
20//! update cycle counter
21//! }
22//! ```
23//!
24//! ## Loading images
25//!
26//! | Method | Use case |
27//! |--------|---------|
28//! | [`Emulator::load_raw`] | Linux kernel raw image + DTB |
29//! | [`Emulator::load_raw_from_bytes`] | Same, but from an in-memory slice (WASM) |
30//! | [`Emulator::load_elf`] | Bare-metal ELF or FreeRTOS |
31//!
32//! ## ISA coverage
33//!
34//! | Extension | Instructions |
35//! |-----------|-------------|
36//! | RV32I | All base integer instructions |
37//! | RV32M | MUL, MULH, MULHSU, MULHU, DIV, DIVU, REM, REMU |
38//! | RV32A | LR.W, SC.W, AMOSWAP/ADD/XOR/AND/OR/MIN/MAX/MINU/MAXU.W |
39//! | Zicsr | CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, CSRRCI |
40//! | Privileged | MRET, WFI, ECALL, EBREAK |
41//! | FENCE | Treated as no-op (no cache model) |
42
43use crate::cpu::{CpuState, Csr, StepResult, Trap};
44use crate::elf::SymbolTable;
45use crate::mmio;
46use crate::platform::Platform;
47use crate::{dtb, elf};
48
/// Physical base address of emulated RAM.
///
/// Guest addresses are turned into indices of the RAM buffer by subtracting
/// this value (`addr - RAM_BASE`). Raw images are entered at exactly this
/// address, and ELF segments placed below it are skipped by the loader.
pub const RAM_BASE: u32 = 0x8000_0000;
53
/// A complete RV32IMA emulator instance.
///
/// Owns the CPU state, the RAM buffer, and optional debug/WASM state.
/// Peripheral I/O (time, keyboard, sleep) is delegated to a [`Platform`]
/// that is passed to [`Emulator::run`] / [`Emulator::step`].
pub struct Emulator {
    /// The CPU register file and CSR set.
    pub cpu: CpuState,
    /// Flat emulated RAM buffer. Guest address `addr` lives at
    /// `ram[addr - RAM_BASE]`.
    pub ram: Vec<u8>,
    /// Size of `ram` in bytes.
    ///
    /// [`Emulator::new`] allocates `ram` with exactly this length. The
    /// loaders and the execution loop bounds-check against `ram_size`, not
    /// `ram.len()`, so the two must be kept in sync if either is changed.
    pub ram_size: u32,
    /// When `true`, [`Emulator::step`] prints `FAULT` and returns
    /// [`StepResult::Fault`] as soon as an instruction raises a trap, instead
    /// of committing the trap to the guest's handler. Enabled by the `-d` CLI
    /// flag for debugging.
    pub fail_on_all_faults: bool,
    /// Symbol table from the loaded ELF, if available.
    /// Populated by [`load_elf`] when the image is not stripped; used by the
    /// disassembler in the trace and state dumps.
    ///
    /// [`load_elf`]: Emulator::load_elf
    pub symbols: Option<SymbolTable>,
    /// UART output buffer. In WASM mode, bytes written to the UART go here
    /// instead of stdout. Drained by [`drain_output`] after each batch.
    ///
    /// [`drain_output`]: Emulator::drain_output
    pub output_buf: Vec<u8>,
    /// When `true`, MMIO stores are handed `output_buf` so UART bytes are
    /// buffered instead of going to stdout.
    /// Set automatically by [`load_raw_from_bytes`].
    ///
    /// [`load_raw_from_bytes`]: Emulator::load_raw_from_bytes
    pub wasm_mode: bool,
}
84
85impl Emulator {
86 /// Create a new emulator with `ram_size` bytes of RAM, all zeroed.
87 pub fn new(ram_size: u32) -> Self {
88 Emulator {
89 cpu: CpuState::default(),
90 ram: vec![0u8; ram_size as usize],
91 ram_size,
92 fail_on_all_faults: false,
93 symbols: None,
94 output_buf: Vec::new(),
95 wasm_mode: false,
96 }
97 }
98
99 /// Drain and return the accumulated UART output buffer.
100 ///
101 /// After this call, `output_buf` is empty and ready for the next batch.
102 /// Only meaningful in WASM mode; always returns an empty vec otherwise.
103 pub fn drain_output(&mut self) -> Vec<u8> {
104 std::mem::take(&mut self.output_buf)
105 }
106
107 /// Load a raw kernel image from an in-memory byte slice.
108 ///
109 /// Equivalent to [`load_raw`] but without disk I/O. Used by the WASM
110 /// target where the kernel image is embedded with `include_bytes!`.
111 ///
112 /// [`load_raw`]: Emulator::load_raw
113 pub fn load_raw_from_bytes(
114 &mut self,
115 image_data: &[u8],
116 kernel_cmdline: Option<&str>,
117 ) -> Result<(), i32> {
118 if image_data.len() as u32 > self.ram_size {
119 return Err(-6);
120 }
121 self.ram.fill(0);
122 self.ram[..image_data.len()].copy_from_slice(image_data);
123
124 // Use the embedded default DTB.
125 let dtb_blob = crate::dtb::DEFAULT64MB_DTB;
126 let dtb_ptr = self.ram_size - dtb_blob.len() as u32;
127 self.ram[dtb_ptr as usize..dtb_ptr as usize + dtb_blob.len()].copy_from_slice(dtb_blob);
128
129 if let Some(cmdline) = kernel_cmdline {
130 const CMDLINE_OFFSET: u32 = 0xc0;
131 const CMDLINE_MAX_LEN: usize = 54;
132 let dst = &mut self.ram[dtb_ptr as usize + CMDLINE_OFFSET as usize
133 ..dtb_ptr as usize + CMDLINE_OFFSET as usize + CMDLINE_MAX_LEN];
134 let src = cmdline.as_bytes();
135 let n = src.len().min(CMDLINE_MAX_LEN - 1);
136 dst[..n].copy_from_slice(&src[..n]);
137 dst[n] = 0;
138 }
139
140 // patch_dtb_ram_size is defined as a local function below.
141 fn patch(ram: &mut [u8], dtb_ptr: u32) {
142 const OFFSET: u32 = 0x13c;
143 const SENTINEL: u32 = 0x00c0ff03;
144 if crate::mmio::mem_load4(ram, dtb_ptr + OFFSET) == SENTINEL {
145 crate::mmio::mem_store4(ram, dtb_ptr + OFFSET, dtb_ptr.to_be());
146 }
147 }
148 patch(&mut self.ram, dtb_ptr);
149
150 self.cpu = CpuState::default();
151 self.cpu.pc = RAM_BASE;
152 self.cpu.regs[10] = 0;
153 self.cpu.regs[11] = dtb_ptr + RAM_BASE;
154 self.cpu.set_privilege(3);
155 self.wasm_mode = true;
156 Ok(())
157 }
158}
159
/// Knobs for the [`Emulator::run`] loop.
pub struct RunConfig {
    /// Instruction budget. Any negative value (the default is `-1`) means
    /// "run forever"; otherwise the loop stops once the running count of
    /// scheduled instructions exceeds this value.
    pub instct: i64,
    /// Divisor applied to the time base (wall-clock microseconds, or the
    /// cycle counter when `fixed_update` is set) before it is fed to the
    /// timer. `1` = undivided. Values > 1 make guest time advance more
    /// slowly.
    pub time_divisor: u32,
    /// When `true`, the cycle counter is used as the time base instead of
    /// wall-clock time, which makes execution independent of host timing.
    pub fixed_update: bool,
    /// When `true`, the run loop calls [`Platform::mini_sleep`] whenever the
    /// CPU reports it is waiting for an interrupt, to avoid burning host
    /// CPU. Disable with `-p` for maximum throughput.
    pub do_sleep: bool,
    /// When `true`, execute one instruction per loop iteration and call
    /// [`Emulator::dump_state`] before each one. Enabled by `-s`.
    pub single_step: bool,
    /// Optional writer for the compact execution trace (`--trace <FILE>`).
    /// When `Some`, the loop runs one instruction per iteration and writes
    /// one trace line per iteration.
    pub trace: Option<Box<dyn std::io::Write>>,
}

impl Default for RunConfig {
    /// Unlimited, real-time, sleeping on WFI, with no single-stepping and
    /// no trace output.
    fn default() -> Self {
        Self {
            // Timing.
            time_divisor: 1,
            fixed_update: false,
            do_sleep: true,
            // Run length: negative = no limit.
            instct: -1,
            // Debug aids, all off.
            single_step: false,
            trace: None,
        }
    }
}
194
195// ── Immediate decoders ───────────────────────────────────────────────────────
196
/// Decode the sign-extended 12-bit immediate of an I-type instruction.
///
/// The immediate occupies `ir[31:20]`, so its sign bit is bit 31 of the
/// instruction word.
#[inline]
fn decode_imm_i(ir: u32) -> i32 {
    // An arithmetic shift on the signed word moves the field down to bit 0
    // and replicates the sign bit into the vacated upper bits.
    (ir as i32) >> 20
}
/// Decode the sign-extended 12-bit immediate of an S-type (store) instruction.
///
/// `imm[11:5]` comes from `ir[31:25]` and `imm[4:0]` from `ir[11:7]`.
#[inline]
fn decode_imm_s(ir: u32) -> i32 {
    // Upper part: isolate ir[31:25], then shift arithmetically so the sign
    // bit (ir[31]) fills everything above bit 11.
    let upper = ((ir & 0xfe00_0000) as i32) >> 20;
    // Lower part: imm[4:0].
    let lower = ((ir >> 7) & 0x1f) as i32;
    upper | lower
}
/// Decode the sign-extended 13-bit branch offset of a B-type instruction.
///
/// Bit layout: `imm[12] = ir[31]`, `imm[11] = ir[7]`, `imm[10:5] = ir[30:25]`,
/// `imm[4:1] = ir[11:8]`; `imm[0]` is always zero.
#[inline]
fn decode_imm_b(ir: u32) -> i32 {
    // ir[31] replicated into bits 31..=12 (all ones or all zeros).
    let sign = ((ir as i32) >> 31) << 12;
    let bit_11 = ((ir & 0x80) << 4) as i32;
    let bits_10_5 = ((ir & 0x7e00_0000) >> 20) as i32;
    let bits_4_1 = ((ir & 0xf00) >> 7) as i32;
    sign | bit_11 | bits_10_5 | bits_4_1
}
/// Decode the sign-extended 21-bit jump offset of a J-type (JAL) instruction.
///
/// Bit layout: `imm[20] = ir[31]`, `imm[19:12] = ir[19:12]`,
/// `imm[11] = ir[20]`, `imm[10:1] = ir[30:21]`; `imm[0]` is always zero.
#[inline]
fn decode_imm_j(ir: u32) -> i32 {
    // ir[31] replicated into bits 31..=20 (all ones or all zeros).
    let sign = ((ir as i32) >> 31) << 20;
    let bits_19_12 = (ir & 0x000f_f000) as i32;
    let bit_11 = ((ir & 0x0010_0000) >> 9) as i32;
    let bits_10_1 = ((ir & 0x7fe0_0000) >> 20) as i32;
    sign | bits_19_12 | bit_11 | bits_10_1
}
/// Decode the immediate of a U-type instruction (LUI / AUIPC).
///
/// The immediate is `ir[31:12]` left in place, with the low 12 bits cleared.
#[inline]
fn decode_imm_u(ir: u32) -> i32 {
    (ir as i32) & !0xfff
}
229
230// ── DTB helpers ──────────────────────────────────────────────────────────────
231
232/// Patch the RAM size field in the embedded DTB.
233///
234/// The default DTB contains the sentinel value `0x00c0ff03` at offset `0x13c`.
235/// This function replaces it with the actual DTB pointer (= RAM size − DTB
236/// size) in big-endian, which the kernel uses to determine how much memory
237/// is available.
238fn patch_dtb_ram_size(ram: &mut [u8], dtb_ptr: u32) {
239 const OFFSET: u32 = 0x13c;
240 const SENTINEL: u32 = 0x00c0ff03;
241 if mmio::mem_load4(ram, dtb_ptr + OFFSET) == SENTINEL {
242 mmio::mem_store4(ram, dtb_ptr + OFFSET, dtb_ptr.to_be());
243 }
244}
245
246impl Emulator {
247 /// Load a raw Linux kernel image and a Device Tree Blob into RAM.
248 ///
249 /// - `dtb_file = None` — use the embedded 64 MB DTB from [`crate::dtb`].
250 /// - `dtb_file = Some("disable")` — skip DTB entirely (`a1 = 0`).
251 /// - `dtb_file = Some(path)` — load DTB from the given file.
252 ///
253 /// On success the CPU is reset with:
254 /// - `pc = RAM_BASE` (kernel entry point)
255 /// - `a0 = 0` (hart ID)
256 /// - `a1 = dtb_ptr + RAM_BASE` (DTB physical address)
257 /// - privilege = M-mode
258 pub fn load_raw(
259 &mut self,
260 image_file: &str,
261 dtb_file: Option<&str>,
262 kernel_cmdline: Option<&str>,
263 ) -> Result<(), i32> {
264 let image_data = std::fs::read(image_file).map_err(|_| {
265 eprintln!("Error: \"{}\" not found", image_file);
266 -5i32
267 })?;
268
269 if image_data.len() as u32 > self.ram_size {
270 eprintln!(
271 "Error: image ({} bytes) does not fit in {} bytes of RAM",
272 image_data.len(),
273 self.ram_size
274 );
275 return Err(-6);
276 }
277
278 self.ram.fill(0);
279 self.ram[..image_data.len()].copy_from_slice(&image_data);
280
281 let dtb_ptr: u32 = match dtb_file {
282 Some("disable") => 0,
283
284 Some(path) => {
285 let dtb_data = std::fs::read(path).map_err(|_| {
286 eprintln!("Error: \"{}\" not found", path);
287 -5i32
288 })?;
289 let ptr = self.ram_size - dtb_data.len() as u32;
290 self.ram[ptr as usize..ptr as usize + dtb_data.len()].copy_from_slice(&dtb_data);
291 ptr
292 }
293
294 None => {
295 let blob = dtb::DEFAULT64MB_DTB;
296 let ptr = self.ram_size - blob.len() as u32;
297 self.ram[ptr as usize..ptr as usize + blob.len()].copy_from_slice(blob);
298
299 if let Some(cmdline) = kernel_cmdline {
300 const CMDLINE_OFFSET: u32 = 0xc0;
301 const CMDLINE_MAX_LEN: usize = 54;
302 let dst = &mut self.ram[ptr as usize + CMDLINE_OFFSET as usize
303 ..ptr as usize + CMDLINE_OFFSET as usize + CMDLINE_MAX_LEN];
304 let src = cmdline.as_bytes();
305 let n = src.len().min(CMDLINE_MAX_LEN - 1);
306 dst[..n].copy_from_slice(&src[..n]);
307 dst[n] = 0;
308 }
309
310 patch_dtb_ram_size(&mut self.ram, ptr);
311 ptr
312 }
313 };
314
315 // RV32 Linux boot convention:
316 // a0 = hart ID (0)
317 // a1 = physical address of the DTB
318 self.cpu = CpuState::default();
319 self.cpu.pc = RAM_BASE;
320 self.cpu.regs[10] = 0;
321 self.cpu.regs[11] = if dtb_ptr != 0 { dtb_ptr + RAM_BASE } else { 0 };
322 self.cpu.set_privilege(3);
323 Ok(())
324 }
325
326 /// Load an RV32 bare-metal ELF or FreeRTOS image.
327 ///
328 /// Only `PT_LOAD` segments with `vaddr >= RAM_BASE` are copied into RAM.
329 /// Bytes between `p_filesz` and `p_memsz` are zero-filled (BSS). The
330 /// symbol table is loaded automatically if the ELF is not stripped.
331 ///
332 /// On success the CPU is reset with:
333 /// - `pc = elf.entry`
334 /// - `sp = RAM_BASE + ram_size - 16` (top of RAM, 16-byte aligned per ABI)
335 /// - `a0 = a1 = 0` (no DTB — FreeRTOS does not use it)
336 /// - privilege = M-mode
337 pub fn load_elf(&mut self, elf_file: &str) -> Result<(), i32> {
338 let data = std::fs::read(elf_file).map_err(|_| {
339 eprintln!("Error: \"{}\" not found", elf_file);
340 -5i32
341 })?;
342
343 let image = elf::parse_elf(&data).map_err(|e| {
344 eprintln!("Error parsing ELF \"{}\": {}", elf_file, e);
345 -7i32
346 })?;
347
348 // Attempt to load the symbol table; silently skip if the ELF is stripped.
349 self.symbols = elf::parse_symbol_table(&data).unwrap_or(None);
350 if let Some(ref syms) = self.symbols {
351 eprintln!("Symbol table loaded: {} symbols", syms.len());
352 }
353
354 self.ram.fill(0);
355
356 for seg in &image.segments {
357 if (seg.vaddr as usize) < RAM_BASE as usize {
358 eprintln!(
359 "Warning: ELF segment vaddr=0x{:08x} below RAM base — skipped",
360 seg.vaddr
361 );
362 continue;
363 }
364
365 let ofs = (seg.vaddr as usize) - RAM_BASE as usize;
366 let end = ofs + seg.data.len();
367 let zend = ofs + seg.mem_size as usize;
368
369 if end > self.ram_size as usize {
370 eprintln!(
371 "Error: ELF segment [0x{:08x}..+{}] overflows RAM ({} bytes)",
372 seg.vaddr,
373 seg.data.len(),
374 self.ram_size
375 );
376 return Err(-8);
377 }
378
379 self.ram[ofs..end].copy_from_slice(&seg.data);
380
381 // Zero-fill the BSS region (mem_size > file_size).
382 if zend > end && zend <= self.ram_size as usize {
383 self.ram[end..zend].fill(0);
384 }
385 }
386
387 self.cpu = CpuState::default();
388 self.cpu.pc = image.entry;
389 self.cpu.regs[2] = (RAM_BASE + self.ram_size - 16) & !0xf; // sp
390 self.cpu.regs[10] = 0; // a0
391 self.cpu.regs[11] = 0; // a1
392 self.cpu.set_privilege(3);
393
394 eprintln!(
395 "ELF loaded: entry=0x{:08x} sp=0x{:08x}",
396 self.cpu.pc, self.cpu.regs[2]
397 );
398 Ok(())
399 }
400
401 /// Run the emulator until it stops or the instruction limit is reached.
402 ///
403 /// Calls [`step`] in a loop, advancing the time base between calls.
404 /// Returns the reason the loop exited as a [`StepResult`].
405 ///
406 /// [`step`]: Emulator::step
407 pub fn run(&mut self, cfg: &mut RunConfig, plat: &mut dyn Platform) -> StepResult {
408 // trace and single_step both require 1 instruction per flip to capture every step.
409 let instrs_per_flip: i32 = if cfg.single_step || cfg.trace.is_some() {
410 1
411 } else {
412 1024
413 };
414
415 let mut last_time: u64 = if cfg.fixed_update {
416 0
417 } else {
418 plat.get_time_microseconds() / cfg.time_divisor as u64
419 };
420
421 // Previous register state for single-step diff highlighting.
422 // Zeroed so the first instruction shows no changes.
423 let mut prev_regs: [u32; 32] = [0; 32];
424
425 let mut rt: u64 = 0;
426 loop {
427 if cfg.instct >= 0 && rt > cfg.instct as u64 {
428 break;
429 }
430
431 let cycle = self.cpu.get_cycle64();
432
433 let elapsed_us: u32 = if cfg.fixed_update {
434 (cycle / cfg.time_divisor as u64).wrapping_sub(last_time) as u32
435 } else {
436 (plat.get_time_microseconds() / cfg.time_divisor as u64).wrapping_sub(last_time)
437 as u32
438 };
439 last_time = last_time.wrapping_add(elapsed_us as u64);
440
441 if cfg.single_step {
442 // Print state *before* this step, diff against previous step.
443 self.dump_state(Some(&prev_regs));
444 // Save current state for the next dump.
445 prev_regs = self.cpu.regs;
446 }
447
448 // Capture PC and regs before the step so the trace line shows
449 // what the instruction at trace_pc did.
450 let trace_pc = self.cpu.pc;
451 if cfg.trace.is_some() {
452 prev_regs = self.cpu.regs;
453 }
454
455 let step_result = self.step(elapsed_us, instrs_per_flip, plat);
456
457 // Write trace line using the pre-step PC and register snapshot.
458 if let Some(ref mut writer) = cfg.trace {
459 self.write_trace_line(writer, trace_pc, &prev_regs);
460 }
461
462 match step_result {
463 StepResult::Ok => {}
464 StepResult::Wfi => {
465 if cfg.do_sleep {
466 plat.mini_sleep();
467 }
468 self.cpu.set_cycle64(cycle + instrs_per_flip as u64);
469 }
470 other => return other,
471 }
472
473 rt += instrs_per_flip as u64;
474 }
475 StepResult::Ok
476 }
477
/// Execute up to `count` instructions, advancing the timer by `elapsed_us` µs.
///
/// This is the hot path of the emulator. In order, it:
/// 1. Calls [`CpuState::tick_timer`] with `elapsed_us`.
/// 2. Returns [`StepResult::Wfi`] immediately if the CPU is still marked as
///    waiting for an interrupt.
/// 3. Raises a timer interrupt if bit 7 is set in both `mip` and `mie` and
///    bit 3 of `mstatus` is set; otherwise fetches, decodes, and executes up
///    to `count` instructions, stopping at the first trap.
/// 4. Commits any trap that fired via [`CpuState::commit_trap`].
/// 5. Writes the cycle counter and the PC back into the CPU state.
///
/// Registers are written to `self.cpu.regs` as each instruction retires; the
/// PC and cycle counter are kept in locals and only written back at the end
/// (or on the early-return paths noted in the body).
///
/// # Returns
///
/// - [`StepResult::Ok`] in the normal case (including when a trap was
///   committed to the guest).
/// - [`StepResult::Wfi`] if the CPU is sleeping or just executed WFI.
/// - [`StepResult::Fault`] if `fail_on_all_faults` is set and an instruction
///   raised a trap.
/// - Any non-`Ok` result of the MMIO store handler, passed through unchanged
///   (per the original documentation: [`StepResult::Restart`] or
///   [`StepResult::Poweroff`] when the kernel writes to SYSCON).
pub fn step(&mut self, elapsed_us: u32, count: i32, plat: &mut dyn Platform) -> StepResult {
    let ram_size = self.ram_size;

    // ── 1. Advance the timer ────────────────────────────────────────────────
    self.cpu.tick_timer(elapsed_us);

    // ── 2. Early exit if the CPU is sleeping ────────────────────────────────
    if self.cpu.get_wfi() {
        return StepResult::Wfi;
    }

    let mut trap = Trap::None;
    // Value destined for rd; on an access fault it carries the faulting
    // address and is handed to `commit_trap`.
    let mut rval: u32 = 0;
    let mut pc = self.cpu.pc;
    let cycle_start = self.cpu.cyclel;
    let mut cycle_counter = cycle_start;

    // ── 3a. Check for a pending timer interrupt ─────────────────────────────
    // Bit 7 of mip/mie and bit 3 of mstatus must all be set.
    if (self.cpu.mip & (1 << 7)) != 0
        && (self.cpu.mie & (1 << 7)) != 0
        && (self.cpu.mstatus & 0x8) != 0
    {
        trap = Trap::IntTimer;
        // No instruction runs in this call; the PC handed to `commit_trap`
        // is backed up by one instruction.
        pc = pc.wrapping_sub(4);
    } else {
        // ── 3b. Instruction dispatch loop ────────────────────────────────
        'iloop: for _ in 0..count {
            rval = 0;
            cycle_counter = cycle_counter.wrapping_add(1);

            // Fetch: the PC must lie inside RAM and be 4-byte aligned.
            let ofs_pc = pc.wrapping_sub(RAM_BASE);
            if ofs_pc >= ram_size {
                trap = Trap::ExcInsnAccessFault;
                break;
            } else if ofs_pc & 3 != 0 {
                trap = Trap::ExcInsnMisaligned;
                break;
            }

            let ir = mmio::mem_load4(&self.ram, ofs_pc);
            // Destination register index; arms that do not write a register
            // reset it to 0 so the write-back below is skipped.
            let mut rdid = (ir >> 7) & 0x1f;

            // Control-flow arms store `target - 4` in `pc` because the common
            // tail of the loop always adds 4.
            match ir & 0x7f {
                // ── LUI ──────────────────────────────────────────────────
                0x37 => {
                    rval = decode_imm_u(ir) as u32;
                }

                // ── AUIPC ────────────────────────────────────────────────
                0x17 => {
                    rval = pc.wrapping_add(decode_imm_u(ir) as u32);
                }

                // ── JAL ──────────────────────────────────────────────────
                0x6f => {
                    rval = pc.wrapping_add(4);
                    pc = pc.wrapping_add(decode_imm_j(ir) as u32).wrapping_sub(4);
                }

                // ── JALR ─────────────────────────────────────────────────
                0x67 => {
                    rval = pc.wrapping_add(4);
                    // Target = (rs1 + imm) with bit 0 cleared.
                    pc = (self.cpu.regs[((ir >> 15) & 0x1f) as usize]
                        .wrapping_add(decode_imm_i(ir) as u32)
                        & !1)
                        .wrapping_sub(4);
                }

                // ── Branches (BEQ BNE BLT BGE BLTU BGEU) ─────────────────
                0x63 => {
                    let rs1 = self.cpu.regs[((ir >> 15) & 0x1f) as usize] as i32;
                    let rs2 = self.cpu.regs[((ir >> 20) & 0x1f) as usize] as i32;
                    let target = pc.wrapping_add(decode_imm_b(ir) as u32).wrapping_sub(4);
                    rdid = 0;
                    let taken = match (ir >> 12) & 0x7 {
                        0 => rs1 == rs2,                   // beq
                        1 => rs1 != rs2,                   // bne
                        4 => rs1 < rs2,                    // blt
                        5 => rs1 >= rs2,                   // bge
                        6 => (rs1 as u32) < (rs2 as u32),  // bltu
                        7 => (rs1 as u32) >= (rs2 as u32), // bgeu
                        _ => {
                            trap = Trap::ExcIllegalInsn;
                            false
                        }
                    };
                    if taken {
                        pc = target;
                    }
                }

                // ── Loads (LB LH LW LBU LHU) ─────────────────────────────
                0x03 => {
                    let addr = self.cpu.regs[((ir >> 15) & 0x1f) as usize]
                        .wrapping_add(decode_imm_i(ir) as u32);
                    let ofs = addr.wrapping_sub(RAM_BASE);
                    // Anything not at least 4 bytes below the end of RAM is
                    // treated as non-RAM, regardless of the access width.
                    if ofs >= ram_size - 3 {
                        if mmio::is_mmio(addr) {
                            rval = mmio::handle_load(&self.cpu, addr, plat);
                        } else {
                            trap = Trap::ExcLoadAccessFault;
                            rval = addr;
                        }
                    } else {
                        rval = match (ir >> 12) & 0x7 {
                            0 => mmio::mem_load1s(&self.ram, ofs), // lb
                            1 => mmio::mem_load2s(&self.ram, ofs), // lh
                            2 => mmio::mem_load4(&self.ram, ofs),  // lw
                            4 => mmio::mem_load1(&self.ram, ofs),  // lbu
                            5 => mmio::mem_load2(&self.ram, ofs),  // lhu
                            _ => {
                                trap = Trap::ExcIllegalInsn;
                                0
                            }
                        };
                    }
                }

                // ── Stores (SB SH SW) ─────────────────────────────────────
                0x23 => {
                    let rs1 = self.cpu.regs[((ir >> 15) & 0x1f) as usize];
                    let rs2 = self.cpu.regs[((ir >> 20) & 0x1f) as usize];
                    let addr = rs1.wrapping_add(decode_imm_s(ir) as u32);
                    let ofs = addr.wrapping_sub(RAM_BASE);
                    rdid = 0;
                    if ofs >= ram_size - 3 {
                        if mmio::is_mmio(addr) {
                            // In WASM mode the handler is given the output
                            // buffer; otherwise it receives `None`.
                            let buf = if self.wasm_mode {
                                Some(&mut self.output_buf)
                            } else {
                                None
                            };
                            let sr = mmio::handle_store(&mut self.cpu, addr, rs2, buf);
                            if sr != StepResult::Ok {
                                // Early return: the local `pc` and
                                // `cycle_counter` are NOT written back here.
                                return sr;
                            }
                        } else {
                            trap = Trap::ExcStoreAccessFault;
                            rval = addr;
                        }
                    } else {
                        match (ir >> 12) & 0x7 {
                            0 => mmio::mem_store1(&mut self.ram, ofs, rs2), // sb
                            1 => mmio::mem_store2(&mut self.ram, ofs, rs2), // sh
                            2 => mmio::mem_store4(&mut self.ram, ofs, rs2), // sw
                            _ => {
                                trap = Trap::ExcIllegalInsn;
                            }
                        }
                    }
                }

                // ── OP-IMM and OP (RV32I + RV32M) ──────────────────────────
                0x13 | 0x33 => {
                    let imm = decode_imm_i(ir);
                    let rs1 = self.cpu.regs[((ir >> 15) & 0x1f) as usize];
                    // Opcode bit 5 distinguishes register-register (0x33)
                    // from register-immediate (0x13).
                    let is_reg = (ir & 0x20) != 0;
                    // For OP, the low 5 bits of the I-immediate are ir[24:20],
                    // i.e. the rs2 register index.
                    let rs2: u32 = if is_reg {
                        self.cpu.regs[(imm & 0x1f) as usize]
                    } else {
                        imm as u32
                    };

                    if is_reg && (ir & 0x0200_0000) != 0 {
                        // RV32M
                        rval = match (ir >> 12) & 0x7 {
                            0 => rs1.wrapping_mul(rs2), // mul
                            1 => (((rs1 as i32 as i64) * (rs2 as i32 as i64)) >> 32) as u32, // mulh
                            2 => (((rs1 as i32 as i64) * (rs2 as u64 as i64)) >> 32) as u32, // mulhsu
                            3 => (((rs1 as u64) * (rs2 as u64)) >> 32) as u32, // mulhu
                            4 => {
                                // div: x/0 yields all ones; MIN/-1 yields
                                // the dividend instead of overflowing.
                                if rs2 == 0 {
                                    u32::MAX
                                } else if rs1 as i32 == i32::MIN && rs2 as i32 == -1 {
                                    rs1
                                } else {
                                    ((rs1 as i32) / (rs2 as i32)) as u32
                                }
                            }
                            5 => {
                                // divu: x/0 yields all ones.
                                if rs2 == 0 {
                                    u32::MAX
                                } else {
                                    rs1.checked_div(rs2).unwrap()
                                }
                            }
                            6 => {
                                // rem: x%0 yields the dividend; MIN%-1 yields 0.
                                if rs2 == 0 {
                                    rs1
                                } else if rs1 as i32 == i32::MIN && rs2 as i32 == -1 {
                                    0
                                } else {
                                    ((rs1 as i32) % (rs2 as i32)) as u32
                                }
                            }
                            7 => {
                                // remu: x%0 yields the dividend.
                                if rs2 == 0 {
                                    rs1
                                } else {
                                    rs1 % rs2
                                }
                            }
                            _ => 0,
                        };
                    } else {
                        // RV32I
                        rval = match (ir >> 12) & 0x7 {
                            0 => {
                                if is_reg && (ir & 0x4000_0000) != 0 {
                                    rs1.wrapping_sub(rs2) // SUB
                                } else {
                                    rs1.wrapping_add(rs2) // ADD / ADDI
                                }
                            }
                            1 => rs1 << (rs2 & 0x1f),                  // SLL / SLLI
                            2 => ((rs1 as i32) < (rs2 as i32)) as u32, // SLT / SLTI
                            3 => (rs1 < rs2) as u32,                   // SLTU / SLTIU
                            4 => rs1 ^ rs2,                            // XOR / XORI
                            5 => {
                                if ir & 0x4000_0000 != 0 {
                                    ((rs1 as i32) >> (rs2 & 0x1f)) as u32 // SRA / SRAI
                                } else {
                                    rs1 >> (rs2 & 0x1f) // SRL / SRLI
                                }
                            }
                            6 => rs1 | rs2, // OR / ORI
                            7 => rs1 & rs2, // AND / ANDI
                            _ => 0,
                        };
                    }
                }

                // ── FENCE — no-op on this emulator (without real cache) ─────────
                0x0f => {
                    rdid = 0;
                }

                // ── SYSTEM (Zicsr + privileged instructions) ─────────────
                0x73 => {
                    let csrno = ir >> 20;
                    let microop = (ir >> 12) & 0x7;

                    if microop & 3 != 0 {
                        // ── Zicsr ─────────────────────────────────────────
                        // The rs1 field is either a register index or, for
                        // the *I variants, a 5-bit immediate.
                        let rs1imm = (ir >> 15) & 0x1f;
                        let rs1 = self.cpu.regs[rs1imm as usize];

                        // Read the old CSR value; it becomes rd.
                        let rval_csr = match csrno {
                            x if x == Csr::Mscratch as u32 => self.cpu.mscratch,
                            x if x == Csr::Mtvec as u32 => self.cpu.mtvec,
                            x if x == Csr::Mie as u32 => self.cpu.mie,
                            x if x == Csr::Cycle as u32 => cycle_counter,
                            x if x == Csr::Mip as u32 => self.cpu.mip,
                            x if x == Csr::Mepc as u32 => self.cpu.mepc,
                            x if x == Csr::Mstatus as u32 => self.cpu.mstatus,
                            x if x == Csr::Mcause as u32 => self.cpu.mcause,
                            x if x == Csr::Mtval as u32 => self.cpu.mtval,
                            x if x == Csr::Mvendorid as u32 => 0xff0f_f0ff,
                            x if x == Csr::Misa as u32 => 0x4040_1101,
                            _ => mmio::handle_csr_read(csrno, plat) as u32,
                        };
                        rval = rval_csr;

                        // New CSR value according to the operation.
                        let writeval = match microop {
                            1 => rs1,                // csrrw
                            2 => rval_csr | rs1,     // csrrs
                            3 => rval_csr & !rs1,    // csrrc
                            5 => rs1imm,             // csrrwi
                            6 => rval_csr | rs1imm,  // csrrsi
                            7 => rval_csr & !rs1imm, // csrrci
                            _ => rs1,
                        };

                        // The write is performed unconditionally; CSRs not
                        // stored in `CpuState` go to the MMIO CSR handler.
                        match csrno {
                            x if x == Csr::Mscratch as u32 => {
                                self.cpu.mscratch = writeval;
                            }
                            x if x == Csr::Mtvec as u32 => {
                                self.cpu.mtvec = writeval;
                            }
                            x if x == Csr::Mie as u32 => {
                                self.cpu.mie = writeval;
                            }
                            x if x == Csr::Mip as u32 => {
                                self.cpu.mip = writeval;
                            }
                            x if x == Csr::Mepc as u32 => {
                                self.cpu.mepc = writeval;
                            }
                            x if x == Csr::Mstatus as u32 => {
                                self.cpu.mstatus = writeval;
                            }
                            x if x == Csr::Mcause as u32 => {
                                self.cpu.mcause = writeval;
                            }
                            x if x == Csr::Mtval as u32 => {
                                self.cpu.mtval = writeval;
                            }
                            _ => {
                                mmio::handle_csr_write(
                                    &self.ram,
                                    self.ram_size,
                                    csrno,
                                    writeval,
                                );
                            }
                        }
                    } else if microop == 0 {
                        // ── Privileged instructions ──────────────────────
                        rdid = 0;
                        if (csrno & 0xff) == 0x02 {
                            // MRET — return from trap handler
                            let ms = self.cpu.mstatus;
                            let prev = (ms >> 11) & 3;
                            let cur_priv = self.cpu.get_privilege();
                            // MIE = MPIE, MPIE = 1, MPP = current privilege
                            self.cpu.mstatus = ((ms & 0x80) >> 4) | (cur_priv << 11) | 0x80;
                            self.cpu.set_privilege(prev);
                            pc = self.cpu.mepc.wrapping_sub(4);
                        } else {
                            match csrno {
                                0x000 => {
                                    // ECALL — cause depends on the current privilege.
                                    trap = if self.cpu.get_privilege() != 0 {
                                        Trap::ExcEcallM
                                    } else {
                                        Trap::ExcEcallU
                                    };
                                }
                                0x001 => {
                                    // EBREAK
                                    trap = Trap::ExcBreakpoint;
                                }
                                0x105 => {
                                    // WFI — suspend until next interrupt
                                    self.cpu.mstatus |= 8; // enable MIE
                                    self.cpu.set_wfi(true);
                                    // Early return: write back the cycle
                                    // counter (carrying into the high word
                                    // on wrap-around) and the PC of the
                                    // next instruction.
                                    if self.cpu.cyclel > cycle_counter {
                                        self.cpu.cycleh = self.cpu.cycleh.wrapping_add(1);
                                    }
                                    self.cpu.cyclel = cycle_counter;
                                    self.cpu.pc = pc.wrapping_add(4);
                                    return StepResult::Wfi;
                                }
                                _ => {
                                    trap = Trap::ExcIllegalInsn;
                                }
                            }
                        }
                    } else {
                        trap = Trap::ExcIllegalInsn;
                    }
                }

                // ── RV32A ──────────────────────────────
                0x2f => {
                    let rs1 = self.cpu.regs[((ir >> 15) & 0x1f) as usize];
                    let mut rs2 = self.cpu.regs[((ir >> 20) & 0x1f) as usize];
                    // funct5 selects the atomic operation.
                    let irmid = (ir >> 27) & 0x1f;
                    let ofs = rs1.wrapping_sub(RAM_BASE);

                    // Atomics are only supported on RAM, never on MMIO.
                    if ofs >= ram_size - 3 {
                        trap = Trap::ExcStoreAccessFault;
                        rval = rs1;
                    } else {
                        // rd receives the old memory word (except for SC.W);
                        // `rs2` is turned into the value to store.
                        rval = mmio::mem_load4(&self.ram, ofs);
                        let mut dowrite = true;
                        match irmid {
                            2 => {
                                // LR.W
                                dowrite = false;
                                self.cpu.set_reservation(ofs);
                            }
                            3 => {
                                // SC.W — rd = 0 on success, 1 on failure.
                                rval =
                                    (self.cpu.get_reservation() != (ofs & 0x1fff_ffff)) as u32;
                                dowrite = rval == 0;
                            }
                            1 => {
                                // AMOSWAP.W
                            }
                            0 => {
                                // AMOADD.W
                                rs2 = rs2.wrapping_add(rval);
                            }
                            4 => {
                                // AMOXOR.W
                                rs2 ^= rval;
                            }
                            12 => {
                                // AMOAND.W
                                rs2 &= rval;
                            }
                            8 => {
                                // AMOOR.W
                                rs2 |= rval;
                            }
                            16 => {
                                // AMOMIN.W
                                rs2 = if (rs2 as i32) < (rval as i32) {
                                    rs2
                                } else {
                                    rval
                                };
                            }
                            20 => {
                                // AMOMAX.W
                                rs2 = if (rs2 as i32) > (rval as i32) {
                                    rs2
                                } else {
                                    rval
                                };
                            }
                            24 => {
                                // AMOMINU.W
                                rs2 = rs2.min(rval);
                            }
                            28 => {
                                // AMOMAXU.W
                                rs2 = rs2.max(rval);
                            }
                            _ => {
                                trap = Trap::ExcIllegalInsn;
                                dowrite = false;
                            }
                        }
                        if dowrite {
                            mmio::mem_store4(&mut self.ram, ofs, rs2);
                        }
                    }
                }

                _ => {
                    trap = Trap::ExcIllegalInsn;
                }
            } // end opcode dispatch

            // A trap ends the batch. rd is not written and `pc` still refers
            // to the trapping instruction (the +4 below is skipped).
            if !trap.is_none() {
                self.cpu.pc = pc;
                if self.fail_on_all_faults {
                    eprintln!("FAULT");
                    return StepResult::Fault;
                }
                break 'iloop;
            }

            // Write-back; register 0 is never written.
            if rdid != 0 {
                self.cpu.regs[rdid as usize] = rval;
            }
            pc = pc.wrapping_add(4);
        } // end instruction loop
    }

    // ── 4. Commit trap ─────────────────────────────────────────────────────
    if !trap.is_none() {
        pc = self.cpu.commit_trap(trap, rval, pc);
    }

    // ── 5. Write back cycle counter and PC ─────────────────────────────────
    // If the low word wrapped around during this batch, carry into the high word.
    if self.cpu.cyclel > cycle_counter {
        self.cpu.cycleh = self.cpu.cycleh.wrapping_add(1);
    }
    self.cpu.cyclel = cycle_counter;
    self.cpu.pc = pc;
    StepResult::Ok
}
958
959 // ─────────────────────────────────────────────────────────────────────────
960 // Debug
961 // ─────────────────────────────────────────────────────────────────────────
962
963 /// Write one line of compact execution trace to `writer`.
964 ///
965 /// Format: `<pc> <ir> <mnem padded to 36 chars> [reg=val ...]`
966 ///
967 /// Only registers that changed relative to `prev_regs` are listed.
968 /// Instructions with no register writes (branches, stores) produce a line
969 /// that ends after the mnemonic.
970 ///
971 /// ## Example output
972 ///
973 /// ```text
974 /// 80000014 510010ef jal ra, <main> ra=80000018
975 /// 80001524 ff010113 addi sp, sp, -16 sp=81fffff0
976 /// 80001528 00112623 sw ra, 12(sp)
977 /// ```
978 pub fn write_trace_line(
979 &self,
980 writer: &mut dyn std::io::Write,
981 pc: u32,
982 prev_regs: &[u32; 32],
983 ) {
984 use crate::disasm;
985
986 const NAMES: [&str; 32] = [
987 "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3",
988 "a4", "a5", "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
989 "t3", "t4", "t5", "t6",
990 ];
991
992 let pc_ofs = pc.wrapping_sub(RAM_BASE);
993
994 // Disassemble the instruction at the captured PC.
995 let (ir, mnem) = if pc_ofs < self.ram_size - 3 {
996 let ir = mmio::mem_load4(&self.ram, pc_ofs);
997 let mnem = disasm::disassemble(ir, pc, self.symbols.as_ref());
998 (ir, mnem)
999 } else {
1000 (0, "[out of RAM]".to_string())
1001 };
1002
1003 // Registers that have changed
1004 let changes: Vec<String> = (0..32)
1005 .filter(|&i| self.cpu.regs[i] != prev_regs[i])
1006 .map(|i| format!("{}={:08x}", NAMES[i], self.cpu.regs[i]))
1007 .collect();
1008
1009 // Compact line: pc ir mnem [changed registers...]
1010 // mnem padded to 36 characters to simplify fixed-column parsing
1011 let _ = writeln!(
1012 writer,
1013 "{:08x} {:08x} {:<36}{}",
1014 pc,
1015 ir,
1016 mnem,
1017 changes.join(" ")
1018 );
1019 }
1020
1021 /// Print the PC, the disassembled instruction, and the full register grid.
1022 ///
1023 /// `prev_regs` — register state *before* the last step.
1024 /// Registers that changed are prefixed with `*`.
1025 ///
1026 /// Format:
1027 /// ```text
1028 /// 80001234 510010ef jal ra, <main>
1029 /// zero=00000000 *ra=80000018 sp=83fffff0 gp=00000000
1030 /// ...
1031 /// ```
1032 pub fn dump_state(&self, prev_regs: Option<&[u32; 32]>) {
1033 use crate::disasm;
1034
1035 let pc = self.cpu.pc;
1036 let pc_ofs = pc.wrapping_sub(RAM_BASE);
1037
1038 // ── Instruction line ────────────────────────────────────────────────────
1039 if pc_ofs < self.ram_size - 3 {
1040 let ir = mmio::mem_load4(&self.ram, pc_ofs);
1041 let mnem = disasm::disassemble(ir, pc, self.symbols.as_ref());
1042 eprintln!("{:08x} {:08x} {}", pc, ir, mnem);
1043 } else {
1044 eprintln!("{:08x} [out of RAM]", pc);
1045 }
1046
1047 // ── Register grid — 4 per row ──────────────────────────────────────────
1048 // ABI names in the same order as the classic mini-rv32ima dump.
1049 const NAMES: [&str; 32] = [
1050 "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3",
1051 "a4", "a5", "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
1052 "t3", "t4", "t5", "t6",
1053 ];
1054
1055 let r = &self.cpu.regs;
1056 let prev = prev_regs.unwrap_or(r); // sem prev → nada marcado
1057
1058 // Each cell has a fixed width: marker (1) + name (4) + "=" (1) + value (8) = 14
1059 // The longest name is "zero" (4 chars), so {:>4} right-aligns them all.
1060 // The marker is part of the label: "*ra " or " ra " — always 5 chars total.
1061 let mut line = String::with_capacity(80);
1062 for i in 0..32 {
1063 let changed = r[i] != prev[i];
1064 // Label = marker + name, left-aligned in a 5-character field:
1065 // " zero", " ra ", " sp ", " *a3 " etc.
1066 let label = format!("{}{}", if changed { "*" } else { " " }, NAMES[i]);
1067 let cell = format!("{:>5}={:08x}", label, r[i]);
1068 line.push_str(&cell);
1069
1070 if (i + 1) % 4 == 0 {
1071 eprintln!(" {}", line.trim_end());
1072 line.clear();
1073 } else {
1074 line.push_str(" ");
1075 }
1076 }
1077 }
1078}