diff --git a/crates/rue-codegen/src/aarch64/cfg_lower.rs b/crates/rue-codegen/src/aarch64/cfg_lower.rs index df598b86c..dfe99e31c 100644 --- a/crates/rue-codegen/src/aarch64/cfg_lower.rs +++ b/crates/rue-codegen/src/aarch64/cfg_lower.rs @@ -96,6 +96,13 @@ impl<'a> CfgLower<'a> { } } + /// Intern a symbol name and return its ID. + /// + /// Convenience method that delegates to the MIR's symbol table. + fn intern_symbol(&mut self, symbol: &str) -> u32 { + self.mir.intern_symbol(symbol) + } + /// Get the length of an array type. fn array_length(&self, array_type_id: ArrayTypeId) -> u64 { debug_assert!( @@ -302,9 +309,8 @@ impl<'a> CfgLower<'a> { }); // Call the bounds check error handler (never returns) - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_bounds_check".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_bounds_check"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); // Continue with valid access self.mir.push(Aarch64Inst::Label { id: ok_label }); @@ -843,9 +849,8 @@ impl<'a> CfgLower<'a> { src: Operand::Virtual(rhs_vreg), label: ok_label, }); - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_div_by_zero".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_div_by_zero"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); self.mir.push(Aarch64Inst::Label { id: ok_label }); self.mir.push(Aarch64Inst::SdivRR { @@ -868,9 +873,8 @@ impl<'a> CfgLower<'a> { src: Operand::Virtual(rhs_vreg), label: ok_label, }); - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_div_by_zero".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_div_by_zero"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); self.mir.push(Aarch64Inst::Label { id: ok_label }); // Compute quotient first @@ -1698,9 +1702,9 @@ impl<'a> CfgLower<'a> { } // Call the function - the linker will add the underscore prefix for macOS - self.mir.push(Aarch64Inst::Bl { - symbol: self.interner.get(*name).to_string(), - }); + let symbol_name = self.interner.get(*name); + let symbol_id 
= self.intern_symbol(symbol_name); + self.mir.push(Aarch64Inst::Bl { symbol_id }); // Clean up stack space after call if stack_space > 0 { @@ -1790,9 +1794,8 @@ impl<'a> CfgLower<'a> { }); // Call __rue_dbg_str - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_dbg_str".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_dbg_str"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); } else { unreachable!("String fat pointer not found in struct_slot_vregs"); } @@ -1871,9 +1874,8 @@ impl<'a> CfgLower<'a> { _ => unreachable!(), } - self.mir.push(Aarch64Inst::Bl { - symbol: runtime_fn.to_string(), - }); + let symbol_id = self.intern_symbol(runtime_fn); + self.mir.push(Aarch64Inst::Bl { symbol_id }); let result_vreg = self.mir.alloc_vreg(); self.value_map.insert(value, result_vreg); @@ -2426,9 +2428,8 @@ impl<'a> CfgLower<'a> { src: Operand::Virtual(field_vregs[2]), // cap }); - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_drop_String".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_drop_String"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); return; } @@ -2457,9 +2458,8 @@ impl<'a> CfgLower<'a> { src: Operand::Virtual(*vreg), }); } - self.mir.push(Aarch64Inst::Bl { - symbol: destructor_name.clone(), - }); + let symbol_id = self.intern_symbol(destructor_name); + self.mir.push(Aarch64Inst::Bl { symbol_id }); } // Now call the drop glue function to drop fields @@ -2472,9 +2472,8 @@ impl<'a> CfgLower<'a> { } let drop_fn_name = format!("__rue_drop_{}", struct_def.name); - self.mir.push(Aarch64Inst::Bl { - symbol: drop_fn_name, - }); + let symbol_id = self.intern_symbol(&drop_fn_name); + self.mir.push(Aarch64Inst::Bl { symbol_id }); return; } @@ -2618,9 +2617,8 @@ impl<'a> CfgLower<'a> { } // Overflow occurred - call panic handler - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_overflow"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); self.mir.push(Aarch64Inst::Label { 
id: ok_label }); } @@ -2653,9 +2651,8 @@ impl<'a> CfgLower<'a> { _ => return, } - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_overflow"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); self.mir.push(Aarch64Inst::Label { id: ok_label }); } @@ -2794,9 +2791,8 @@ impl<'a> CfgLower<'a> { _ => return, } - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_overflow"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); self.mir.push(Aarch64Inst::Label { id: ok_label }); } @@ -2836,9 +2832,8 @@ impl<'a> CfgLower<'a> { _ => return, } - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_overflow"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); self.mir.push(Aarch64Inst::Label { id: ok_label }); } @@ -2896,9 +2891,8 @@ impl<'a> CfgLower<'a> { }); // Below min - panic - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_intcast_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_intcast_overflow"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); self.mir.push(Aarch64Inst::Label { id: ok_label }); let ok_label2 = self.mir.alloc_label(); @@ -2919,9 +2913,8 @@ impl<'a> CfgLower<'a> { }); // Above max - panic - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_intcast_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_intcast_overflow"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); self.mir.push(Aarch64Inst::Label { id: ok_label2 }); } } else { @@ -2969,9 +2962,8 @@ impl<'a> CfgLower<'a> { }); // Negative - panic - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_intcast_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_intcast_overflow"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); self.mir.push(Aarch64Inst::Label { id: ok_label }); // Also check upper bound if narrowing @@ -2993,9 +2985,8 @@ 
impl<'a> CfgLower<'a> { }); // Above max - panic - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_intcast_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_intcast_overflow"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); self.mir.push(Aarch64Inst::Label { id: ok_label2 }); } } @@ -3020,9 +3011,8 @@ impl<'a> CfgLower<'a> { }); // Above max - panic - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_intcast_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_intcast_overflow"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); self.mir.push(Aarch64Inst::Label { id: ok_label }); } else { // Unsigned to unsigned: narrowing check @@ -3042,9 +3032,8 @@ impl<'a> CfgLower<'a> { }); // Above max - panic - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_intcast_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_intcast_overflow"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); self.mir.push(Aarch64Inst::Label { id: ok_label }); } } @@ -3127,9 +3116,8 @@ impl<'a> CfgLower<'a> { }); // Call __rue_str_eq - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_str_eq".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_str_eq"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); // Result is in X0 (0 or 1) self.mir.push(Aarch64Inst::MovRR { @@ -3318,9 +3306,8 @@ impl<'a> CfgLower<'a> { }); // Call __rue_str_eq - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_str_eq".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_str_eq"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); // Result is in X0 (0 or 1) self.mir.push(Aarch64Inst::MovRR { @@ -3476,9 +3463,8 @@ impl<'a> CfgLower<'a> { dst: Operand::Physical(Reg::X0), src: Operand::Virtual(val_vreg), }); - self.mir.push(Aarch64Inst::Bl { - symbol: "__rue_exit".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_exit"); + self.mir.push(Aarch64Inst::Bl { symbol_id }); } else if let Type::Struct(struct_id) = return_type { // Return struct in 
registers let slot_count = self.type_slot_count(Type::Struct(struct_id)); diff --git a/crates/rue-codegen/src/aarch64/emit.rs b/crates/rue-codegen/src/aarch64/emit.rs index 1b003bef0..932dced27 100644 --- a/crates/rue-codegen/src/aarch64/emit.rs +++ b/crates/rue-codegen/src/aarch64/emit.rs @@ -881,7 +881,8 @@ impl<'a> Emitter<'a> { self.record_label(format!("L{}", id)); } - Aarch64Inst::Bl { symbol } => { + Aarch64Inst::Bl { symbol_id } => { + let symbol = self.mir.get_symbol(*symbol_id); self.begin_inst(); self.emit_bl(symbol); self.end_inst(format!("bl {}", symbol)); @@ -2537,9 +2538,8 @@ mod tests { use crate::RelocationKind; let mut mir = Aarch64Mir::new(); - mir.push(Aarch64Inst::Bl { - symbol: "test_func".to_string(), - }); + let symbol_id = mir.intern_symbol("test_func"); + mir.push(Aarch64Inst::Bl { symbol_id }); let (code, relocs) = Emitter::new(&mir, 0, 0, &[], &[]).emit().unwrap(); diff --git a/crates/rue-codegen/src/aarch64/mir.rs b/crates/rue-codegen/src/aarch64/mir.rs index 27739d5d2..43ac89f78 100644 --- a/crates/rue-codegen/src/aarch64/mir.rs +++ b/crates/rue-codegen/src/aarch64/mir.rs @@ -699,7 +699,9 @@ pub enum Aarch64Inst { Label { id: LabelId }, /// `bl symbol` - Branch with link (call). - Bl { symbol: String }, + /// + /// The `symbol_id` is an index into the symbol table stored in `Aarch64Mir`. + Bl { symbol_id: u32 }, /// `ret` - Return (branch to LR). 
Ret, @@ -909,7 +911,7 @@ impl fmt::Display for Aarch64Inst { Aarch64Inst::Bvs { label } => write!(f, "b.vs {}", label), Aarch64Inst::Bvc { label } => write!(f, "b.vc {}", label), Aarch64Inst::Label { id } => write!(f, "{}:", id), - Aarch64Inst::Bl { symbol } => write!(f, "bl {}", symbol), + Aarch64Inst::Bl { symbol_id } => write!(f, "bl sym{}", symbol_id), Aarch64Inst::Ret => write!(f, "ret"), Aarch64Inst::StpPre { src1, src2, offset } => { write!(f, "stp {}, {}, [sp, #{}]!", src1, src2, offset) @@ -942,6 +944,11 @@ pub struct Aarch64Mir { /// Inline labels (for overflow checks, bounds checks, etc.) use IDs from /// the lower half of the `u32` space. See module docs for namespace details. next_label: u32, + /// Symbol table for call targets. + /// + /// Stores symbol names indexed by `symbol_id` in `Bl` instructions. + /// This avoids heap-allocating a String for every call instruction. + symbols: Vec<String>, } impl Aarch64Mir { @@ -951,7 +958,52 @@ impl Aarch64Mir { instructions: Vec::new(), next_vreg: 0, next_label: 0, + symbols: Vec::new(), + } + } + + /// Intern a symbol name and return its ID. + /// + /// If the symbol already exists, returns its existing ID. + /// Otherwise, adds it to the table and returns the new ID. + pub fn intern_symbol(&mut self, symbol: &str) -> u32 { + // Check if symbol already exists + if let Some(idx) = self.symbols.iter().position(|s| s == symbol) { + return idx as u32; } + // Add new symbol + let idx = self.symbols.len() as u32; + self.symbols.push(symbol.to_string()); + idx + } + + /// Get a symbol name by its ID. + /// + /// # Panics + /// Panics if the symbol_id is out of bounds. + #[inline] + pub fn get_symbol(&self, symbol_id: u32) -> &str { + &self.symbols[symbol_id as usize] + } + + /// Get the symbol table. + #[inline] + pub fn symbols(&self) -> &[String] { + &self.symbols + } + + /// Take ownership of the symbol table. + /// + /// Used during register allocation to transfer symbols to the new MIR. 
+ pub fn take_symbols(&mut self) -> Vec<String> { + std::mem::take(&mut self.symbols) + } + + /// Set the symbol table. + /// + /// Used during register allocation to restore symbols from the old MIR. + pub fn set_symbols(&mut self, symbols: Vec<String>) { + self.symbols = symbols; } /// Allocate a new virtual register. @@ -1029,7 +1081,12 @@ impl Aarch64Mir { impl fmt::Display for Aarch64Mir { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for inst in &self.instructions { - writeln!(f, " {}", inst)?; + // Special handling for Bl to show actual symbol name + if let Aarch64Inst::Bl { symbol_id } = inst { + writeln!(f, " bl {}", self.get_symbol(*symbol_id))?; + } else { + writeln!(f, " {}", inst)?; + } } Ok(()) } diff --git a/crates/rue-codegen/src/aarch64/regalloc.rs b/crates/rue-codegen/src/aarch64/regalloc.rs index 4e011d552..e9eee9d27 100644 --- a/crates/rue-codegen/src/aarch64/regalloc.rs +++ b/crates/rue-codegen/src/aarch64/regalloc.rs @@ -135,8 +135,12 @@ impl RegAlloc { } fn rewrite_instructions(&mut self) -> CompileResult<()> { + // Take symbols from old MIR before taking instructions + let symbols = self.mir.take_symbols(); let old_instructions = std::mem::take(&mut self.mir).into_instructions(); let mut new_mir = Aarch64Mir::new(); + // Restore symbols to new MIR + new_mir.set_symbols(symbols); for inst in old_instructions { self.rewrite_inst(&mut new_mir, inst)?; @@ -823,7 +827,7 @@ impl RegAlloc { Aarch64Inst::Bvs { label } => mir.push(Aarch64Inst::Bvs { label }), Aarch64Inst::Bvc { label } => mir.push(Aarch64Inst::Bvc { label }), Aarch64Inst::Label { id } => mir.push(Aarch64Inst::Label { id }), - Aarch64Inst::Bl { symbol } => mir.push(Aarch64Inst::Bl { symbol }), + Aarch64Inst::Bl { symbol_id } => mir.push(Aarch64Inst::Bl { symbol_id }), Aarch64Inst::Ret => mir.push(Aarch64Inst::Ret), } Ok(()) diff --git a/crates/rue-codegen/src/x86_64/cfg_lower.rs b/crates/rue-codegen/src/x86_64/cfg_lower.rs index 5acf9b0cb..b9d9684f1 100644 --- 
a/crates/rue-codegen/src/x86_64/cfg_lower.rs +++ b/crates/rue-codegen/src/x86_64/cfg_lower.rs @@ -107,6 +107,13 @@ impl<'a> CfgLower<'a> { } } + /// Intern a symbol name and return its ID. + /// + /// Convenience method that delegates to the MIR's symbol table. + fn intern_symbol(&mut self, symbol: &str) -> u32 { + self.mir.intern_symbol(symbol) + } + /// Get the length of an array type. fn array_length(&self, array_type_id: ArrayTypeId) -> u64 { debug_assert!( @@ -308,9 +315,8 @@ impl<'a> CfgLower<'a> { self.mir.push(X86Inst::Jb { label: ok_label }); // Call the bounds check error handler (never returns) - self.mir.push(X86Inst::CallRel { - symbol: "__rue_bounds_check".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_bounds_check"); + self.mir.push(X86Inst::CallRel { symbol_id }); // Continue with valid access self.mir.push(X86Inst::Label { id: ok_label }); @@ -917,9 +923,8 @@ impl<'a> CfgLower<'a> { src2: Operand::Virtual(rhs_vreg), }); self.mir.push(X86Inst::Jnz { label: ok_label }); - self.mir.push(X86Inst::CallRel { - symbol: "__rue_div_by_zero".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_div_by_zero"); + self.mir.push(X86Inst::CallRel { symbol_id }); self.mir.push(X86Inst::Label { id: ok_label }); self.mir.push(X86Inst::MovRR { @@ -949,9 +954,8 @@ impl<'a> CfgLower<'a> { src2: Operand::Virtual(rhs_vreg), }); self.mir.push(X86Inst::Jnz { label: ok_label }); - self.mir.push(X86Inst::CallRel { - symbol: "__rue_div_by_zero".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_div_by_zero"); + self.mir.push(X86Inst::CallRel { symbol_id }); self.mir.push(X86Inst::Label { id: ok_label }); self.mir.push(X86Inst::MovRR { @@ -1831,9 +1835,9 @@ impl<'a> CfgLower<'a> { }); } - self.mir.push(X86Inst::CallRel { - symbol: self.interner.get(*name).to_string(), - }); + let symbol_name = self.interner.get(*name); + let symbol_id = self.intern_symbol(symbol_name); + self.mir.push(X86Inst::CallRel { symbol_id }); // Clean up stack 
arguments if num_stack_args > 0 { @@ -1926,9 +1930,8 @@ impl<'a> CfgLower<'a> { }); // Call __rue_dbg_str - self.mir.push(X86Inst::CallRel { - symbol: "__rue_dbg_str".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_dbg_str"); + self.mir.push(X86Inst::CallRel { symbol_id }); } else { unreachable!("string value should have field vregs for fat pointer"); } @@ -2008,9 +2011,8 @@ impl<'a> CfgLower<'a> { _ => unreachable!(), } - self.mir.push(X86Inst::CallRel { - symbol: runtime_fn.to_string(), - }); + let symbol_id = self.intern_symbol(runtime_fn); + self.mir.push(X86Inst::CallRel { symbol_id }); let result_vreg = self.mir.alloc_vreg(); self.value_map.insert(value, result_vreg); @@ -2590,9 +2592,8 @@ impl<'a> CfgLower<'a> { src: Operand::Virtual(field_vregs[2]), // cap }); - self.mir.push(X86Inst::CallRel { - symbol: "__rue_drop_String".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_drop_String"); + self.mir.push(X86Inst::CallRel { symbol_id }); return; } @@ -2621,9 +2622,8 @@ impl<'a> CfgLower<'a> { src: Operand::Virtual(*vreg), }); } - self.mir.push(X86Inst::CallRel { - symbol: destructor_name.clone(), - }); + let symbol_id = self.intern_symbol(destructor_name); + self.mir.push(X86Inst::CallRel { symbol_id }); } // Now call the drop glue function to drop fields @@ -2636,9 +2636,8 @@ impl<'a> CfgLower<'a> { } let drop_fn_name = format!("__rue_drop_{}", struct_def.name); - self.mir.push(X86Inst::CallRel { - symbol: drop_fn_name, - }); + let symbol_id = self.intern_symbol(&drop_fn_name); + self.mir.push(X86Inst::CallRel { symbol_id }); return; } @@ -2755,9 +2754,8 @@ impl<'a> CfgLower<'a> { } // Overflow occurred - call panic handler - self.mir.push(X86Inst::CallRel { - symbol: "__rue_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_overflow"); + self.mir.push(X86Inst::CallRel { symbol_id }); self.mir.push(X86Inst::Label { id: ok_label }); } @@ -2819,9 +2817,8 @@ impl<'a> CfgLower<'a> { self.mir.push(X86Inst::Jge { 
label: ok_label }); // Below min - panic - self.mir.push(X86Inst::CallRel { - symbol: "__rue_intcast_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_intcast_overflow"); + self.mir.push(X86Inst::CallRel { symbol_id }); self.mir.push(X86Inst::Label { id: ok_label }); let ok_label2 = self.new_label(); @@ -2845,9 +2842,8 @@ impl<'a> CfgLower<'a> { self.mir.push(X86Inst::Jle { label: ok_label2 }); // Above max - panic - self.mir.push(X86Inst::CallRel { - symbol: "__rue_intcast_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_intcast_overflow"); + self.mir.push(X86Inst::CallRel { symbol_id }); self.mir.push(X86Inst::Label { id: ok_label2 }); } } else { @@ -2867,9 +2863,8 @@ impl<'a> CfgLower<'a> { self.mir.push(X86Inst::Jge { label: ok_label }); // Negative - panic - self.mir.push(X86Inst::CallRel { - symbol: "__rue_intcast_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_intcast_overflow"); + self.mir.push(X86Inst::CallRel { symbol_id }); self.mir.push(X86Inst::Label { id: ok_label }); // Also check upper bound if narrowing @@ -2896,9 +2891,8 @@ impl<'a> CfgLower<'a> { } // Above max - panic - self.mir.push(X86Inst::CallRel { - symbol: "__rue_intcast_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_intcast_overflow"); + self.mir.push(X86Inst::CallRel { symbol_id }); self.mir.push(X86Inst::Label { id: ok_label2 }); } } @@ -2927,9 +2921,8 @@ impl<'a> CfgLower<'a> { self.mir.push(X86Inst::Jbe { label: ok_label }); // Above max - panic - self.mir.push(X86Inst::CallRel { - symbol: "__rue_intcast_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_intcast_overflow"); + self.mir.push(X86Inst::CallRel { symbol_id }); self.mir.push(X86Inst::Label { id: ok_label }); } else { // Unsigned to unsigned: narrowing check @@ -2953,9 +2946,8 @@ impl<'a> CfgLower<'a> { self.mir.push(X86Inst::Jbe { label: ok_label }); // Above max - panic - self.mir.push(X86Inst::CallRel { 
- symbol: "__rue_intcast_overflow".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_intcast_overflow"); + self.mir.push(X86Inst::CallRel { symbol_id }); self.mir.push(X86Inst::Label { id: ok_label }); } } @@ -3169,9 +3161,8 @@ impl<'a> CfgLower<'a> { }); // Call __rue_str_eq - self.mir.push(X86Inst::CallRel { - symbol: "__rue_str_eq".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_str_eq"); + self.mir.push(X86Inst::CallRel { symbol_id }); // Result is in RAX (0 or 1) self.mir.push(X86Inst::MovRR { @@ -3335,9 +3326,8 @@ impl<'a> CfgLower<'a> { // restoring the frame would break stack alignment for the call // (after pop rbp, rsp is 8 mod 16; call pushes 8 more, making // it 0 mod 16 at callee entry, violating SysV ABI). - self.mir.push(X86Inst::CallRel { - symbol: "__rue_exit".to_string(), - }); + let symbol_id = self.intern_symbol("__rue_exit"); + self.mir.push(X86Inst::CallRel { symbol_id }); } else if let Type::Struct(struct_id) = return_type { // Return struct in registers let slot_count = self.type_slot_count(Type::Struct(struct_id)); diff --git a/crates/rue-codegen/src/x86_64/emit.rs b/crates/rue-codegen/src/x86_64/emit.rs index c5bc33800..fca6addb9 100644 --- a/crates/rue-codegen/src/x86_64/emit.rs +++ b/crates/rue-codegen/src/x86_64/emit.rs @@ -853,7 +853,8 @@ impl<'a> Emitter<'a> { self.labels.insert(*id, self.code.len()); self.record_label(format!("{}", id)); } - X86Inst::CallRel { symbol } => { + X86Inst::CallRel { symbol_id } => { + let symbol = self.mir.get_symbol(*symbol_id); self.begin_inst(); self.emit_call_rel(symbol); self.end_inst(format!("call {}", symbol)); @@ -2343,9 +2344,8 @@ mod tests { use crate::RelocationKind; let mut mir = X86Mir::new(); - mir.push(X86Inst::CallRel { - symbol: "__rue_exit".into(), - }); + let symbol_id = mir.intern_symbol("__rue_exit"); + mir.push(X86Inst::CallRel { symbol_id }); let (code, relocs) = Emitter::new(&mir, 0, 0, 0, &[], &[]).emit().unwrap(); diff --git 
a/crates/rue-codegen/src/x86_64/mir.rs b/crates/rue-codegen/src/x86_64/mir.rs index 01e04e5fb..89507f485 100644 --- a/crates/rue-codegen/src/x86_64/mir.rs +++ b/crates/rue-codegen/src/x86_64/mir.rs @@ -369,7 +369,9 @@ pub enum X86Inst { /// /// The symbol will be resolved by the linker. This emits a `call rel32` /// instruction with a relocation for the target address. - CallRel { symbol: String }, + /// + /// The `symbol_id` is an index into the symbol table stored in `X86Mir`. + CallRel { symbol_id: u32 }, /// `syscall` - Invoke system call. Syscall, @@ -531,7 +533,7 @@ impl fmt::Display for X86Inst { X86Inst::Jle { label } => write!(f, "jle {}", label), X86Inst::Jmp { label } => write!(f, "jmp {}", label), X86Inst::Label { id } => write!(f, "{}:", id), - X86Inst::CallRel { symbol } => write!(f, "call {}", symbol), + X86Inst::CallRel { symbol_id } => write!(f, "call sym{}", symbol_id), X86Inst::Syscall => write!(f, "syscall"), X86Inst::Ret => write!(f, "ret"), X86Inst::Pop { dst } => write!(f, "pop {}", dst), @@ -597,6 +599,11 @@ pub struct X86Mir { next_vreg: u32, /// The next label index. next_label: u32, + /// Symbol table for call targets. + /// + /// Stores symbol names indexed by `symbol_id` in `CallRel` instructions. + /// This avoids heap-allocating a String for every call instruction. + symbols: Vec<String>, } impl X86Mir { @@ -606,7 +613,52 @@ impl X86Mir { instructions: Vec::new(), next_vreg: 0, next_label: 0, + symbols: Vec::new(), + } + } + + /// Intern a symbol name and return its ID. + /// + /// If the symbol already exists, returns its existing ID. + /// Otherwise, adds it to the table and returns the new ID. + pub fn intern_symbol(&mut self, symbol: &str) -> u32 { + // Check if symbol already exists + if let Some(idx) = self.symbols.iter().position(|s| s == symbol) { + return idx as u32; } + // Add new symbol + let idx = self.symbols.len() as u32; + self.symbols.push(symbol.to_string()); + idx + } + + /// Get a symbol name by its ID. 
+ /// + /// # Panics + /// Panics if the symbol_id is out of bounds. + #[inline] + pub fn get_symbol(&self, symbol_id: u32) -> &str { + &self.symbols[symbol_id as usize] + } + + /// Get the symbol table. + #[inline] + pub fn symbols(&self) -> &[String] { + &self.symbols + } + + /// Take ownership of the symbol table. + /// + /// Used during register allocation to transfer symbols to the new MIR. + pub fn take_symbols(&mut self) -> Vec<String> { + std::mem::take(&mut self.symbols) + } + + /// Set the symbol table. + /// + /// Used during register allocation to restore symbols from the old MIR. + pub fn set_symbols(&mut self, symbols: Vec<String>) { + self.symbols = symbols; } /// Allocate a new virtual register. @@ -666,7 +718,12 @@ impl X86Mir { impl fmt::Display for X86Mir { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for inst in &self.instructions { - writeln!(f, " {}", inst)?; + // Special handling for CallRel to show actual symbol name + if let X86Inst::CallRel { symbol_id } = inst { + writeln!(f, " call {}", self.get_symbol(*symbol_id))?; + } else { + writeln!(f, " {}", inst)?; + } } Ok(()) } diff --git a/crates/rue-codegen/src/x86_64/regalloc.rs b/crates/rue-codegen/src/x86_64/regalloc.rs index 56c7b016f..033d784ad 100644 --- a/crates/rue-codegen/src/x86_64/regalloc.rs +++ b/crates/rue-codegen/src/x86_64/regalloc.rs @@ -153,8 +153,12 @@ impl RegAlloc { fn rewrite_instructions(&mut self) -> CompileResult<()> { // For spilled vregs, we need to insert load/store operations. // This is done by building a new instruction list. 
+ // Take symbols from old MIR before taking instructions + let symbols = self.mir.take_symbols(); let old_instructions = std::mem::take(&mut self.mir).into_instructions(); let mut new_mir = X86Mir::new(); + // Restore symbols to new MIR + new_mir.set_symbols(symbols); for inst in old_instructions { self.rewrite_inst(&mut new_mir, inst)?; @@ -729,7 +733,7 @@ impl RegAlloc { X86Inst::Jle { label } => mir.push(X86Inst::Jle { label }), X86Inst::Jmp { label } => mir.push(X86Inst::Jmp { label }), X86Inst::Label { id } => mir.push(X86Inst::Label { id }), - X86Inst::CallRel { symbol } => mir.push(X86Inst::CallRel { symbol }), + X86Inst::CallRel { symbol_id } => mir.push(X86Inst::CallRel { symbol_id }), X86Inst::Syscall => mir.push(X86Inst::Syscall), X86Inst::Ret => mir.push(X86Inst::Ret), } diff --git a/crates/rue-compiler/src/lib.rs b/crates/rue-compiler/src/lib.rs index 00ebe5082..e2b893359 100644 --- a/crates/rue-compiler/src/lib.rs +++ b/crates/rue-compiler/src/lib.rs @@ -761,6 +761,9 @@ impl Mir { X86Inst::Label { id } => { output.push_str(&format!("{}:\n", id)); } + X86Inst::CallRel { symbol_id } => { + output.push_str(&format!(" call {}\n", mir.get_symbol(*symbol_id))); + } _ => { output.push_str(&format!(" {}\n", inst)); } @@ -774,6 +777,9 @@ impl Mir { Aarch64Inst::Label { id } => { output.push_str(&format!("{}:\n", id)); } + Aarch64Inst::Bl { symbol_id } => { + output.push_str(&format!(" bl {}\n", mir.get_symbol(*symbol_id))); + } _ => { output.push_str(&format!(" {}\n", inst)); }