From 0c37081034421052c512016f3739609ab439930f Mon Sep 17 00:00:00 2001 From: Steve Klabnik Date: Sun, 28 Dec 2025 07:12:07 -0600 Subject: [PATCH] refactor(rir): replace Vec fields with extra array pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert all InstData variants that embedded Vec to use the extra array pattern (start: u32, len: u32). This follows the Zig ZIR approach. Affected variants: - Match: arms now in extra array - FnDecl: directives and params now in extra array - Call, MethodCall, AssocFnCall, Intrinsic: args now in extra array - ArrayInit: elements now in extra array - StructInit, StructDecl: fields now in extra array - EnumDecl: variants now in extra array - ImplDecl: methods now in extra array - Alloc: directives now in extra array Benefits: - Reduces InstData enum size from ~64+ bytes to ~24 bytes - Eliminates per-instruction heap allocations - Improves cache locality during IR traversal Fix: Use try_from_usize instead of non-existent from_usize_unchecked in Symbol::from_raw. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- crates/rue-air/src/inference/generate.rs | 70 +- crates/rue-air/src/sema.rs | 159 +++-- crates/rue-intern/src/lib.rs | 14 +- crates/rue-rir/src/astgen.rs | 156 ++++- crates/rue-rir/src/inst.rs | 825 ++++++++++++++++++----- crates/rue-span/src/lib.rs | 6 + 6 files changed, 976 insertions(+), 254 deletions(-) diff --git a/crates/rue-air/src/inference/generate.rs b/crates/rue-air/src/inference/generate.rs index cea62389e..6eca9d53e 100644 --- a/crates/rue-air/src/inference/generate.rs +++ b/crates/rue-air/src/inference/generate.rs @@ -363,7 +363,8 @@ impl<'a> ConstraintGenerator<'a> { // Local variable allocation InstData::Alloc { - directives: _, + directives_start: _, + directives_len: _, name, is_mut, ty: type_annotation, @@ -442,7 +443,12 @@ impl<'a> ConstraintGenerator<'a> { } // Function call - InstData::Call { name, args } => { + InstData::Call { + name, + args_start, + args_len, + } => { + let args = self.rir.get_call_args(*args_start, *args_len); if let Some(func) = self.functions.get(name) { // Check argument count matches parameter count. 
// Semantic analysis will emit a proper error; we just need to avoid @@ -476,8 +482,13 @@ impl<'a> ConstraintGenerator<'a> { } // Intrinsic call - InstData::Intrinsic { name, args } => { + InstData::Intrinsic { + name, + args_start, + args_len, + } => { let intrinsic_name = self.interner.get(*name); + let args = self.rir.get_inst_refs(*args_start, *args_len); if intrinsic_name == "intCast" { // @intCast: target type is inferred from context @@ -617,8 +628,13 @@ impl<'a> ConstraintGenerator<'a> { InstData::Break | InstData::Continue => InferType::Concrete(Type::Never), // Match expression - InstData::Match { scrutinee, arms } => { + InstData::Match { + scrutinee, + arms_start, + arms_len, + } => { let scrutinee_info = self.generate(*scrutinee, ctx); + let arms = self.rir.get_match_arms(*arms_start, *arms_len); // Collect arm types, handling Never coercion let mut arm_types: Vec = Vec::new(); @@ -662,8 +678,13 @@ impl<'a> ConstraintGenerator<'a> { } // Struct initialization - InstData::StructInit { type_name, fields } => { + InstData::StructInit { + type_name, + fields_start, + fields_len, + } => { if let Some(&struct_ty) = self.structs.get(type_name) { + let fields = self.rir.get_field_inits(*fields_start, *fields_len); // Generate constraints for each field for (_, value_ref) in fields.iter() { self.generate(*value_ref, ctx); @@ -709,7 +730,11 @@ impl<'a> ConstraintGenerator<'a> { } // Array initialization - InstData::ArrayInit { elements } => { + InstData::ArrayInit { + elems_start, + elems_len, + } => { + let elements = self.rir.get_inst_refs(*elems_start, *elems_len); if elements.is_empty() { // Empty array - need type annotation to know element type // Use a fresh type variable for the element type @@ -790,10 +815,12 @@ impl<'a> ConstraintGenerator<'a> { InstData::MethodCall { receiver, method, - args, + args_start, + args_len, } => { // Generate type for receiver let receiver_info = self.generate(*receiver, ctx); + let args = 
self.rir.get_call_args(*args_start, *args_len); // Get struct name from receiver type if it's a struct // If we can't determine the struct type, we still generate constraints @@ -852,8 +879,10 @@ impl<'a> ConstraintGenerator<'a> { InstData::AssocFnCall { type_name, function, - args, + args_start, + args_len, } => { + let args = self.rir.get_call_args(*args_start, *args_len); let method_key = (*type_name, *function); if let Some(method_sig) = self.methods.get(&method_key) { // Generate constraints for arguments @@ -1569,13 +1598,15 @@ mod tests { data: InstData::IntConst(42), span: Span::new(4, 6), }); + let (args_start, args_len) = rir.add_call_args(&[rue_rir::RirCallArg { + value: arg, + mode: rue_rir::RirArgMode::Normal, + }]); let call = rir.add_inst(rue_rir::Inst { data: InstData::Call { name: func_name, - args: vec![rue_rir::RirCallArg { - value: arg, - mode: rue_rir::RirArgMode::Normal, - }], + args_start, + args_len, }, span: Span::new(0, 7), }); @@ -1607,13 +1638,15 @@ mod tests { data: InstData::IntConst(42), span: Span::new(8, 10), }); + let (args_start, args_len) = rir.add_call_args(&[rue_rir::RirCallArg { + value: arg, + mode: rue_rir::RirArgMode::Normal, + }]); let call = rir.add_inst(rue_rir::Inst { data: InstData::Call { name: unknown_func, - args: vec![rue_rir::RirCallArg { - value: arg, - mode: rue_rir::RirArgMode::Normal, - }], + args_start, + args_len, }, span: Span::new(0, 11), }); @@ -1666,10 +1699,13 @@ mod tests { }); let pattern3 = rue_rir::RirPattern::Wildcard(Span::new(30, 31)); + let arms = vec![(pattern1, body1), (pattern2, body2), (pattern3, body3)]; + let (arms_start, arms_len) = rir.add_match_arms(&arms); let match_inst = rir.add_inst(rue_rir::Inst { data: InstData::Match { scrutinee, - arms: vec![(pattern1, body1), (pattern2, body2), (pattern3, body3)], + arms_start, + arms_len, }, span: Span::new(0, 40), }); diff --git a/crates/rue-air/src/sema.rs b/crates/rue-air/src/sema.rs index 4ffbdb8bf..76512a271 100644 --- 
a/crates/rue-air/src/sema.rs +++ b/crates/rue-air/src/sema.rs @@ -644,9 +644,15 @@ impl<'a> Sema<'a> { // Collect method refs from impl blocks so we can skip them in the first pass let mut method_refs: HashSet = HashSet::new(); for (_, inst) in self.rir.iter() { - if let InstData::ImplDecl { methods, .. } = &inst.data { + if let InstData::ImplDecl { + methods_start, + methods_len, + .. + } = &inst.data + { + let methods = self.rir.get_inst_refs(*methods_start, *methods_len); for method_ref in methods { - method_refs.insert(*method_ref); + method_refs.insert(method_ref); } } } @@ -654,9 +660,11 @@ impl<'a> Sema<'a> { // Analyze regular functions (not methods in impl blocks) for (inst_ref, inst) in self.rir.iter() { if let InstData::FnDecl { - directives: _, + directives_start: _, + directives_len: _, name, - params, + params_start, + params_len, return_type, body, has_self: _, @@ -668,10 +676,16 @@ impl<'a> Sema<'a> { } let fn_name = self.interner.get(*name).to_string(); + let params = self.rir.get_params(*params_start, *params_len); // Try to analyze this function - on error, record it and continue - match self.analyze_single_function(&fn_name, *return_type, params, *body, inst.span) - { + match self.analyze_single_function( + &fn_name, + *return_type, + &params, + *body, + inst.span, + ) { Ok(analyzed) => functions.push(analyzed), Err(e) => errors.push(e), } @@ -680,16 +694,23 @@ impl<'a> Sema<'a> { // Fourth pass: analyze method bodies from impl blocks for (_, inst) in self.rir.iter() { - if let InstData::ImplDecl { type_name, methods } = &inst.data { + if let InstData::ImplDecl { + type_name, + methods_start, + methods_len, + } = &inst.data + { let type_name_str = self.interner.get(*type_name).to_string(); let struct_id = *self.structs.get(type_name).unwrap(); let struct_type = Type::Struct(struct_id); + let methods = self.rir.get_inst_refs(*methods_start, *methods_len); for method_ref in methods { - let method_inst = self.rir.get(*method_ref); + let method_inst = 
self.rir.get(method_ref); if let InstData::FnDecl { name: method_name, - params, + params_start, + params_len, return_type, body, has_self, @@ -697,6 +718,7 @@ impl<'a> Sema<'a> { } = &method_inst.data { let method_name_str = self.interner.get(*method_name).to_string(); + let params = self.rir.get_params(*params_start, *params_len); // Generate method name with struct prefix: "Type.method" or "Type::function" let full_name = if *has_self { @@ -709,7 +731,7 @@ impl<'a> Sema<'a> { match self.analyze_method_function( &full_name, *return_type, - params, + &params, *body, method_inst.span, struct_type, @@ -867,18 +889,22 @@ impl<'a> Sema<'a> { fn collect_struct_definitions(&mut self) -> CompileResult<()> { for (_, inst) in self.rir.iter() { if let InstData::StructDecl { - directives, + directives_start, + directives_len, name, - fields, + fields_start, + fields_len, } = &inst.data { let struct_id = StructId(self.struct_defs.len() as u32); let struct_name = self.interner.get(*name).to_string(); - let is_copy = self.has_copy_directive(directives); + let directives = self.rir.get_directives(*directives_start, *directives_len); + let is_copy = self.has_copy_directive(&directives); + let fields = self.rir.get_field_decls(*fields_start, *fields_len); // Check for duplicate field names let mut seen_fields: HashSet = HashSet::new(); - for (field_name, _) in fields { + for (field_name, _) in &fields { if !seen_fields.insert(*field_name) { let field_name_str = self.interner.get(*field_name).to_string(); return Err(CompileError::new( @@ -893,7 +919,7 @@ impl<'a> Sema<'a> { // Resolve field types (can only be primitive types for now, or other structs) let mut resolved_fields = Vec::new(); - for (field_name, field_type) in fields { + for (field_name, field_type) in &fields { let field_ty = self.resolve_type(*field_type, inst.span)?; resolved_fields.push(StructField { name: self.interner.get(*field_name).to_string(), @@ -916,13 +942,19 @@ impl<'a> Sema<'a> { /// Collect all enum 
definitions from the RIR. fn collect_enum_definitions(&mut self) -> CompileResult<()> { for (_, inst) in self.rir.iter() { - if let InstData::EnumDecl { name, variants } = &inst.data { + if let InstData::EnumDecl { + name, + variants_start, + variants_len, + } = &inst.data + { let enum_id = EnumId(self.enum_defs.len() as u32); let enum_name = self.interner.get(*name).to_string(); + let variants = self.rir.get_symbols(*variants_start, *variants_len); // Check for duplicate variant names let mut seen_variants: HashSet = HashSet::new(); - for variant_name in variants { + for variant_name in &variants { if !seen_variants.insert(*variant_name) { let variant_name_str = self.interner.get(*variant_name).to_string(); return Err(CompileError::new( @@ -1018,12 +1050,14 @@ impl<'a> Sema<'a> { fn validate_copy_structs(&self) -> CompileResult<()> { for (_, inst) in self.rir.iter() { if let InstData::StructDecl { - directives, + directives_start, + directives_len, name, - fields: _, + .. } = &inst.data { - let is_copy = self.has_copy_directive(directives); + let directives = self.rir.get_directives(*directives_start, *directives_len); + let is_copy = self.has_copy_directive(&directives); if !is_copy { continue; } @@ -1097,12 +1131,14 @@ impl<'a> Sema<'a> { for (_, inst) in self.rir.iter() { if let InstData::FnDecl { name, - params, + params_start, + params_len, return_type, .. } = &inst.data { let ret_type = self.resolve_type(*return_type, inst.span)?; + let params = self.rir.get_params(*params_start, *params_len); let param_types: Vec = params .iter() .map(|p| self.resolve_type(p.ty, inst.span)) @@ -1128,7 +1164,12 @@ impl<'a> Sema<'a> { /// that maps (struct_name, method_name) to MethodInfo. 
fn collect_method_definitions(&mut self) -> CompileResult<()> { for (_, inst) in self.rir.iter() { - if let InstData::ImplDecl { type_name, methods } = &inst.data { + if let InstData::ImplDecl { + type_name, + methods_start, + methods_len, + } = &inst.data + { // Check that the type exists let struct_id = match self.structs.get(type_name) { Some(id) => *id, @@ -1143,11 +1184,13 @@ impl<'a> Sema<'a> { let struct_type = Type::Struct(struct_id); // Process each method in the impl block + let methods = self.rir.get_inst_refs(*methods_start, *methods_len); for method_ref in methods { - let method_inst = self.rir.get(*method_ref); + let method_inst = self.rir.get(method_ref); if let InstData::FnDecl { name: method_name, - params, + params_start, + params_len, return_type, body, has_self, @@ -1169,6 +1212,7 @@ impl<'a> Sema<'a> { } // Resolve parameter types + let params = self.rir.get_params(*params_start, *params_len); let param_names: Vec = params.iter().map(|p| p.name).collect(); let param_types: Vec = params .iter() @@ -2025,7 +2069,11 @@ impl<'a> Sema<'a> { Ok(AnalysisResult::new(air_ref, Type::Never)) } - InstData::Match { scrutinee, arms } => { + InstData::Match { + scrutinee, + arms_start, + arms_len, + } => { // Analyze the scrutinee to determine its type let scrutinee_result = self.analyze_inst(air, *scrutinee, ctx)?; let scrutinee_type = scrutinee_result.ty; @@ -2041,6 +2089,7 @@ impl<'a> Sema<'a> { )); } + let arms = self.rir.get_match_arms(*arms_start, *arms_len); // Check for empty match if arms.is_empty() { return Err(CompileError::new(ErrorKind::EmptyMatch, inst.span)); @@ -2301,7 +2350,8 @@ impl<'a> Sema<'a> { } InstData::Alloc { - directives, + directives_start, + directives_len, name, is_mut, ty: _, @@ -2324,7 +2374,8 @@ impl<'a> Sema<'a> { }; // Check if @allow(unused_variable) directive is present - let allow_unused = self.has_allow_directive(directives, "unused_variable"); + let directives = self.rir.get_directives(*directives_start, 
*directives_len); + let allow_unused = self.has_allow_directive(&directives, "unused_variable"); // Allocate slots - structs and arrays need multiple slots // Use abi_slot_count which recursively computes total slots for nested types @@ -2732,7 +2783,11 @@ impl<'a> Sema<'a> { } } - InstData::Call { name, args } => { + InstData::Call { + name, + args_start, + args_len, + } => { // Look up the function let fn_name_str = self.interner.get(*name).to_string(); let fn_info = self.functions.get(name).ok_or_compile_error( @@ -2740,6 +2795,7 @@ impl<'a> Sema<'a> { inst.span, )?; + let args = self.rir.get_call_args(*args_start, *args_len); // Check argument count if args.len() != fn_info.param_types.len() { let expected = fn_info.param_types.len(); @@ -2751,7 +2807,7 @@ impl<'a> Sema<'a> { } // Check for exclusive access violation: same variable passed to multiple inout params - self.check_exclusive_access(args, inst.span)?; + self.check_exclusive_access(&args, inst.span)?; // Clone the data we need before mutable borrow let param_types = fn_info.param_types.clone(); @@ -2784,7 +2840,7 @@ impl<'a> Sema<'a> { } // Analyze arguments (move checking happens in analyze_inst for VarRef) - let air_args = self.analyze_call_args(air, args, ctx)?; + let air_args = self.analyze_call_args(air, &args, ctx)?; // Encode call args into extra array: each arg is (air_ref, mode) let args_len = air_args.len() as u32; @@ -2840,8 +2896,10 @@ impl<'a> Sema<'a> { InstData::StructInit { type_name, - fields: field_inits, + fields_start, + fields_len, } => { + let field_inits = self.rir.get_field_inits(*fields_start, *fields_len); // Look up the struct type let type_name_str = self.interner.get(*type_name); let struct_id = *self.structs.get(type_name).ok_or_compile_error( @@ -3248,7 +3306,12 @@ impl<'a> Sema<'a> { Ok(AnalysisResult::new(air_ref, Type::Unit)) } - InstData::Intrinsic { name, args } => { + InstData::Intrinsic { + name, + args_start, + args_len, + } => { + let args = 
self.rir.get_call_args(*args_start, *args_len); let intrinsic_name = self.interner.get(*name).to_string(); match intrinsic_name.as_str() { @@ -3266,7 +3329,7 @@ impl<'a> Sema<'a> { } // Synthesize the argument type in a single traversal - let arg_result = self.analyze_inst(air, args[0], ctx)?; + let arg_result = self.analyze_inst(air, args[0].value, ctx)?; let arg_type = arg_result.ty; // Check that argument is a supported type (integer, bool, or string) @@ -3313,7 +3376,7 @@ impl<'a> Sema<'a> { } // Analyze the argument - let arg_result = self.analyze_inst(air, args[0], ctx)?; + let arg_result = self.analyze_inst(air, args[0].value, ctx)?; let from_ty = arg_result.ty; // Argument must be an integer type @@ -3439,7 +3502,11 @@ impl<'a> Sema<'a> { Ok(AnalysisResult::new(air_ref, Type::I32)) } - InstData::ArrayInit { elements } => { + InstData::ArrayInit { + elems_start, + elems_len, + } => { + let elements = self.rir.get_inst_refs(*elems_start, *elems_len); // Get the array type from HM inference let array_type_id = match ctx.resolved_types.get(&inst_ref).copied() { Some(Type::Array(id)) => id, @@ -3811,8 +3878,10 @@ impl<'a> Sema<'a> { InstData::MethodCall { receiver, method, - args, + args_start, + args_len, } => { + let args = self.rir.get_call_args(*args_start, *args_len); // For String borrow methods, we need to extract the root variable before // analyzing the receiver so we can "unmove" it afterwards. 
String query // methods (len, capacity, is_empty) use `borrow self` semantics - they @@ -3867,7 +3936,7 @@ impl<'a> Sema<'a> { &method_name_str, receiver_result, receiver_storage, - args, + &args, inst.span, ); } @@ -3877,7 +3946,7 @@ impl<'a> Sema<'a> { ctx, &method_name_str, receiver_result, - args, + &args, inst.span, ); } @@ -3936,7 +4005,7 @@ impl<'a> Sema<'a> { } // Check for exclusive access violation in method args - self.check_exclusive_access(args, inst.span)?; + self.check_exclusive_access(&args, inst.span)?; // Clone data needed before mutable borrow let return_type = method_info.return_type; @@ -3946,7 +4015,7 @@ impl<'a> Sema<'a> { value: receiver_result.air_ref, mode: AirArgMode::Normal, // receiver is not inout }]; - air_args.extend(self.analyze_call_args(air, args, ctx)?); + air_args.extend(self.analyze_call_args(air, &args, ctx)?); // Generate a method call name: Type.method (intern for AIR) let call_name = format!("{}.{}", struct_name_str, method_name_str); @@ -3977,8 +4046,10 @@ impl<'a> Sema<'a> { InstData::AssocFnCall { type_name, function, - args, + args_start, + args_len, } => { + let args = self.rir.get_call_args(*args_start, *args_len); // Get the type and function names for error messages let type_name_str = self.interner.get(*type_name).to_string(); let function_name_str = self.interner.get(*function).to_string(); @@ -3992,7 +4063,7 @@ impl<'a> Sema<'a> { air, ctx, &function_name_str, - args, + &args, inst.span, ); } @@ -4036,13 +4107,13 @@ impl<'a> Sema<'a> { } // Check for exclusive access violation in assoc fn args - self.check_exclusive_access(args, inst.span)?; + self.check_exclusive_access(&args, inst.span)?; // Clone data needed before mutable borrow let return_type = method_info.return_type; // Analyze arguments - let air_args = self.analyze_call_args(air, args, ctx)?; + let air_args = self.analyze_call_args(air, &args, ctx)?; // Generate a function call name: Type::function (intern for AIR) let call_name = format!("{}::{}", 
type_name_str, function_name_str); diff --git a/crates/rue-intern/src/lib.rs b/crates/rue-intern/src/lib.rs index 0c859de43..c535f0e4c 100644 --- a/crates/rue-intern/src/lib.rs +++ b/crates/rue-intern/src/lib.rs @@ -27,12 +27,24 @@ use string_interner::{StringInterner, Symbol as SymbolTrait}; pub struct Symbol(SymbolU32); impl Symbol { - /// Create a symbol from a raw index. + /// Create a symbol from a usize index. Only for internal use. #[inline] fn from_usize(index: usize) -> Option { SymbolU32::try_from_usize(index).map(Symbol) } + /// Create a symbol from a raw index. + /// + /// # Panics + /// Panics if the index is invalid (>= u32::MAX - 1). + /// + /// This is intended for use by the RIR extra data deserialization, where + /// we know the indices are valid because they were serialized from valid symbols. + #[inline] + pub fn from_raw(index: u32) -> Self { + Self::from_usize(index as usize).expect("invalid symbol index") + } + /// Get the raw index of this symbol. #[inline] pub fn as_u32(self) -> u32 { diff --git a/crates/rue-rir/src/astgen.rs b/crates/rue-rir/src/astgen.rs index 6833c16af..a7eb453a7 100644 --- a/crates/rue-rir/src/astgen.rs +++ b/crates/rue-rir/src/astgen.rs @@ -92,6 +92,7 @@ impl<'a> AstGen<'a> { fn gen_struct(&mut self, struct_decl: &StructDecl) -> InstRef { let directives = self.convert_directives(&struct_decl.directives); + let (directives_start, directives_len) = self.rir.add_directives(&directives); let name = struct_decl.name.name; // Already a Symbol let fields: Vec<_> = struct_decl .fields @@ -102,12 +103,15 @@ impl<'a> AstGen<'a> { (field_name, field_type) }) .collect(); + let (fields_start, fields_len) = self.rir.add_field_decls(&fields); self.rir.add_inst(Inst { data: InstData::StructDecl { - directives, + directives_start, + directives_len, name, - fields, + fields_start, + fields_len, }, span: struct_decl.span, }) @@ -120,9 +124,14 @@ impl<'a> AstGen<'a> { .iter() .map(|v| v.name.name) // Already a Symbol .collect(); + let 
(variants_start, variants_len) = self.rir.add_symbols(&variants); self.rir.add_inst(Inst { - data: InstData::EnumDecl { name, variants }, + data: InstData::EnumDecl { + name, + variants_start, + variants_len, + }, span: enum_decl.span, }) } @@ -136,9 +145,14 @@ impl<'a> AstGen<'a> { .iter() .map(|m| self.gen_method(m)) .collect(); + let (methods_start, methods_len) = self.rir.add_inst_refs(&methods); self.rir.add_inst(Inst { - data: InstData::ImplDecl { type_name, methods }, + data: InstData::ImplDecl { + type_name, + methods_start, + methods_len, + }, span: impl_block.span, }) } @@ -158,6 +172,7 @@ impl<'a> AstGen<'a> { fn gen_method(&mut self, method: &Method) -> InstRef { // Convert directives let directives = self.convert_directives(&method.directives); + let (directives_start, directives_len) = self.rir.add_directives(&directives); // Get the method name (already a Symbol) and return type let name = method.name.name; // Already a Symbol @@ -176,6 +191,7 @@ impl<'a> AstGen<'a> { mode: self.convert_param_mode(p.mode), }) .collect(); + let (params_start, params_len) = self.rir.add_params(&params); // Generate body expression let body = self.gen_expr(&method.body); @@ -187,9 +203,11 @@ impl<'a> AstGen<'a> { // Sema uses has_self to add the implicit self parameter for methods. 
self.rir.add_inst(Inst { data: InstData::FnDecl { - directives, + directives_start, + directives_len, name, - params, + params_start, + params_len, return_type, body, has_self, @@ -245,6 +263,7 @@ impl<'a> AstGen<'a> { fn gen_function(&mut self, func: &Function) -> InstRef { // Convert directives let directives = self.convert_directives(&func.directives); + let (directives_start, directives_len) = self.rir.add_directives(&directives); // Get the function name (already a Symbol) and return type let name = func.name.name; // Already a Symbol @@ -263,6 +282,7 @@ impl<'a> AstGen<'a> { mode: self.convert_param_mode(p.mode), }) .collect(); + let (params_start, params_len) = self.rir.add_params(&params); // Generate body expression let body = self.gen_expr(&func.body); @@ -271,9 +291,11 @@ impl<'a> AstGen<'a> { // Regular functions don't have a self receiver self.rir.add_inst(Inst { data: InstData::FnDecl { - directives, + directives_start, + directives_len, name, - params, + params_start, + params_len, return_type, body, has_self: false, @@ -393,19 +415,26 @@ impl<'a> AstGen<'a> { (pattern, body) }) .collect(); + let (arms_start, arms_len) = self.rir.add_match_arms(&arms); self.rir.add_inst(Inst { - data: InstData::Match { scrutinee, arms }, + data: InstData::Match { + scrutinee, + arms_start, + arms_len, + }, span: match_expr.span, }) } Expr::Call(call) => { let args: Vec<_> = call.args.iter().map(|a| self.convert_call_arg(a)).collect(); + let (args_start, args_len) = self.rir.add_call_args(&args); self.rir.add_inst(Inst { data: InstData::Call { name: call.name.name, // Already a Symbol - args, + args_start, + args_len, }, span: call.span, }) @@ -434,11 +463,13 @@ impl<'a> AstGen<'a> { (f.name.name, field_value) // name is already a Symbol }) .collect(); + let (fields_start, fields_len) = self.rir.add_field_inits(&fields); self.rir.add_inst(Inst { data: InstData::StructInit { type_name: struct_lit.name.name, // Already a Symbol - fields, + fields_start, + fields_len, }, 
span: struct_lit.span, }) @@ -492,9 +523,14 @@ impl<'a> AstGen<'a> { IntrinsicArg::Type(_) => None, // This shouldn't happen for expr intrinsics }) .collect(); + let (args_start, args_len) = self.rir.add_inst_refs(&args); self.rir.add_inst(Inst { - data: InstData::Intrinsic { name, args }, + data: InstData::Intrinsic { + name, + args_start, + args_len, + }, span: intrinsic.span, }) } @@ -504,9 +540,13 @@ impl<'a> AstGen<'a> { .iter() .map(|e| self.gen_expr(e)) .collect(); + let (elems_start, elems_len) = self.rir.add_inst_refs(&elements); self.rir.add_inst(Inst { - data: InstData::ArrayInit { elements }, + data: InstData::ArrayInit { + elems_start, + elems_len, + }, span: array_lit.span, }) } @@ -535,12 +575,14 @@ impl<'a> AstGen<'a> { .iter() .map(|a| self.convert_call_arg(a)) .collect(); + let (args_start, args_len) = self.rir.add_call_args(&args); self.rir.add_inst(Inst { data: InstData::MethodCall { receiver, method: method_call.method.name, // Already a Symbol - args, + args_start, + args_len, }, span: method_call.span, }) @@ -551,12 +593,14 @@ impl<'a> AstGen<'a> { .iter() .map(|a| self.convert_call_arg(a)) .collect(); + let (args_start, args_len) = self.rir.add_call_args(&args); self.rir.add_inst(Inst { data: InstData::AssocFnCall { type_name: assoc_fn_call.type_name.name, // Already a Symbol function: assoc_fn_call.function.name, // Already a Symbol - args, + args_start, + args_len, }, span: assoc_fn_call.span, }) @@ -622,6 +666,7 @@ impl<'a> AstGen<'a> { match stmt { Statement::Let(let_stmt) => { let directives = self.convert_directives(&let_stmt.directives); + let (directives_start, directives_len) = self.rir.add_directives(&directives); let name = match &let_stmt.pattern { LetPattern::Ident(ident) => Some(ident.name), // Already a Symbol LetPattern::Wildcard(_) => None, @@ -630,7 +675,8 @@ impl<'a> AstGen<'a> { let init = self.gen_expr(&let_stmt.init); self.rir.add_inst(Inst { data: InstData::Alloc { - directives, + directives_start, + directives_len, 
name, is_mut: let_stmt.is_mut, ty, @@ -710,14 +756,16 @@ mod tests { let (_, fn_inst) = rir.iter().last().unwrap(); match &fn_inst.data { InstData::FnDecl { - directives: _, name, - params, + params_start, + params_len, return_type, body, has_self, + .. } => { assert_eq!(interner.get(*name), "main"); + let params = rir.get_params(*params_start, *params_len); assert!(params.is_empty()); assert_eq!(interner.get(*return_type), "i32"); assert!(!has_self); // Regular functions don't have self @@ -864,11 +912,11 @@ mod tests { let (_, inst) = alloc_inst.unwrap(); match &inst.data { InstData::Alloc { - directives: _, name, is_mut, ty, init, + .. } => { assert_eq!(interner.get(name.unwrap()), "x"); assert!(!is_mut); @@ -1002,8 +1050,13 @@ mod tests { let (_, inst) = impl_decl.unwrap(); match &inst.data { - InstData::ImplDecl { type_name, methods } => { + InstData::ImplDecl { + type_name, + methods_start, + methods_len, + } => { assert_eq!(interner.get(*type_name), "Point"); + let methods = rir.get_inst_refs(*methods_start, *methods_len); assert_eq!(methods.len(), 1); // Check the method is a FnDecl with has_self=true @@ -1040,12 +1093,17 @@ mod tests { let (_, inst) = impl_decl.unwrap(); match &inst.data { - InstData::ImplDecl { methods, .. } => { + InstData::ImplDecl { + methods_start, + methods_len, + .. + } => { + let methods = rir.get_inst_refs(*methods_start, *methods_len); assert_eq!(methods.len(), 3); // Check get_x and get_y have self, origin does not for method_ref in methods { - let method_inst = rir.get(*method_ref); + let method_inst = rir.get(method_ref); match &method_inst.data { InstData::FnDecl { name, has_self, .. 
} => { let method_name = interner.get(*name); @@ -1088,9 +1146,11 @@ mod tests { InstData::MethodCall { receiver: _, method, - args, + args_start, + args_len, } => { assert_eq!(interner.get(*method), "get_x"); + let args = rir.get_call_args(*args_start, *args_len); assert!(args.is_empty()); // No explicit args (self is implicit) } _ => panic!("expected MethodCall"), @@ -1122,10 +1182,12 @@ mod tests { InstData::AssocFnCall { type_name, function, - args, + args_start, + args_len, } => { assert_eq!(interner.get(*type_name), "Point"); assert_eq!(interner.get(*function), "origin"); + let args = rir.get_call_args(*args_start, *args_len); assert!(args.is_empty()); } _ => panic!("expected AssocFnCall"), @@ -1153,7 +1215,12 @@ mod tests { let (_, inst) = match_inst.unwrap(); match &inst.data { - InstData::Match { arms, .. } => { + InstData::Match { + arms_start, + arms_len, + .. + } => { + let arms = rir.get_match_arms(*arms_start, *arms_len); assert_eq!(arms.len(), 1); assert!(matches!(arms[0].0, RirPattern::Wildcard(_))); } @@ -1182,7 +1249,12 @@ mod tests { let (_, inst) = match_inst.unwrap(); match &inst.data { - InstData::Match { arms, .. } => { + InstData::Match { + arms_start, + arms_len, + .. + } => { + let arms = rir.get_match_arms(*arms_start, *arms_len); assert_eq!(arms.len(), 3); assert!(matches!(arms[0].0, RirPattern::Int(1, _))); assert!(matches!(arms[1].0, RirPattern::Int(2, _))); @@ -1213,7 +1285,12 @@ mod tests { let (_, inst) = match_inst.unwrap(); match &inst.data { - InstData::Match { arms, .. } => { + InstData::Match { + arms_start, + arms_len, + .. + } => { + let arms = rir.get_match_arms(*arms_start, *arms_len); assert_eq!(arms.len(), 3); assert!(matches!(arms[0].0, RirPattern::Int(-5, _))); assert!(matches!(arms[1].0, RirPattern::Int(-10, _))); @@ -1243,7 +1320,12 @@ mod tests { let (_, inst) = match_inst.unwrap(); match &inst.data { - InstData::Match { arms, .. } => { + InstData::Match { + arms_start, + arms_len, + .. 
+ } => { + let arms = rir.get_match_arms(*arms_start, *arms_len); assert_eq!(arms.len(), 2); assert!(matches!(arms[0].0, RirPattern::Bool(true, _))); assert!(matches!(arms[1].0, RirPattern::Bool(false, _))); @@ -1274,7 +1356,12 @@ mod tests { let (_, inst) = match_inst.unwrap(); match &inst.data { - InstData::Match { arms, .. } => { + InstData::Match { + arms_start, + arms_len, + .. + } => { + let arms = rir.get_match_arms(*arms_start, *arms_len); assert_eq!(arms.len(), 3); // Check first arm is Color::Red @@ -1403,18 +1490,25 @@ mod tests { let (_, inst) = impl_decl.unwrap(); match &inst.data { - InstData::ImplDecl { methods, .. } => { + InstData::ImplDecl { + methods_start, + methods_len, + .. + } => { + let methods = rir.get_inst_refs(*methods_start, *methods_len); let method_inst = rir.get(methods[0]); match &method_inst.data { InstData::FnDecl { name, - params, + params_start, + params_len, has_self, .. } => { assert_eq!(interner.get(*name), "add"); assert!(*has_self); // params should contain 'amount', not 'self' + let params = rir.get_params(*params_start, *params_len); assert_eq!(params.len(), 1); assert_eq!(interner.get(params[0].name), "amount"); } diff --git a/crates/rue-rir/src/inst.rs b/crates/rue-rir/src/inst.rs index e59c9d005..ca8c466e3 100644 --- a/crates/rue-rir/src/inst.rs +++ b/crates/rue-rir/src/inst.rs @@ -128,6 +128,53 @@ impl RirPattern { } } +/// Extra data marker types for type-safe storage in the extra array. +/// These types represent data stored in the extra array. + +/// Stored representation of RirCallArg in the extra array. +/// Layout: [value: u32, mode: u32] = 2 u32s per arg +const CALL_ARG_SIZE: u32 = 2; + +/// Stored representation of RirParam in the extra array. +/// Layout: [name: u32, ty: u32, mode: u32] = 3 u32s per param +const PARAM_SIZE: u32 = 3; + +/// Stored representation of match arm in the extra array. +/// Layout: pattern data + [body: u32] +/// Pattern data varies by kind (see PatternKind enum). 
+
+/// Pattern kinds encoded in extra array (the first word of every match-arm record)
+#[repr(u32)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum PatternKind {
+    /// Wildcard pattern: [kind, span_start, span_len]
+    Wildcard = 0,
+    /// Int pattern: [kind, span_start, span_len, value_lo, value_hi]
+    Int = 1,
+    /// Bool pattern: [kind, span_start, span_len, value]
+    Bool = 2,
+    /// Path pattern: [kind, span_start, span_len, type_name, variant]
+    Path = 3,
+}
+
+/// Size of each pattern kind in the extra array (including body InstRef)
+const PATTERN_WILDCARD_SIZE: u32 = 4; // kind, span_start, span_len, body
+const PATTERN_INT_SIZE: u32 = 6; // kind, span_start, span_len, value_lo, value_hi, body
+const PATTERN_BOOL_SIZE: u32 = 5; // kind, span_start, span_len, value, body
+const PATTERN_PATH_SIZE: u32 = 6; // kind, span_start, span_len, type_name, variant, body
+
+/// Stored representation of struct field initializer.
+/// Layout: [field_name: u32, value: u32] = 2 u32s per field
+const FIELD_INIT_SIZE: u32 = 2;
+
+/// Stored representation of struct field declaration.
+/// Layout: [field_name: u32, field_type: u32] = 2 u32s per field
+const FIELD_DECL_SIZE: u32 = 2;
+
+// Stored representation of directive in the extra array (plain comment: no item follows it).
+// Layout: [name: u32, span_start: u32, span_len: u32, args_len: u32, args...]
+// Variable size due to args.
+
 /// The complete RIR for a source file.
 #[derive(Debug, Default)]
 pub struct Rir {
@@ -216,6 +263,287 @@ impl Rir {
         let end = start + len as usize;
         &self.extra[start..end]
     }
+
+    // ===== Helper methods for storing/retrieving typed data in the extra array =====
+
+    /// Store a slice of InstRefs (one u32 word each) and return (start, len).
+    pub fn add_inst_refs(&mut self, refs: &[InstRef]) -> (u32, u32) {
+        let data: Vec<u32> = refs.iter().map(|r| r.as_u32()).collect();
+        let start = self.add_extra(&data);
+        (start, refs.len() as u32)
+    }
+
+    /// Retrieve InstRefs from the extra array.
+    pub fn get_inst_refs(&self, start: u32, len: u32) -> Vec<InstRef> {
+        self.get_extra(start, len)
+            .iter()
+            .map(|&v| InstRef::from_raw(v))
+            .collect()
+    }
+
+    /// Store a slice of Symbols (one u32 word each) and return (start, len).
+    pub fn add_symbols(&mut self, symbols: &[Symbol]) -> (u32, u32) {
+        let data: Vec<u32> = symbols.iter().map(|s| s.as_u32()).collect();
+        let start = self.add_extra(&data);
+        (start, symbols.len() as u32)
+    }
+
+    /// Retrieve `len` Symbols from the extra array, starting at word index `start`.
+    pub fn get_symbols(&self, start: u32, len: u32) -> Vec<Symbol> {
+        self.get_extra(start, len)
+            .iter()
+            .map(|&v| Symbol::from_raw(v))
+            .collect()
+    }
+
+    /// Store RirCallArgs and return (start, len), where len counts args, not u32 words.
+    /// Layout: [value: u32, mode: u32] per arg
+    pub fn add_call_args(&mut self, args: &[RirCallArg]) -> (u32, u32) {
+        let mut data = Vec::with_capacity(args.len() * CALL_ARG_SIZE as usize);
+        for arg in args {
+            data.push(arg.value.as_u32());
+            data.push(arg.mode as u32);
+        }
+        let start = self.add_extra(&data);
+        (start, args.len() as u32)
+    }
+
+    /// Retrieve `len` RirCallArgs (CALL_ARG_SIZE words each) from the extra array.
+    pub fn get_call_args(&self, start: u32, len: u32) -> Vec<RirCallArg> {
+        let data = self.get_extra(start, len * CALL_ARG_SIZE);
+        let mut args = Vec::with_capacity(len as usize);
+        for chunk in data.chunks(CALL_ARG_SIZE as usize) {
+            let value = InstRef::from_raw(chunk[0]);
+            let mode = match chunk[1] {
+                0 => RirArgMode::Normal,
+                1 => RirArgMode::Inout,
+                2 => RirArgMode::Borrow,
+                _ => RirArgMode::Normal, // Fallback, shouldn't happen
+            };
+            args.push(RirCallArg { value, mode });
+        }
+        args
+    }
+
+    /// Store RirParams and return (start, len).
+    /// Layout: [name: u32, ty: u32, mode: u32] per param; len counts params, not u32 words
+    pub fn add_params(&mut self, params: &[RirParam]) -> (u32, u32) {
+        let mut data = Vec::with_capacity(params.len() * PARAM_SIZE as usize);
+        for param in params {
+            data.push(param.name.as_u32());
+            data.push(param.ty.as_u32());
+            data.push(param.mode as u32);
+        }
+        let start = self.add_extra(&data);
+        (start, params.len() as u32)
+    }
+
+    /// Retrieve `len` RirParams (PARAM_SIZE words each) from the extra array.
+    pub fn get_params(&self, start: u32, len: u32) -> Vec<RirParam> {
+        let data = self.get_extra(start, len * PARAM_SIZE);
+        let mut params = Vec::with_capacity(len as usize);
+        for chunk in data.chunks(PARAM_SIZE as usize) {
+            let name = Symbol::from_raw(chunk[0]);
+            let ty = Symbol::from_raw(chunk[1]);
+            let mode = match chunk[2] {
+                0 => RirParamMode::Normal,
+                1 => RirParamMode::Inout,
+                2 => RirParamMode::Borrow,
+                _ => RirParamMode::Normal, // Fallback
+            };
+            params.push(RirParam { name, ty, mode });
+        }
+        params
+    }
+
+    /// Store match arms (pattern + body pairs) and return (start, arm_count).
+    /// Each arm is stored with variable size depending on pattern kind.
+    pub fn add_match_arms(&mut self, arms: &[(RirPattern, InstRef)]) -> (u32, u32) {
+        let start = self.extra.len() as u32; // records are appended, so the block begins at the current end
+        for (pattern, body) in arms {
+            match pattern {
+                RirPattern::Wildcard(span) => {
+                    self.extra.push(PatternKind::Wildcard as u32); // tag word comes first in every record
+                    self.extra.push(span.start());
+                    self.extra.push(span.len()); // spans are stored as (start, len)
+                    self.extra.push(body.as_u32()); // the arm's body ref is always the last word
+                }
+                RirPattern::Int(value, span) => {
+                    self.extra.push(PatternKind::Int as u32);
+                    self.extra.push(span.start());
+                    self.extra.push(span.len());
+                    // Split the i64 into two u32 words, low word first; get_match_arms rejoins them
+                    self.extra.push(*value as u32);
+                    self.extra.push((*value >> 32) as u32);
+                    self.extra.push(body.as_u32());
+                }
+                RirPattern::Bool(value, span) => {
+                    self.extra.push(PatternKind::Bool as u32);
+                    self.extra.push(span.start());
+                    self.extra.push(span.len());
+                    self.extra.push(if *value { 1 } else { 0 }); // get_match_arms decodes this with `!= 0`
+                    self.extra.push(body.as_u32());
+                }
+                RirPattern::Path {
+                    type_name,
+                    variant,
+                    span,
+                } => {
+                    self.extra.push(PatternKind::Path as u32);
+                    self.extra.push(span.start());
+                    self.extra.push(span.len());
+                    self.extra.push(type_name.as_u32());
+                    self.extra.push(variant.as_u32());
+                    self.extra.push(body.as_u32());
+                }
+            }
+        }
+        (start, arms.len() as u32) // second element counts arms, not u32 words
+    }
+
+    /// Retrieve match arms from the extra array.
+    pub fn get_match_arms(&self, start: u32, arm_count: u32) -> Vec<(RirPattern, InstRef)> {
+        let mut arms = Vec::with_capacity(arm_count as usize);
+        let mut pos = start as usize; // cursor into self.extra, advanced by each record's size
+
+        for _ in 0..arm_count {
+            let kind = self.extra[pos]; // first word of a record is its PatternKind tag
+            match kind {
+                k if k == PatternKind::Wildcard as u32 => {
+                    let span_start = self.extra[pos + 1];
+                    let span_len = self.extra[pos + 2];
+                    let span = Span::new(span_start, span_start + span_len); // stored as (start, len)
+                    let body = InstRef::from_raw(self.extra[pos + 3]);
+                    arms.push((RirPattern::Wildcard(span), body));
+                    pos += PATTERN_WILDCARD_SIZE as usize;
+                }
+                k if k == PatternKind::Int as u32 => {
+                    let span_start = self.extra[pos + 1];
+                    let span_len = self.extra[pos + 2];
+                    let span = Span::new(span_start, span_start + span_len);
+                    let value_lo = self.extra[pos + 3] as i64; // u32 -> i64 zero-extends
+                    let value_hi = self.extra[pos + 4] as i64;
+                    let value = value_lo | (value_hi << 32); // low word | high word moved to bits 32..63
+                    let body = InstRef::from_raw(self.extra[pos + 5]);
+                    arms.push((RirPattern::Int(value, span), body));
+                    pos += PATTERN_INT_SIZE as usize;
+                }
+                k if k == PatternKind::Bool as u32 => {
+                    let span_start = self.extra[pos + 1];
+                    let span_len = self.extra[pos + 2];
+                    let span = Span::new(span_start, span_start + span_len);
+                    let value = self.extra[pos + 3] != 0; // any non-zero word decodes as true
+                    let body = InstRef::from_raw(self.extra[pos + 4]);
+                    arms.push((RirPattern::Bool(value, span), body));
+                    pos += PATTERN_BOOL_SIZE as usize;
+                }
+                k if k == PatternKind::Path as u32 => {
+                    let span_start = self.extra[pos + 1];
+                    let span_len = self.extra[pos + 2];
+                    let span = Span::new(span_start, span_start + span_len);
+                    let type_name = Symbol::from_raw(self.extra[pos + 3]);
+                    let variant = Symbol::from_raw(self.extra[pos + 4]);
+                    let body = InstRef::from_raw(self.extra[pos + 5]);
+                    arms.push((
+                        RirPattern::Path {
+                            type_name,
+                            variant,
+                            span,
+                        },
+                        body,
+                    ));
+                    pos += PATTERN_PATH_SIZE as usize;
+                }
+                _ => panic!("Unknown pattern kind: {}", kind), // tag matches no PatternKind discriminant
+            }
+        }
+        arms
+    }
+
+    /// Store field initializers (name, value) and return
(start, len).
+    /// Each field contributes two words to the extra array: [name, value].
+    pub fn add_field_inits(&mut self, fields: &[(Symbol, InstRef)]) -> (u32, u32) {
+        let mut words: Vec<u32> = Vec::with_capacity(fields.len() * FIELD_INIT_SIZE as usize);
+        for (field_name, field_value) in fields {
+            words.extend([field_name.as_u32(), field_value.as_u32()]);
+        }
+        let count = fields.len() as u32;
+        let first = self.add_extra(&words);
+        (first, count)
+    }
+
+    /// Decode `len` field initializers from the extra array, beginning at word `start`.
+    pub fn get_field_inits(&self, start: u32, len: u32) -> Vec<(Symbol, InstRef)> {
+        let words = self.get_extra(start, len * FIELD_INIT_SIZE);
+        let record_len = FIELD_INIT_SIZE as usize;
+        let decode = |record: &[u32]| {
+            let name = Symbol::from_raw(record[0]);
+            let value = InstRef::from_raw(record[1]);
+            (name, value)
+        };
+        words.chunks(record_len).map(decode).collect()
+    }
+
+    /// Append field declarations (name, type) to the extra array; returns (start, len).
+    /// Each field contributes two words: [name, type].
+    pub fn add_field_decls(&mut self, fields: &[(Symbol, Symbol)]) -> (u32, u32) {
+        let mut words: Vec<u32> = Vec::with_capacity(fields.len() * FIELD_DECL_SIZE as usize);
+        for (field_name, field_ty) in fields {
+            words.extend([field_name.as_u32(), field_ty.as_u32()]);
+        }
+        let count = fields.len() as u32;
+        let first = self.add_extra(&words);
+        (first, count)
+    }
+
+    /// Decode `len` field declarations from the extra array, beginning at word `start`.
+    pub fn get_field_decls(&self, start: u32, len: u32) -> Vec<(Symbol, Symbol)> {
+        let words = self.get_extra(start, len * FIELD_DECL_SIZE);
+        let record_len = FIELD_DECL_SIZE as usize;
+        let decode = |record: &[u32]| {
+            let name = Symbol::from_raw(record[0]);
+            let ty = Symbol::from_raw(record[1]);
+            (name, ty)
+        };
+        words.chunks(record_len).map(decode).collect()
+    }
+
+    /// Store directives and return (start, directive_count).
+    /// Layout: [name: u32, span_start: u32, span_len: u32, args_len: u32, args...]
per directive
+    pub fn add_directives(&mut self, directives: &[RirDirective]) -> (u32, u32) {
+        let start = self.extra.len() as u32;
+        for directive in directives {
+            self.extra.push(directive.name.as_u32());
+            self.extra.push(directive.span.start());
+            self.extra.push(directive.span.len());
+            self.extra.push(directive.args.len() as u32);
+            for arg in &directive.args {
+                self.extra.push(arg.as_u32());
+            }
+        }
+        (start, directives.len() as u32)
+    }
+
+    /// Retrieve `directive_count` directives written by add_directives, starting at `start`.
+    pub fn get_directives(&self, start: u32, directive_count: u32) -> Vec<RirDirective> {
+        let mut directives = Vec::with_capacity(directive_count as usize);
+        let mut pos = start as usize;
+
+        for _ in 0..directive_count {
+            let name = Symbol::from_raw(self.extra[pos]);
+            // The span is stored as (start, len), so the end is rebuilt as start + len.
+            let (span_start, span_len) = (self.extra[pos + 1], self.extra[pos + 2]);
+            let span = Span::new(span_start, span_start + span_len);
+            let args_len = self.extra[pos + 3] as usize;
+            pos += 4;
+            let args: Vec<Symbol> = (0..args_len)
+                .map(|i| Symbol::from_raw(self.extra[pos + i]))
+                .collect();
+            pos += args_len;
+            directives.push(RirDirective { name, args, span });
+        }
+        directives
+    }
 }
 
 /// A single RIR instruction.
@@ -307,11 +635,14 @@ pub enum InstData {
     InfiniteLoop { body: InstRef },
 
     /// Match expression: match scrutinee { pattern => expr, ... }
+    /// Arms are stored in the extra array using add_match_arms/get_match_arms.
     Match {
         /// The value being matched
         scrutinee: InstRef,
-        /// Match arms: [(pattern, body), ...]
-        arms: Vec<(RirPattern, InstRef)>,
+        /// Index into extra data where arms start
+        arms_start: u32,
+        /// Number of match arms
+        arms_len: u32,
     },
 
     /// Break: exits the innermost loop
@@ -322,12 +653,17 @@ pub enum InstData {
 
     /// Function definition
     /// Contains: name symbol, parameters, return type symbol, body instruction ref
+    /// Directives and params are stored in the extra array.
FnDecl { - /// Directives applied to this function - directives: Vec, + /// Index into extra data where directives start + directives_start: u32, + /// Number of directives + directives_len: u32, name: Symbol, - /// Parameters with names, types, and modes - params: Vec, + /// Index into extra data where params start + params_start: u32, + /// Number of parameters + params_len: u32, return_type: Symbol, body: InstRef, /// Whether this function/method takes `self` as a receiver. @@ -337,19 +673,25 @@ pub enum InstData { }, /// Function call + /// Args are stored in the extra array using add_call_args/get_call_args. Call { /// Function name name: Symbol, - /// Arguments with optional inout flags - args: Vec, + /// Index into extra data where args start + args_start: u32, + /// Number of arguments + args_len: u32, }, /// Intrinsic call with expression arguments (e.g., @dbg) + /// Args are stored in the extra array using add_inst_refs/get_inst_refs. Intrinsic { /// Intrinsic name (without @) name: Symbol, - /// Argument instruction refs - args: Vec, + /// Index into extra data where args start + args_start: u32, + /// Number of arguments + args_len: u32, }, /// Intrinsic call with a type argument (e.g., @size_of, @align_of) @@ -383,9 +725,12 @@ pub enum InstData { // Variable operations /// Local variable declaration: allocates storage and initializes /// If name is None, this is a wildcard pattern that discards the value + /// Directives are stored in the extra array using add_directives/get_directives. Alloc { - /// Directives applied to this let binding - directives: Vec, + /// Index into extra data where directives start + directives_start: u32, + /// Number of directives + directives_len: u32, /// Variable name (None for wildcard `_` pattern that discards the value) name: Option, /// Whether the variable is mutable @@ -412,21 +757,29 @@ pub enum InstData { // Struct operations /// Struct type declaration + /// Directives and fields are stored in the extra array. 
StructDecl { - /// Directives applied to the struct (e.g., @copy) - directives: Vec, + /// Index into extra data where directives start + directives_start: u32, + /// Number of directives + directives_len: u32, /// Struct name name: Symbol, - /// Fields: [(field_name, field_type), ...] - fields: Vec<(Symbol, Symbol)>, + /// Index into extra data where fields start + fields_start: u32, + /// Number of fields + fields_len: u32, }, /// Struct literal: creates a new struct instance + /// Fields are stored in the extra array using add_field_inits/get_field_inits. StructInit { /// Struct type name type_name: Symbol, - /// Field initializers: [(field_name, value_inst), ...] - fields: Vec<(Symbol, InstRef)>, + /// Index into extra data where fields start + fields_start: u32, + /// Number of fields + fields_len: u32, }, /// Field access: reads a field from a struct @@ -449,11 +802,14 @@ pub enum InstData { // Enum operations /// Enum type declaration + /// Variants are stored in the extra array using add_symbols/get_symbols. EnumDecl { /// Enum name name: Symbol, - /// Variant names (no data for now) - variants: Vec, + /// Index into extra data where variants start + variants_start: u32, + /// Number of variants + variants_len: u32, }, /// Enum variant: creates a value of an enum type @@ -466,9 +822,12 @@ pub enum InstData { // Array operations /// Array literal: creates a new array from element values + /// Elements are stored in the extra array using add_inst_refs/get_inst_refs. ArrayInit { - /// Element values - elements: Vec, + /// Index into extra data where elements start + elems_start: u32, + /// Number of elements + elems_len: u32, }, /// Array index read: reads an element from an array @@ -491,31 +850,40 @@ pub enum InstData { // Method operations /// Impl block declaration + /// Methods are stored in the extra array using add_inst_refs/get_inst_refs. 
ImplDecl { /// Type name this impl block is for type_name: Symbol, - /// Methods defined in this impl block (references to FnDecl instructions) - methods: Vec, + /// Index into extra data where method refs start + methods_start: u32, + /// Number of methods + methods_len: u32, }, /// Method call: receiver.method(args) + /// Args are stored in the extra array using add_call_args/get_call_args. MethodCall { /// Receiver expression (the struct value) receiver: InstRef, /// Method name method: Symbol, - /// Arguments with optional inout flags - args: Vec, + /// Index into extra data where args start + args_start: u32, + /// Number of arguments + args_len: u32, }, /// Associated function call: Type::function(args) + /// Args are stored in the extra array using add_call_args/get_call_args. AssocFnCall { /// Type name (e.g., Point) type_name: Symbol, /// Function name (e.g., origin) function: Symbol, - /// Arguments with optional inout flags - args: Vec, + /// Index into extra data where args start + args_start: u32, + /// Number of arguments + args_len: u32, }, /// User-defined destructor declaration: drop fn TypeName(self) { ... 
} @@ -635,7 +1003,12 @@ impl<'a, 'b> RirPrinter<'a, 'b> { } InstData::Loop { cond, body } => writeln!(out, "loop {}, {}", cond, body).unwrap(), InstData::InfiniteLoop { body } => writeln!(out, "infinite_loop {}", body).unwrap(), - InstData::Match { scrutinee, arms } => { + InstData::Match { + scrutinee, + arms_start, + arms_len, + } => { + let arms = self.rir.get_match_arms(*arms_start, *arms_len); let arms_str: Vec = arms .iter() .map(|(pat, body)| format!("{} => {}", self.format_pattern(pat), body)) @@ -647,9 +1020,11 @@ impl<'a, 'b> RirPrinter<'a, 'b> { // Functions InstData::FnDecl { - directives: _, + directives_start: _, + directives_len: _, name, - params, + params_start, + params_len, return_type, body, has_self, @@ -657,6 +1032,7 @@ impl<'a, 'b> RirPrinter<'a, 'b> { let name_str = self.interner.get(*name); let ret_str = self.interner.get(*return_type); let self_str = if *has_self { "self, " } else { "" }; + let params = self.rir.get_params(*params_start, *params_len); let params_str: Vec = params .iter() .map(|p| { @@ -692,12 +1068,22 @@ impl<'a, 'b> RirPrinter<'a, 'b> { writeln!(out, "ret").unwrap(); } } - InstData::Call { name, args } => { + InstData::Call { + name, + args_start, + args_len, + } => { let name_str = self.interner.get(*name); - writeln!(out, "call {}({})", name_str, Self::format_call_args(args)).unwrap(); + let args = self.rir.get_call_args(*args_start, *args_len); + writeln!(out, "call {}({})", name_str, Self::format_call_args(&args)).unwrap(); } - InstData::Intrinsic { name, args } => { + InstData::Intrinsic { + name, + args_start, + args_len, + } => { let name_str = self.interner.get(*name); + let args = self.rir.get_inst_refs(*args_start, *args_len); let args_str: Vec = args.iter().map(|a| format!("{}", a)).collect(); writeln!(out, "intrinsic @{}({})", name_str, args_str.join(", ")).unwrap(); } @@ -715,7 +1101,8 @@ impl<'a, 'b> RirPrinter<'a, 'b> { // Variables InstData::Alloc { - directives: _, + directives_start: _, + directives_len: 
_, name, is_mut, ty, @@ -739,11 +1126,14 @@ impl<'a, 'b> RirPrinter<'a, 'b> { // Structs InstData::StructDecl { - directives, + directives_start, + directives_len, name, - fields, + fields_start, + fields_len, } => { let name_str = self.interner.get(*name); + let fields = self.rir.get_field_decls(*fields_start, *fields_len); let fields_str: Vec = fields .iter() .map(|(fname, ftype)| { @@ -754,6 +1144,7 @@ impl<'a, 'b> RirPrinter<'a, 'b> { ) }) .collect(); + let directives = self.rir.get_directives(*directives_start, *directives_len); let directives_str = if directives.is_empty() { String::new() } else { @@ -772,8 +1163,13 @@ impl<'a, 'b> RirPrinter<'a, 'b> { ) .unwrap(); } - InstData::StructInit { type_name, fields } => { + InstData::StructInit { + type_name, + fields_start, + fields_len, + } => { let type_str = self.interner.get(*type_name); + let fields = self.rir.get_field_inits(*fields_start, *fields_len); let fields_str: Vec = fields .iter() .map(|(fname, value)| format!("{}: {}", self.interner.get(*fname), value)) @@ -801,8 +1197,13 @@ impl<'a, 'b> RirPrinter<'a, 'b> { } // Enums - InstData::EnumDecl { name, variants } => { + InstData::EnumDecl { + name, + variants_start, + variants_len, + } => { let name_str = self.interner.get(*name); + let variants = self.rir.get_symbols(*variants_start, *variants_len); let variants_str: Vec = variants .iter() .map(|v| self.interner.get(*v).to_string()) @@ -820,7 +1221,11 @@ impl<'a, 'b> RirPrinter<'a, 'b> { } // Arrays - InstData::ArrayInit { elements } => { + InstData::ArrayInit { + elems_start, + elems_len, + } => { + let elements = self.rir.get_inst_refs(*elems_start, *elems_len); let elems_str: Vec = elements.iter().map(|e| format!("{}", e)).collect(); writeln!(out, "array_init [{}]", elems_str.join(", ")).unwrap(); @@ -833,8 +1238,13 @@ impl<'a, 'b> RirPrinter<'a, 'b> { } // Methods - InstData::ImplDecl { type_name, methods } => { + InstData::ImplDecl { + type_name, + methods_start, + methods_len, + } => { let 
type_str = self.interner.get(*type_name); + let methods = self.rir.get_inst_refs(*methods_start, *methods_len); let methods_str: Vec = methods.iter().map(|m| format!("{}", m)).collect(); writeln!(out, "impl {} {{ {} }}", type_str, methods_str.join(", ")).unwrap(); @@ -842,28 +1252,32 @@ impl<'a, 'b> RirPrinter<'a, 'b> { InstData::MethodCall { receiver, method, - args, + args_start, + args_len, } => { + let args = self.rir.get_call_args(*args_start, *args_len); writeln!( out, "method_call {}.{}({})", receiver, self.interner.get(*method), - Self::format_call_args(args) + Self::format_call_args(&args) ) .unwrap(); } InstData::AssocFnCall { type_name, function, - args, + args_start, + args_len, } => { + let args = self.rir.get_call_args(*args_start, *args_len); writeln!( out, "assoc_fn_call {}::{}({})", self.interner.get(*type_name), self.interner.get(*function), - Self::format_call_args(args) + Self::format_call_args(&args) ) .unwrap(); } @@ -1355,15 +1769,20 @@ mod tests { let param_name = interner.intern("x"); let param_type = interner.intern("i32"); + let (directives_start, directives_len) = rir.add_directives(&[]); + let (params_start, params_len) = rir.add_params(&[RirParam { + name: param_name, + ty: param_type, + mode: RirParamMode::Normal, + }]); + rir.add_inst(Inst { data: InstData::FnDecl { - directives: vec![], + directives_start, + directives_len, name, - params: vec![RirParam { - name: param_name, - ty: param_type, - mode: RirParamMode::Normal, - }], + params_start, + params_len, return_type, body, has_self: false, @@ -1387,11 +1806,16 @@ mod tests { let name = interner.intern("get_x"); let return_type = interner.intern("i32"); + let (directives_start, directives_len) = rir.add_directives(&[]); + let (params_start, params_len) = rir.add_params(&[]); + rir.add_inst(Inst { data: InstData::FnDecl { - directives: vec![], + directives_start, + directives_len, name, - params: vec![], + params_start, + params_len, return_type, body, has_self: true, @@ -1421,27 
+1845,32 @@ mod tests { let param3_name = interner.intern("c"); let param3_type = interner.intern("i32"); + let (directives_start, directives_len) = rir.add_directives(&[]); + let (params_start, params_len) = rir.add_params(&[ + RirParam { + name: param1_name, + ty: param1_type, + mode: RirParamMode::Normal, + }, + RirParam { + name: param2_name, + ty: param2_type, + mode: RirParamMode::Inout, + }, + RirParam { + name: param3_name, + ty: param3_type, + mode: RirParamMode::Borrow, + }, + ]); + rir.add_inst(Inst { data: InstData::FnDecl { - directives: vec![], + directives_start, + directives_len, name, - params: vec![ - RirParam { - name: param1_name, - ty: param1_type, - mode: RirParamMode::Normal, - }, - RirParam { - name: param2_name, - ty: param2_type, - mode: RirParamMode::Inout, - }, - RirParam { - name: param3_name, - ty: param3_type, - mode: RirParamMode::Borrow, - }, - ], + params_start, + params_len, return_type, body, has_self: false, @@ -1466,13 +1895,16 @@ mod tests { let name = interner.intern("foo"); + let (args_start, args_len) = rir.add_call_args(&[RirCallArg { + value: arg, + mode: RirArgMode::Normal, + }]); + rir.add_inst(Inst { data: InstData::Call { name, - args: vec![RirCallArg { - value: arg, - mode: RirArgMode::Normal, - }], + args_start, + args_len, }, span: Span::new(0, 8), }); @@ -1500,23 +1932,26 @@ mod tests { let name = interner.intern("modify"); + let (args_start, args_len) = rir.add_call_args(&[ + RirCallArg { + value: arg1, + mode: RirArgMode::Normal, + }, + RirCallArg { + value: arg2, + mode: RirArgMode::Inout, + }, + RirCallArg { + value: arg3, + mode: RirArgMode::Borrow, + }, + ]); + rir.add_inst(Inst { data: InstData::Call { name, - args: vec![ - RirCallArg { - value: arg1, - mode: RirArgMode::Normal, - }, - RirCallArg { - value: arg2, - mode: RirArgMode::Inout, - }, - RirCallArg { - value: arg3, - mode: RirArgMode::Borrow, - }, - ], + args_start, + args_len, }, span: Span::new(0, 20), }); @@ -1536,10 +1971,16 @@ mod tests { let 
name = interner.intern("dbg");
 
+        // Intrinsic args are bare InstRefs (the printer reads them back with
+        // get_inst_refs), so store them with add_inst_refs. add_call_args would
+        // interleave a mode word after every value in the extra array.
+        let (args_start, args_len) = rir.add_inst_refs(&[arg]);
+
         rir.add_inst(Inst {
             data: InstData::Intrinsic {
                 name,
-                args: vec![arg],
+                args_start,
+                args_len,
             },
             span: Span::new(0, 10),
         });
@@ -1607,10 +2048,13 @@ mod tests {
         let name = interner.intern("x");
         let ty = interner.intern("i32");
 
+        let (directives_start, directives_len) = rir.add_directives(&[]);
+
         // Normal alloc with type
         rir.add_inst(Inst {
             data: InstData::Alloc {
-                directives: vec![],
+                directives_start,
+                directives_len,
                 name: Some(name),
                 is_mut: false,
                 ty: Some(ty),
@@ -1634,9 +2078,12 @@ mod tests {
 
         let name = interner.intern("x");
 
+        let (directives_start, directives_len) = rir.add_directives(&[]);
+
         rir.add_inst(Inst {
             data: InstData::Alloc {
-                directives: vec![],
+                directives_start,
+                directives_len,
                 name: Some(name),
                 is_mut: true,
                 ty: None,
@@ -1658,9 +2105,12 @@ mod tests {
             span: Span::new(0, 2),
         });
 
+        let (directives_start, directives_len) = rir.add_directives(&[]);
+
         rir.add_inst(Inst {
             data: InstData::Alloc {
-                directives: vec![],
+                directives_start,
+                directives_len,
                 name: None,
                 is_mut: false,
                 ty: None,
@@ -1717,11 +2167,17 @@ mod tests {
         let y_name = interner.intern("y");
         let i32_type = interner.intern("i32");
 
+        let (directives_start, directives_len) = rir.add_directives(&[]);
+        let (fields_start, fields_len) =
+            rir.add_field_decls(&[(x_name, i32_type), (y_name, i32_type)]);
+
         rir.add_inst(Inst {
             data: InstData::StructDecl {
-                directives: vec![],
+                directives_start,
+                directives_len,
                 name,
-                fields: vec![(x_name, i32_type), (y_name, i32_type)],
+                fields_start,
+                fields_len,
             },
             span: Span::new(0, 30),
         });
@@ -1739,15 +2195,20 @@ mod tests {
         let i32_type = interner.intern("i32");
         let copy_name = interner.intern("copy");
 
+        let (directives_start, directives_len) = rir.add_directives(&[RirDirective {
+            name: copy_name,
+            args: vec![],
+            span: Span::new(0, 5),
+        }]);
+        let (fields_start, fields_len) =
rir.add_field_decls(&[(x_name, i32_type)]); + rir.add_inst(Inst { data: InstData::StructDecl { - directives: vec![RirDirective { - name: copy_name, - args: vec![], - span: Span::new(0, 5), - }], + directives_start, + directives_len, name, - fields: vec![(x_name, i32_type)], + fields_start, + fields_len, }, span: Span::new(0, 30), }); @@ -1773,10 +2234,13 @@ mod tests { let x_name = interner.intern("x"); let y_name = interner.intern("y"); + let (fields_start, fields_len) = rir.add_field_inits(&[(x_name, x_val), (y_name, y_val)]); + rir.add_inst(Inst { data: InstData::StructInit { type_name, - fields: vec![(x_name, x_val), (y_name, y_val)], + fields_start, + fields_len, }, span: Span::new(0, 25), }); @@ -1838,10 +2302,13 @@ mod tests { let green = interner.intern("Green"); let blue = interner.intern("Blue"); + let (variants_start, variants_len) = rir.add_symbols(&[red, green, blue]); + rir.add_inst(Inst { data: InstData::EnumDecl { name, - variants: vec![red, green, blue], + variants_start, + variants_len, }, span: Span::new(0, 35), }); @@ -1883,9 +2350,12 @@ mod tests { span: Span::new(0, 1), }); + let (elems_start, elems_len) = rir.add_inst_refs(&[elem1, elem2, elem3]); + rir.add_inst(Inst { data: InstData::ArrayInit { - elements: vec![elem1, elem2, elem3], + elems_start, + elems_len, }, span: Span::new(0, 10), }); @@ -1956,11 +2426,16 @@ mod tests { let method_name = interner.intern("get_x"); let return_type = interner.intern("i32"); + let (directives_start, directives_len) = rir.add_directives(&[]); + let (params_start, params_len) = rir.add_params(&[]); + let method_ref = rir.add_inst(Inst { data: InstData::FnDecl { - directives: vec![], + directives_start, + directives_len, name: method_name, - params: vec![], + params_start, + params_len, return_type, body: method_body, has_self: true, @@ -1970,10 +2445,13 @@ mod tests { let type_name = interner.intern("Point"); + let (methods_start, methods_len) = rir.add_inst_refs(&[method_ref]); + rir.add_inst(Inst { data: 
InstData::ImplDecl { type_name, - methods: vec![method_ref], + methods_start, + methods_len, }, span: Span::new(0, 50), }); @@ -1997,14 +2475,17 @@ mod tests { let method = interner.intern("add"); + let (args_start, args_len) = rir.add_call_args(&[RirCallArg { + value: arg, + mode: RirArgMode::Normal, + }]); + rir.add_inst(Inst { data: InstData::MethodCall { receiver, method, - args: vec![RirCallArg { - value: arg, - mode: RirArgMode::Normal, - }], + args_start, + args_len, }, span: Span::new(0, 15), }); @@ -2032,20 +2513,23 @@ mod tests { let method = interner.intern("modify"); + let (args_start, args_len) = rir.add_call_args(&[ + RirCallArg { + value: arg1, + mode: RirArgMode::Inout, + }, + RirCallArg { + value: arg2, + mode: RirArgMode::Borrow, + }, + ]); + rir.add_inst(Inst { data: InstData::MethodCall { receiver, method, - args: vec![ - RirCallArg { - value: arg1, - mode: RirArgMode::Inout, - }, - RirCallArg { - value: arg2, - mode: RirArgMode::Borrow, - }, - ], + args_start, + args_len, }, span: Span::new(0, 25), }); @@ -2062,11 +2546,14 @@ mod tests { let type_name = interner.intern("Point"); let function = interner.intern("origin"); + let (args_start, args_len) = rir.add_call_args(&[]); + rir.add_inst(Inst { data: InstData::AssocFnCall { type_name, function, - args: vec![], + args_start, + args_len, }, span: Span::new(0, 15), }); @@ -2091,20 +2578,23 @@ mod tests { let type_name = interner.intern("Point"); let function = interner.intern("new"); + let (args_start, args_len) = rir.add_call_args(&[ + RirCallArg { + value: arg1, + mode: RirArgMode::Normal, + }, + RirCallArg { + value: arg2, + mode: RirArgMode::Normal, + }, + ]); + rir.add_inst(Inst { data: InstData::AssocFnCall { type_name, function, - args: vec![ - RirCallArg { - value: arg1, - mode: RirArgMode::Normal, - }, - RirCallArg { - value: arg2, - mode: RirArgMode::Normal, - }, - ], + args_start, + args_len, }, span: Span::new(0, 20), }); @@ -2147,10 +2637,14 @@ mod tests { span: Span::new(0, 1), }); 
+ let (arms_start, arms_len) = + rir.add_match_arms(&[(RirPattern::Wildcard(Span::new(0, 1)), body)]); + rir.add_inst(Inst { data: InstData::Match { scrutinee, - arms: vec![(RirPattern::Wildcard(Span::new(0, 1)), body)], + arms_start, + arms_len, }, span: Span::new(0, 20), }); @@ -2180,14 +2674,17 @@ mod tests { span: Span::new(0, 1), }); + let (arms_start, arms_len) = rir.add_match_arms(&[ + (RirPattern::Int(1, Span::new(0, 1)), body1), + (RirPattern::Int(-5, Span::new(0, 2)), body2), + (RirPattern::Wildcard(Span::new(0, 1)), body_default), + ]); + rir.add_inst(Inst { data: InstData::Match { scrutinee, - arms: vec![ - (RirPattern::Int(1, Span::new(0, 1)), body1), - (RirPattern::Int(-5, Span::new(0, 2)), body2), - (RirPattern::Wildcard(Span::new(0, 1)), body_default), - ], + arms_start, + arms_len, }, span: Span::new(0, 30), }); @@ -2213,13 +2710,16 @@ mod tests { span: Span::new(0, 1), }); + let (arms_start, arms_len) = rir.add_match_arms(&[ + (RirPattern::Bool(true, Span::new(0, 4)), body_true), + (RirPattern::Bool(false, Span::new(0, 5)), body_false), + ]); + rir.add_inst(Inst { data: InstData::Match { scrutinee, - arms: vec![ - (RirPattern::Bool(true, Span::new(0, 4)), body_true), - (RirPattern::Bool(false, Span::new(0, 5)), body_false), - ], + arms_start, + arms_len, }, span: Span::new(0, 30), }); @@ -2253,28 +2753,31 @@ mod tests { let red = interner.intern("Red"); let green = interner.intern("Green"); + let (arms_start, arms_len) = rir.add_match_arms(&[ + ( + RirPattern::Path { + type_name: color, + variant: red, + span: Span::new(0, 10), + }, + body_red, + ), + ( + RirPattern::Path { + type_name: color, + variant: green, + span: Span::new(0, 12), + }, + body_green, + ), + (RirPattern::Wildcard(Span::new(0, 1)), body_default), + ]); + rir.add_inst(Inst { data: InstData::Match { scrutinee, - arms: vec![ - ( - RirPattern::Path { - type_name: color, - variant: red, - span: Span::new(0, 10), - }, - body_red, - ), - ( - RirPattern::Path { - type_name: color, - 
variant: green,
-                            span: Span::new(0, 12),
-                        },
-                        body_green,
-                    ),
-                    (RirPattern::Wildcard(Span::new(0, 1)), body_default),
-                ],
+                arms_start,
+                arms_len,
             },
             span: Span::new(0, 50),
         });
diff --git a/crates/rue-span/src/lib.rs b/crates/rue-span/src/lib.rs
index e510aa15f..dce67b653 100644
--- a/crates/rue-span/src/lib.rs
+++ b/crates/rue-span/src/lib.rs
@@ -100,6 +100,12 @@ impl Span {
         }
     }
 
+    /// The start byte offset of this span (the value of its `start` field).
+    #[inline]
+    pub const fn start(&self) -> u32 {
+        self.start
+    }
+
     /// The length of this span in bytes.
     #[inline]
     pub const fn len(&self) -> u32 {