diff --git a/crates/core/src/cfg_ir/mod.rs b/crates/core/src/cfg_ir/mod.rs
index 580e61d..cb86008 100644
--- a/crates/core/src/cfg_ir/mod.rs
+++ b/crates/core/src/cfg_ir/mod.rs
@@ -751,6 +751,174 @@ impl CfgIrBundle {
         Ok(())
     }
 
+    /// Remap return-address PUSH instructions in Solidity internal function call patterns.
+    ///
+    /// After `reindex_pcs` shifts PCs, `write_symbolic_immediates` and `patch_jump_immediates`
+    /// update PUSH values that feed directly into JUMP/JUMPI. However, return addresses pushed
+    /// earlier in a block (the `PUSH ret_addr` in `PUSH ret_addr; PUSH func_entry; JUMP`) are
+    /// not part of any recognized jump pattern and become stale. This pass finds those specific
+    /// return-address PUSHes and remaps them.
+    ///
+    /// A PUSH is rewritten only when its immediate (made absolute with the old runtime start
+    /// for runtime blocks) is the pre-reindex PC of a JUMPDEST and `pc_mapping` moves it.
+    ///
+    /// NOTE(review): in blocks ending in JUMP/JUMPI every PUSH outside the recognized jump
+    /// pattern is a candidate, so a plain constant that happens to equal an old JUMPDEST PC
+    /// is rewritten as well.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error from `apply_immediate` if a remapped immediate cannot be written.
+    pub fn remap_orphan_jump_pushes(
+        &mut self,
+        pc_mapping: &HashMap<usize, usize>,
+        old_runtime_bounds: Option<(usize, usize)>,
+    ) -> Result<(), Error> {
+        let old_runtime_start = old_runtime_bounds.map(|(s, _)| s);
+        let new_runtime_start = self.runtime_bounds.map(|(s, _)| s);
+
+        // Build set of old JUMPDEST PCs so we can verify candidates are real jump targets.
+        // Instruction PCs are looked up as *new* PCs, hence the inverse (new -> old) map.
+        let inverse: HashMap<usize, usize> =
+            pc_mapping.iter().map(|(&old, &new)| (new, old)).collect();
+        let mut old_jumpdest_pcs: HashSet<usize> = HashSet::new();
+        for node in self.cfg.node_indices() {
+            let Some(Block::Body(body)) = self.cfg.node_weight(node) else {
+                continue;
+            };
+            old_jumpdest_pcs.extend(
+                body.instructions
+                    .iter()
+                    .filter(|instr| matches!(instr.op, Opcode::JUMPDEST))
+                    .filter_map(|instr| inverse.get(&instr.pc).copied()),
+            );
+        }
+
+        if old_jumpdest_pcs.is_empty() {
+            return Ok(());
+        }
+
+        // First pass: collect (node, instr_idx, new_value) edits while the graph is only read.
+        let mut edits: Vec<(NodeIndex, usize, usize)> = Vec::new();
+
+        for node in self.cfg.node_indices() {
+            let Some(Block::Body(body)) = self.cfg.node_weight(node) else {
+                continue;
+            };
+
+            let in_runtime = body.is_runtime(self.runtime_bounds);
+
+            // Decode the PUSH at `idx` and compute its remapped immediate. Yields
+            // (old_value, new_value, old_pc_abs, new_pc_abs), or `None` when the instruction
+            // is not a PUSH, its value is not the old PC of a JUMPDEST, or nothing changes.
+            let remap_push_at = |idx: usize| -> Option<(usize, usize, usize, usize)> {
+                let instr = body.instructions.get(idx)?;
+                if !matches!(instr.op, Opcode::PUSH(_)) {
+                    return None;
+                }
+                let push_value = usize::from_str_radix(instr.imm.as_ref()?, 16).ok()?;
+                let old_pc_abs = if in_runtime {
+                    old_runtime_start.unwrap_or(0).saturating_add(push_value)
+                } else {
+                    push_value
+                };
+                if !old_jumpdest_pcs.contains(&old_pc_abs) {
+                    return None;
+                }
+                let new_pc_abs = *pc_mapping.get(&old_pc_abs)?;
+                let new_value = match new_runtime_start {
+                    Some(start) if in_runtime => new_pc_abs.saturating_sub(start),
+                    _ => new_pc_abs,
+                };
+                (new_value != push_value)
+                    .then_some((push_value, new_value, old_pc_abs, new_pc_abs))
+            };
+
+            // PUSH indices that belong to the recognized terminal jump pattern. They are
+            // excluded below to avoid double-remapping.
+            let pattern = detect_jump_pattern(&body.instructions);
+            let pattern_pushes: Vec<usize> = match &pattern {
+                Some(JumpPattern::Direct { push_idx }) => vec![*push_idx],
+                Some(JumpPattern::SplitAdd {
+                    push_a_idx,
+                    push_b_idx,
+                }) => vec![*push_a_idx, *push_b_idx],
+                Some(JumpPattern::PcRelative { push_idx, .. }) => vec![*push_idx],
+                None => Vec::new(),
+            };
+
+            // Standard path: the PUSH immediately before the jump pattern is the return
+            // address in `PUSH ret_addr; PUSH func_entry; JUMP`.
+            let ret_idx = pattern_pushes.first().and_then(|first| first.checked_sub(1));
+            let standard = ret_idx.into_iter().map(|idx| (idx, ""));
+
+            // Extended scan: for blocks ending with JUMP/JUMPI (regardless of pattern),
+            // scan ALL PUSH instructions for values matching old JUMPDEST PCs.
+            //
+            // Solidity contracts with inheritance (e.g. EscrowERC20 + EscrowBase) emit
+            // internal function call patterns where the return address PUSH is separated
+            // from the terminal JUMP by several instructions:
+            //
+            //   PUSH ret_addr   ← return address, not adjacent to JUMP
+            //   DUP3
+            //   PUSH2 value
+            //   SWAP4
+            //   PUSH0
+            //   SSTORE
+            //   PUSH1 slot
+            //   SSTORE
+            //   JUMP            ← uses ret_addr still on the stack
+            //
+            // The standard path only checks the PUSH immediately before a recognized jump
+            // pattern; this scan catches return addresses at arbitrary positions in the block.
+            let ends_with_jump = body
+                .instructions
+                .last()
+                .is_some_and(|i| matches!(i.op, Opcode::JUMP | Opcode::JUMPI));
+            let scan_len = if ends_with_jump { body.instructions.len() } else { 0 };
+            let extended = (0..scan_len)
+                .filter(|idx| Some(*idx) != ret_idx && !pattern_pushes.contains(idx))
+                .map(|idx| (idx, " [extended scan]"));
+
+            for (idx, tag) in standard.chain(extended) {
+                let Some((old_value, new_value, old_pc_abs, new_pc_abs)) = remap_push_at(idx)
+                else {
+                    continue;
+                };
+                tracing::debug!(
+                    "remap_orphan_jump_pushes: block {} instr {} at pc=0x{:x}: \
+                     0x{:x} -> 0x{:x} (abs: 0x{:x} -> 0x{:x}){}",
+                    node.index(),
+                    idx,
+                    body.instructions[idx].pc,
+                    old_value,
+                    new_value,
+                    old_pc_abs,
+                    new_pc_abs,
+                    tag,
+                );
+                edits.push((node, idx, new_value));
+            }
+        }
+
+        // Second pass: apply the edits
+        let total_remapped = edits.len();
+        for (node, instr_idx, new_value) in edits {
+            if let Some(Block::Body(body)) = self.cfg.node_weight_mut(node) {
+                apply_immediate(&mut body.instructions[instr_idx], new_value)?;
+            }
+        }
+
+        if total_remapped > 0 {
+            tracing::debug!(
+                "remap_orphan_jump_pushes: remapped {} internal-call return address PUSHes",
+                total_remapped
+            );
+        }
+
+        Ok(())
+    }
+
/// Remap all stored metadata that references absolute PCs using the supplied mapping. /// /// This should be called any time a transform invokes `reindex_pcs` directly so that diff --git a/crates/core/src/strip.rs b/crates/core/src/strip.rs index ec26278..d34171f 100644 --- a/crates/core/src/strip.rs +++ b/crates/core/src/strip.rs @@ -398,6 +398,115 @@ impl CleanReport { Ok(()) } + /// Patch immutable reference offsets in the init code. + /// + /// The Solidity compiler's init code writes immutable variable values into the + /// runtime bytecode at hardcoded byte offsets. When obfuscation transforms change + /// the runtime layout (e.g., PushSplit growing blocks), these offsets become stale. + /// This method detects the pattern `PUSH2 ; ... ADD` in the init code and + /// updates each offset using the supplied byte-offset remapping closure. + /// + /// The `remap` closure takes an old byte offset within the runtime and returns the + /// new byte offset, or `None` if no mapping is available. + pub fn patch_init_immutable_refs( + &mut self, + remap: &dyn Fn(usize) -> Option, + ) -> Result<(), String> { + let runtime_start = self + .runtime_layout + .iter() + .map(|span| span.offset) + .min() + .ok_or("No runtime layout found")?; + let runtime_end = runtime_start + self.clean_len; + + let init_section = self + .removed + .iter_mut() + .find(|r| matches!(r.kind, SectionKind::Init)) + .ok_or("No Init section found")?; + + let mut init_bytes = init_section.data.to_vec(); + let mut patched = 0usize; + let mut idx = 0usize; + + while idx < init_bytes.len() { + let opcode = init_bytes[idx]; + if !(0x60..=0x7f).contains(&opcode) { + idx += 1; + continue; + } + + let width = (opcode - 0x60 + 1) as usize; + if idx + 1 + width > init_bytes.len() { + idx += 1; + continue; + } + + let mut value = 0usize; + for &byte in &init_bytes[idx + 1..idx + 1 + width] { + value = (value << 8) | byte as usize; + } + + // Check if the next non-stack-manipulation opcode is ADD (0x01). 
+ // The pattern is: PUSH2 ; (DUP/SWAP ops); ADD + let after = idx + 1 + width; + let is_add_target = if after < init_bytes.len() { + init_bytes[after] == 0x01 // ADD immediately follows + } else { + false + }; + + // Only remap values that look like runtime offsets followed by ADD + if is_add_target + && value >= 1 + && value < runtime_end.saturating_sub(runtime_start) + && let Some(new_value) = remap(value) + && new_value != value + { + // Check that new value fits in the same width + let max = if width >= std::mem::size_of::() { + usize::MAX + } else { + (1usize << (width * 8)) - 1 + }; + if new_value > max { + tracing::warn!( + "Immutable ref at init offset 0x{:x}: new value 0x{:x} exceeds \ + PUSH{} capacity", + idx, + new_value, + width + ); + } else { + for j in 0..width { + let shift = (width - 1 - j) * 8; + init_bytes[idx + 1 + j] = ((new_value >> shift) & 0xff) as u8; + } + tracing::debug!( + "Patched immutable ref at init offset 0x{:x}: 0x{:x} -> 0x{:x}", + idx, + value, + new_value + ); + patched += 1; + } + } + + idx += 1 + width; + } + + if patched > 0 { + tracing::debug!( + "Patched {} immutable reference offsets in init code", + patched + ); + init_section.data = Bytes::from(init_bytes); + } + + Ok(()) + } + /// Reassemble bytecode by placing the clean runtime at original offsets /// and filling removed sections with their original data. pub fn reassemble(&mut self, clean: &[u8]) -> Vec { diff --git a/crates/transforms/src/obfuscator.rs b/crates/transforms/src/obfuscator.rs index 870860e..810d9eb 100644 --- a/crates/transforms/src/obfuscator.rs +++ b/crates/transforms/src/obfuscator.rs @@ -329,6 +329,30 @@ pub async fn obfuscate_bytecode( tracing::debug!(" {}", log_entry); } + // Capture old instruction layout before reindexing (needed for immutable ref patching). + // For each runtime instruction, record (old_pc, byte_size) so we can build a byte-level + // displacement map after reindex_pcs remaps instruction PCs. 
+ let old_runtime_start = cfg_ir.runtime_bounds.map(|(s, _)| s).unwrap_or(0); + let old_instr_layout: Vec<(usize, usize)> = { + let mut layout = Vec::new(); + let rt_bounds = cfg_ir.runtime_bounds; + for node_idx in cfg_ir.cfg.node_indices() { + if let cfg_ir::Block::Body(body) = &cfg_ir.cfg[node_idx] { + let in_runtime = match rt_bounds { + Some((start, end)) => body.start_pc >= start && body.start_pc < end, + None => true, + }; + if in_runtime { + for instr in &body.instructions { + layout.push((instr.pc, instr.byte_size())); + } + } + } + } + layout.sort_by_key(|(pc, _)| *pc); + layout + }; + // Step 5: Reindex PCs tracing::debug!(" Reindexing PCs to normalize to 0-based addressing"); let (pc_mapping, old_runtime_bounds) = cfg_ir @@ -342,6 +366,12 @@ pub async fn obfuscate_bytecode( .map_err(|e| ObfuscationError::from_err(e, &cfg_ir.trace))?; tracing::debug!(" Patched jump immediates after PC reindexing"); + // Remap orphan jump-address PUSHes (e.g. return addresses for internal function calls) + // that are not part of any recognized jump pattern. + cfg_ir + .remap_orphan_jump_pushes(&pc_mapping, old_runtime_bounds) + .map_err(|e| ObfuscationError::from_err(e, &cfg_ir.trace))?; + // Re-apply dispatcher jump target patches with OLD controller PCs (before updating) // NOTE: These patches update the PUSH2 instructions (jump targets), not the PUSH4 token instructions if let (Some(controller_pcs), Some(dispatcher_patches)) = ( @@ -581,6 +611,52 @@ pub async fn obfuscate_bytecode( ); } + // Step 7b: Patch immutable reference offsets in init code. + // When transforms grow the runtime (e.g., PushSplit), the init code's hardcoded byte + // offsets for writing immutable variables become stale. Build a byte-level displacement + // map from the old instruction layout and pc_mapping, then patch the init code. 
+ { + let new_runtime_start = cfg_ir.runtime_bounds.map(|(s, _)| s).unwrap_or(0); + // Build byte-level remap: for each byte in the old runtime, compute where it lands + // in the new runtime. We build a sorted list of (old_rel_offset, new_rel_offset) for + // each instruction start, then for any query offset, find the containing instruction + // and compute the intra-instruction delta. + let mut byte_remap_entries: Vec<(usize, usize, usize)> = Vec::new(); // (old_rel, new_rel, size) + for &(old_pc, byte_size) in &old_instr_layout { + if let Some(&new_pc) = pc_mapping.get(&old_pc) { + let old_rel = old_pc.saturating_sub(old_runtime_start); + let new_rel = new_pc.saturating_sub(new_runtime_start); + byte_remap_entries.push((old_rel, new_rel, byte_size)); + } + } + byte_remap_entries.sort_by_key(|(old_rel, _, _)| *old_rel); + + let remap = |old_offset: usize| -> Option { + // Binary search for the instruction containing this byte offset + match byte_remap_entries.binary_search_by_key(&old_offset, |(old_rel, _, _)| *old_rel) { + Ok(i) => { + // Exact match on instruction start + Some(byte_remap_entries[i].1) + } + Err(i) if i > 0 => { + // old_offset falls within the instruction at index i-1 + let (old_rel, new_rel, size) = byte_remap_entries[i - 1]; + let delta = old_offset - old_rel; + if delta < size { + Some(new_rel + delta) + } else { + None + } + } + _ => None, + } + }; + + if let Err(e) = cfg_ir.clean_report.patch_init_immutable_refs(&remap) { + tracing::warn!("Failed to patch init immutable refs: {}", e); + } + } + // Step 8: Reassemble final bytecode (init + runtime with data section + auxdata) let final_bytecode = cfg_ir.clean_report.reassemble(&obfuscated_bytes); let obfuscated_size = final_bytecode.len(); diff --git a/crates/transforms/src/push_split.rs b/crates/transforms/src/push_split.rs index 7aa1ccc..22b4a09 100644 --- a/crates/transforms/src/push_split.rs +++ b/crates/transforms/src/push_split.rs @@ -80,6 +80,14 @@ impl Transform for PushSplit { 
continue; } + if !matches!(body.control, BlockControl::Terminal) { + debug!( + "PushSplit: skipping non-terminal block at PC 0x{:x}", + body.start_pc + ); + continue; + } + if has_raw_jump_target(body) { debug!( "PushSplit: skipping block with raw jump target at PC 0x{:x}", @@ -262,32 +270,12 @@ enum CombineOp { /// Generate a randomized chain of (push, combine-op) pairs whose reduction yields `value`. fn generate_chain(value: u128, width_bytes: u8, rng: &mut StdRng) -> Vec<(u128, CombineOp)> { let bits = (width_bytes as u32) * 8; - let full_width = bits == 128; - let modulus = (!full_width).then(|| 1u128 << bits); - let mask = modulus.map(|m| m - 1).unwrap_or(u128::MAX); - let sample = |rng: &mut StdRng| -> u128 { - if let Some(m) = modulus { - rng.random_range(0..m) - } else { - rng.random::() - } - }; - - let add_mod = |acc: u128, part: u128| -> u128 { - if let Some(m) = modulus { - (acc + part) % m - } else { - acc.wrapping_add(part) - } - }; - - let sub_mod = |acc: u128, part: u128| -> u128 { - if let Some(m) = modulus { - (acc + m - part) % m - } else { - acc.wrapping_sub(part) - } + let max_value = if bits == 128 { + u128::MAX + } else { + (1u128 << bits) - 1 }; + let sample = |rng: &mut StdRng| -> u128 { rng.random_range(0..=max_value) }; let parts = rng.random_range(2..=4); let prefer_xor = rng.random_bool(0.4); @@ -296,66 +284,44 @@ fn generate_chain(value: u128, width_bytes: u8, rng: &mut StdRng) -> Vec<(u128, let mut acc = 0u128; for i in 0..parts { if i + 1 == parts { - pushes.push(((acc ^ value) & mask, CombineOp::Xor)); + pushes.push((acc ^ value, CombineOp::Xor)); } else { let part = sample(rng); acc ^= part; - pushes.push((part & mask, CombineOp::Xor)); + pushes.push((part, CombineOp::Xor)); } } return pushes; } - // Mixed add/sub chain: (((p1 (+|-) p2) (+|-) p3) ... ) == value mod modulus + // Build add/sub chains without modular wraparound. + // This preserves exact 256-bit EVM semantics for these <=16-byte literals. 
let mut pushes = Vec::with_capacity(parts); let mut acc = 0u128; - let mut ops: Vec = Vec::with_capacity(parts.saturating_sub(1)); - - for _ in 0..parts.saturating_sub(2) { - ops.push(if rng.random_bool(0.7) { - CombineOp::Add - } else { - CombineOp::Sub - }); - } - ops.push(if rng.random_bool(0.5) { - CombineOp::Add - } else { - CombineOp::Sub - }); - for (i, op) in ops.iter().enumerate() { - let part = sample(rng) & mask; - pushes.push((part, *op)); - acc = match op { - CombineOp::Add => add_mod(acc, part), - CombineOp::Sub => sub_mod(acc, part), - CombineOp::Xor => unreachable!(), - }; + for i in 0..parts { + if i + 1 == parts { + if acc <= value { + pushes.push((value - acc, CombineOp::Add)); + } else { + pushes.push((acc - value, CombineOp::Sub)); + } + break; + } - if i + 1 == ops.len() { - let final_op = *op; - let final_part = match final_op { - CombineOp::Add => { - if let Some(m) = modulus { - (value + m - acc) % m - } else { - value.wrapping_sub(acc) - } - } - CombineOp::Sub => { - if let Some(m) = modulus { - (acc + m - value) % m - } else { - acc.wrapping_sub(value) - } - } - CombineOp::Xor => unreachable!(), - }; - pushes.push((final_part & mask, final_op)); + let can_sub = acc > 0; + let use_sub = can_sub && rng.random_bool(0.3); + if use_sub { + let part = rng.random_range(0..=acc); + pushes.push((part, CombineOp::Sub)); + acc -= part; + } else { + let max_add = max_value.saturating_sub(acc); + let part = rng.random_range(0..=max_add); + pushes.push((part, CombineOp::Add)); + acc += part; } } - pushes } @@ -424,6 +390,7 @@ mod tests { use super::*; use azoth_core::process_bytecode_to_cfg; use azoth_core::seed::Seed; + use rand::SeedableRng; const STORAGE_BYTECODE: &str = include_str!("../../../tests/bytecode/storage.hex"); const FIXED_SEED: &str = "0x0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"; @@ -480,4 +447,31 @@ mod tests { assert!(combine_count >= 1, "expected at least one combine op"); assert!(push_count >= 2, "expected 
multiple pushes in split chain");
     }
+
+    #[test]
+    fn generated_chains_preserve_literal_value() {
+        let mut rng = StdRng::seed_from_u64(7);
+        for width in 4u8..=16 {
+            // Largest literal a `width`-byte PUSH can hold (16 bytes fill the whole u128).
+            let max_value = u128::MAX >> (128 - u32::from(width) * 8);
+
+            for _ in 0..256 {
+                let value = rng.random_range(0..=max_value);
+                let chain = generate_chain(value, width, &mut rng);
+                let mut acc = 0u128;
+                for &(part, op) in &chain {
+                    // Presumably each part becomes a `width`-byte PUSH, so it must fit too.
+                    assert!(part <= max_value, "width={width} part=0x{part:x}");
+                    // Checked math: a wrapped step must fail even without overflow checks.
+                    acc = match op {
+                        CombineOp::Add => acc.checked_add(part),
+                        CombineOp::Sub => acc.checked_sub(part),
+                        CombineOp::Xor => Some(acc ^ part),
+                    }
+                    .unwrap_or_else(|| panic!("width={width} value=0x{value:x} wrapped"));
+                }
+                assert_eq!(acc, value, "width={width} value=0x{value:x}");
+            }
+        }
+    }
 }
diff --git a/tests/src/e2e/escrow.rs b/tests/src/e2e/escrow.rs
index 3361307..d1bdbcd 100644
--- a/tests/src/e2e/escrow.rs
+++ b/tests/src/e2e/escrow.rs
@@ -135,10 +135,7 @@ async fn test_obfuscated_function_calls() -> Result<()> {
 
     // obfuscate contract
     println!("\n=== Proceeding with Obfuscated Deployment ===");
-    let mut config = ObfuscationConfig::default();
-    // TEMPORARY: PushSplit can rewrite jump-related immediates in this escrow runtime,
-    // causing invalid jump targets and REVM InvalidJump halts in this e2e test.
- config.transforms.retain(|t| t.name() != "PushSplit"); + let config = ObfuscationConfig::default(); let obfuscation_result = obfuscate_bytecode( ESCROW_CONTRACT_DEPLOYMENT_BYTECODE, @@ -376,11 +373,19 @@ async fn test_obfuscated_function_calls() -> Result<()> { i, push_pc, target, jump_type ); } - println!("\n=== All Available JUMPDESTs (first 20) ==="); - let mut jd_list: Vec<_> = jumpdests.iter().collect(); + println!("\n=== Nearest JUMPDESTs for each invalid target ==="); + let mut jd_list: Vec<_> = jumpdests.iter().copied().collect::>(); jd_list.sort(); - for (i, jd) in jd_list.iter().take(20).enumerate() { - println!(" [{}] JUMPDEST at PC 0x{:x}", i, jd); + for (push_pc, target, _) in invalid_jumps.iter().take(10) { + let nearest: Vec<_> = jd_list + .iter() + .filter(|&&jd| (jd as isize - *target as isize).unsigned_abs() < 20) + .map(|jd| format!("0x{:x}", jd)) + .collect(); + println!( + " target 0x{:x} (from PUSH at 0x{:x}): nearby JUMPDESTs = {:?}", + target, push_pc, nearest + ); } return Err(eyre!( @@ -461,9 +466,13 @@ async fn test_obfuscated_function_calls() -> Result<()> { ..Default::default() }; - let fund_result = evm - .transact(fund_tx) - .map_err(|e| eyre!("Fund transaction failed: {:?}", e))?; + let trace_fund = std::env::var("TRACE_FUND").is_ok(); + let fund_result = if trace_fund { + evm.inspect(fund_tx, StepTracer::new(true)) + } else { + evm.transact(fund_tx) + } + .map_err(|e| eyre!("Fund transaction failed: {:?}", e))?; // Commit fund state changes to database evm.db_mut().commit(fund_result.state.clone());