diff --git a/src/crates/heuristics/src/ast/change.rs b/src/crates/heuristics/src/ast/change.rs index d8606be..c4fdbf7 100644 --- a/src/crates/heuristics/src/ast/change.rs +++ b/src/crates/heuristics/src/ast/change.rs @@ -7,7 +7,10 @@ use tx_indexer_pipeline::{ node::{Node, NodeId}, value::{TxMask, TxOutClustering, TxOutMask, TxOutSet, TxSet}, }; -use tx_indexer_primitives::unified::{AnyOutId, AnyTxId}; +use tx_indexer_primitives::{ + handle::SpendableTxConstituent, + unified::{AnyOutId, AnyTxId}, +}; use crate::change_identification::{ NLockTimeChangeIdentification, NaiveChangeIdentificationHueristic, TxOutChangeAnnotation, @@ -40,9 +43,14 @@ impl Node for ChangeIdentificationNode { let mut result = HashMap::new(); for output_id in txouts.iter() { - let output = output_id.with(ctx.unified_storage()); + let Ok(spendable) = + SpendableTxConstituent::try_new(output_id.with(ctx.unified_storage())) + else { + result.insert(*output_id, false); + continue; + }; let is_change = matches!( - NaiveChangeIdentificationHueristic::is_change(output), + NaiveChangeIdentificationHueristic::is_change(spendable), TxOutChangeAnnotation::Change ); result.insert(*output_id, is_change); @@ -108,11 +116,16 @@ impl Node for FingerPrintChangeIdentificationNode { for output_id in txouts.iter() { let output = output_id.with(ctx.unified_storage()); - let is_change = match output.spender_txin() { + let spender = output.spender_txin(); + let Ok(spendable) = SpendableTxConstituent::try_new(output) else { + result.insert(*output_id, false); + continue; + }; + let is_change = match spender { Some(spending_txin) => { let spending_tx = spending_txin.containing_tx(); matches!( - NLockTimeChangeIdentification::is_change(output, spending_tx), + NLockTimeChangeIdentification::is_change(spendable, spending_tx), TxOutChangeAnnotation::Change ) } diff --git a/src/crates/heuristics/src/ast/uih.rs b/src/crates/heuristics/src/ast/uih.rs index f0d5faa..6317d5f 100644 --- a/src/crates/heuristics/src/ast/uih.rs 
+++ b/src/crates/heuristics/src/ast/uih.rs @@ -4,56 +4,57 @@ //! - UIH1 (Optimal change): smallest output is likely change when min(out) < min(in). //! - UIH2 (Unnecessary input): transaction could pay outputs without the smallest input. +use std::collections::HashMap; + use tx_indexer_pipeline::{ engine::EvalContext, expr::Expr, node::{Node, NodeId}, - value::{TxMask, TxOutSet, TxSet}, + value::{TxMask, TxOutMask, TxOutSet, TxSet}, +}; +use tx_indexer_primitives::{ + handle::SpendableTxConstituent, + unified::{AnyOutId, AnyTxId}, }; -use tx_indexer_primitives::unified::{AnyOutId, AnyTxId}; use crate::uih::UnnecessaryInputHeuristic; /// Node that implements UIH1 (Optimal change heuristic). /// -/// For each transaction where min(output values) < min(input values), adds the -/// smallest output(s) by value to the result set (likely change). +/// For each output, returns `true` if its value is less than the minimum input +/// value of its containing transaction. pub struct UnnecessaryInputHeuristic1Node { - input: Expr, + input: Expr, } impl UnnecessaryInputHeuristic1Node { - pub fn new(input: Expr) -> Self { + pub fn new(input: Expr) -> Self { Self { input } } } impl Node for UnnecessaryInputHeuristic1Node { - type OutputValue = TxOutSet; + type OutputValue = TxOutMask; fn dependencies(&self) -> Vec { vec![self.input.id()] } - fn evaluate(&self, ctx: &EvalContext) -> Vec { - let tx_ids = ctx.get_or_default(&self.input); - let mut result = Vec::new(); - - for tx_id in tx_ids.iter() { - let tx = tx_id.with(ctx.unified_storage()); + fn evaluate(&self, ctx: &EvalContext) -> HashMap { + let txouts = ctx.get_or_default(&self.input); + let mut result = HashMap::new(); - let outputs: Vec<_> = tx.outputs().map(|o| (o.id(), o.value())).collect(); - if outputs.is_empty() { + for output_id in txouts.iter() { + let Ok(spendable) = + SpendableTxConstituent::try_new(output_id.with(ctx.unified_storage())) + else { + result.insert(*output_id, false); continue; - } - - if let 
Some(min_out) = UnnecessaryInputHeuristic::uih1_min_output_value(&tx) { - for output in tx.outputs() { - if output.value() == min_out { - result.push(output.id()); - } - } - } + }; + result.insert( + *output_id, + UnnecessaryInputHeuristic::is_uih1_candidate(spendable), + ); } result @@ -68,9 +69,9 @@ impl Node for UnnecessaryInputHeuristic1Node { pub struct UnnecessaryInputHeuristic1; impl UnnecessaryInputHeuristic1 { - /// Returns the set of outputs that are the smallest by value in each tx - /// where min(out) < min(in) (BlockSci optimal change heuristic). - pub fn new(input: Expr) -> Expr { + /// Returns a mask over outputs where `true` indicates a UIH1 candidate + /// (output value < min input value of its containing transaction). + pub fn new(input: Expr) -> Expr { let ctx = input.context().clone(); ctx.register(UnnecessaryInputHeuristic1Node::new(input)) } @@ -243,15 +244,16 @@ mod tests { let mut engine = engine_with_loose(ctx.clone(), all_txs); let source = AllLooseTxs::new(&ctx); - let uih1 = UnnecessaryInputHeuristic1::new(source.txs()); + let uih1 = UnnecessaryInputHeuristic1::new(source.txs().outputs()); let result = engine.eval(&uih1); let smallest_out = AnyOutId::from(TxOutId::new(TxId(3), 0)); - assert!( - result.contains(&smallest_out), - "UIH1 should contain the smallest output (value 50)" + assert_eq!( + result.get(&smallest_out), + Some(&true), + "UIH1 should flag the smallest output (value 50)" ); - assert_eq!(result.len(), 1); + assert_eq!(result.values().filter(|&&v| v).count(), 1); } #[test] @@ -261,12 +263,12 @@ mod tests { let mut engine = engine_with_loose(ctx.clone(), all_txs); let source = AllLooseTxs::new(&ctx); - let uih1 = UnnecessaryInputHeuristic1::new(source.txs()); + let uih1 = UnnecessaryInputHeuristic1::new(source.txs().outputs()); let result = engine.eval(&uih1); assert!( - result.is_empty(), - "UIH1 should be empty when min(out) >= min(in)" + result.values().all(|&v| !v), + "UIH1 should have no candidates when min(out) >= 
min(in)" ); } @@ -277,12 +279,18 @@ mod tests { let mut engine = engine_with_loose(ctx.clone(), all_txs); let source = AllLooseTxs::new(&ctx); - let uih1 = UnnecessaryInputHeuristic1::new(source.txs()); + let uih1 = UnnecessaryInputHeuristic1::new(source.txs().outputs()); let result = engine.eval(&uih1); - assert!(result.contains(&AnyOutId::from(TxOutId::new(TxId(3), 0)))); - assert!(result.contains(&AnyOutId::from(TxOutId::new(TxId(3), 1)))); - assert_eq!(result.len(), 2); + assert_eq!( + result.get(&AnyOutId::from(TxOutId::new(TxId(3), 0))), + Some(&true) + ); + assert_eq!( + result.get(&AnyOutId::from(TxOutId::new(TxId(3), 1))), + Some(&true) + ); + assert_eq!(result.values().filter(|&&v| v).count(), 2); } #[test] @@ -364,7 +372,7 @@ mod tests { let mut engine = engine_with_loose(ctx.clone(), all_txs); let source = AllLooseTxs::new(&ctx); - let uih1 = UnnecessaryInputHeuristic1::new(source.txs()); + let uih1 = UnnecessaryInputHeuristic1::new(source.txs().outputs()); let uih2 = UnnecessaryInputHeuristic2::new(source.txs()); // `.into_owned()` because we hold both results simultaneously below; @@ -372,13 +380,15 @@ mod tests { let uih1_result = engine.eval(&uih1).into_owned(); let uih2_result = engine.eval(&uih2).into_owned(); - assert!( - uih1_result.contains(&AnyOutId::from(TxOutId::new(TxId(5), 1))), + assert_eq!( + uih1_result.get(&AnyOutId::from(TxOutId::new(TxId(5), 1))), + Some(&true), "UIH1 should flag tx4's smallest output (vout=1)" ); - assert!( - !uih1_result.contains(&AnyOutId::from(TxOutId::new(TxId(6), 0))), - "UIH1 should not flag tx5 (min(out) >= min(in))" + assert_ne!( + uih1_result.get(&AnyOutId::from(TxOutId::new(TxId(6), 0))), + Some(&true), + "UIH1 should not flag tx5's vout=0 (min(out) >= min(in))" ); // uih2: tx4 true, tx5 false diff --git a/src/crates/heuristics/src/change_identification.rs b/src/crates/heuristics/src/change_identification.rs index 30a32fc..c4feb81 100644 --- a/src/crates/heuristics/src/change_identification.rs +++ 
b/src/crates/heuristics/src/change_identification.rs @@ -1,5 +1,5 @@ use tx_indexer_primitives::{ - handle::TxHandle, + handle::{SpendableTxConstituent, TxHandle}, traits::abstract_types::{HasNLockTime, HasScriptPubkey, OutputCount, TxConstituent}, }; @@ -13,7 +13,9 @@ pub struct NaiveChangeIdentificationHueristic; impl NaiveChangeIdentificationHueristic { /// Check if a txout is change based on its containing transaction. - pub fn is_change(txout: impl TxConstituent) -> TxOutChangeAnnotation { + pub fn is_change( + txout: SpendableTxConstituent>, + ) -> TxOutChangeAnnotation { let tx = txout.containing_tx(); if tx.output_count() > 0 && txout.vout() == tx.output_count() - 1 { TxOutChangeAnnotation::Change @@ -27,7 +29,7 @@ pub struct NLockTimeChangeIdentification; impl NLockTimeChangeIdentification { pub fn is_change( - tx_out: impl TxConstituent, + tx_out: SpendableTxConstituent>, spending_tx: impl HasNLockTime, ) -> TxOutChangeAnnotation { let containing_tx_n_locktime = tx_out.containing_tx().locktime(); @@ -56,7 +58,7 @@ impl ScriptTypesMatchingChangeIdentification { /// types, unresolved prevouts, or multiple matching outputs are all treated /// as inconclusive and return `NotChange`. 
pub fn is_change<'a>( - tx_out: impl TxConstituent>, + tx_out: SpendableTxConstituent>>, ) -> TxOutChangeAnnotation { let tx = tx_out.containing_tx(); let mut input_types = tx.inputs().map(|input| input.output_type()); @@ -94,6 +96,7 @@ mod tests { use tx_indexer_primitives::{ UnifiedStorage, + handle::SpendableTxConstituent, loose::LooseIndexBuilder, loose::{TxId, TxOutId}, test_utils::{DummyTxData, DummyTxOut, DummyTxOutData}, @@ -126,7 +129,9 @@ mod tests { containing_tx: DummyTxData::new_with_amounts(vec![100]), }; assert_eq!( - NaiveChangeIdentificationHueristic::is_change(txout), + NaiveChangeIdentificationHueristic::is_change( + SpendableTxConstituent::try_new(txout).unwrap() + ), TxOutChangeAnnotation::Change ); } @@ -139,7 +144,10 @@ mod tests { }; let spending_tx = DummyTxData::new_with_amounts(vec![100]); assert_eq!( - NLockTimeChangeIdentification::is_change(tx_out, spending_tx), + NLockTimeChangeIdentification::is_change( + SpendableTxConstituent::try_new(tx_out).unwrap(), + spending_tx + ), TxOutChangeAnnotation::NotChange ); @@ -150,7 +158,10 @@ mod tests { }; let spending_tx = DummyTxData::new(vec![DummyTxOutData::new(100, 0)], vec![], 1); assert_eq!( - NLockTimeChangeIdentification::is_change(tx_out, spending_tx), + NLockTimeChangeIdentification::is_change( + SpendableTxConstituent::try_new(tx_out).unwrap(), + spending_tx + ), TxOutChangeAnnotation::Change ); } @@ -195,11 +206,15 @@ mod tests { let change = AnyOutId::from(TxOutId::new(TxId(3), 1)).with(&storage); assert_eq!( - ScriptTypesMatchingChangeIdentification::is_change(payment), + ScriptTypesMatchingChangeIdentification::is_change( + SpendableTxConstituent::try_new(payment).unwrap_or_else(|_| unreachable!()) + ), TxOutChangeAnnotation::NotChange ); assert_eq!( - ScriptTypesMatchingChangeIdentification::is_change(change), + ScriptTypesMatchingChangeIdentification::is_change( + SpendableTxConstituent::try_new(change).unwrap_or_else(|_| unreachable!()) + ), TxOutChangeAnnotation::Change ); } 
@@ -244,11 +259,15 @@ mod tests { let change = AnyOutId::from(TxOutId::new(TxId(3), 1)).with(&storage); assert_eq!( - ScriptTypesMatchingChangeIdentification::is_change(payment), + ScriptTypesMatchingChangeIdentification::is_change( + SpendableTxConstituent::try_new(payment).unwrap_or_else(|_| unreachable!()) + ), TxOutChangeAnnotation::NotChange ); assert_eq!( - ScriptTypesMatchingChangeIdentification::is_change(change), + ScriptTypesMatchingChangeIdentification::is_change( + SpendableTxConstituent::try_new(change).unwrap_or_else(|_| unreachable!()) + ), TxOutChangeAnnotation::NotChange ); } @@ -290,11 +309,15 @@ mod tests { let output1 = AnyOutId::from(TxOutId::new(TxId(3), 1)).with(&storage); assert_eq!( - ScriptTypesMatchingChangeIdentification::is_change(output0), + ScriptTypesMatchingChangeIdentification::is_change( + SpendableTxConstituent::try_new(output0).unwrap_or_else(|_| unreachable!()) + ), TxOutChangeAnnotation::NotChange ); assert_eq!( - ScriptTypesMatchingChangeIdentification::is_change(output1), + ScriptTypesMatchingChangeIdentification::is_change( + SpendableTxConstituent::try_new(output1).unwrap_or_else(|_| unreachable!()) + ), TxOutChangeAnnotation::NotChange ); } diff --git a/src/crates/heuristics/src/uih.rs b/src/crates/heuristics/src/uih.rs index 46222a4..39d95e2 100644 --- a/src/crates/heuristics/src/uih.rs +++ b/src/crates/heuristics/src/uih.rs @@ -1,38 +1,36 @@ use bitcoin::Amount; -use tx_indexer_primitives::traits::abstract_types::{ - EnumerateInputValueInArbitraryOrder, EnumerateOutputValueInArbitraryOrder, +use tx_indexer_primitives::{ + AbstractTransaction, + handle::SpendableTxConstituent, + traits::abstract_types::{ + EnumerateInputValueInArbitraryOrder, EnumerateOutputValueInArbitraryOrder, TxConstituent, + }, }; pub struct UnnecessaryInputHeuristic; impl UnnecessaryInputHeuristic { - pub fn uih1_min_output_value(tx: &T) -> Option + /// Returns `true` if this output's value is strictly less than the minimum input value of its containing transaction; returns `false` otherwise, including when the transaction has no inputs. 
+ pub fn is_uih1_candidate(txout: SpendableTxConstituent) -> bool where - T: EnumerateInputValueInArbitraryOrder + EnumerateOutputValueInArbitraryOrder, + T: TxConstituent< + Handle: EnumerateInputValueInArbitraryOrder + EnumerateOutputValueInArbitraryOrder, + >, { - let input_values: Vec = tx.input_values().collect(); - let output_values: Vec = tx.output_values().collect(); - - if input_values.is_empty() || output_values.is_empty() { - return None; + let tx = txout.containing_tx(); + let min_in = tx.input_values().min(); + let output_val = tx + .output_at(txout.vout()) + .expect("vout should be present") + .value(); + + if let Some(min_in) = min_in + && output_val < min_in + { + return true; } - let min_in = input_values - .iter() - .min() - .copied() - .expect("non-empty inputs"); - let min_out = output_values - .iter() - .min() - .copied() - .expect("non-empty outputs"); - - if min_out < min_in { - Some(min_out) - } else { - None - } + false } pub fn is_uih2(tx: &T) -> bool diff --git a/src/crates/primitives/src/handle.rs b/src/crates/primitives/src/handle.rs index 3fa41f7..3af625e 100644 --- a/src/crates/primitives/src/handle.rs +++ b/src/crates/primitives/src/handle.rs @@ -305,3 +305,35 @@ impl<'a> TxConstituent for TxOutHandle<'a> { self.vout() as usize } } + +/// Wrapper guaranteeing the inner value is a spendable output. +/// +/// Construct via [`SpendableTxConstituent::try_new`]: `Ok` for spendable outputs, +/// `Err` returns the original value back so callers can handle the unspendable +/// case explicitly. +pub struct SpendableTxConstituent(T); + +impl SpendableTxConstituent { + /// Wraps `value` if it is spendable; returns it back as `Err` otherwise (e.g. OP_RETURN or non-standard scripts). 
+ pub fn try_new(value: T) -> Result { + if !value.is_spendable() { + Err(value) + } else { + Ok(Self(value)) + } + } +} + +impl SpendableTxConstituent { + pub fn into_inner(self) -> T { + self.0 + } +} + +impl std::ops::Deref for SpendableTxConstituent { + type Target = T; + + fn deref(&self) -> &T { + &self.0 + } +} diff --git a/src/crates/primitives/src/output_type.rs b/src/crates/primitives/src/output_type.rs index c49e8b7..e3b04a7 100644 --- a/src/crates/primitives/src/output_type.rs +++ b/src/crates/primitives/src/output_type.rs @@ -15,6 +15,10 @@ impl OutputType { pub fn as_u32(self) -> u32 { self as u32 } + + pub fn is_spendable(self) -> bool { + self != OutputType::OpReturn && self != OutputType::NonStandard + } } /// Classify a scriptPubKey by type from raw bytes. diff --git a/src/crates/primitives/src/test_utils/mod.rs b/src/crates/primitives/src/test_utils/mod.rs index a8eec35..b4dcaea 100644 --- a/src/crates/primitives/src/test_utils/mod.rs +++ b/src/crates/primitives/src/test_utils/mod.rs @@ -120,12 +120,17 @@ pub struct DummyTxOutData { } impl DummyTxOutData { - /// Create a new output with empty script pubkey. + /// Create a new output. Uses a placeholder P2PKH script (all-zero hash) so outputs are + /// spendable by default (empty bytes classify as NonStandard, which is not spendable). 
pub fn new(amount: u64, vout: u32) -> Self { Self { value: amount, vout, - script_pubkey: vec![], + // OP_DUP OP_HASH160 <20 zero bytes> OP_EQUALVERIFY OP_CHECKSIG + script_pubkey: vec![ + 0x76, 0xa9, 0x14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x88, + 0xac, + ], } } @@ -256,6 +261,7 @@ impl From for Box { } } +#[derive(Debug)] pub struct DummyTxOut { pub vout: usize, pub containing_tx: DummyTxData, @@ -271,3 +277,12 @@ impl TxConstituent for DummyTxOut { self.vout } } + +impl HasScriptPubkey for DummyTxOut { + fn script_pubkey_bytes(&self) -> Vec { + self.containing_tx + .output_at(self.vout) + .map(|o| o.script_pubkey_bytes()) + .unwrap_or_default() + } +} diff --git a/src/crates/primitives/src/traits/abstract_types.rs b/src/crates/primitives/src/traits/abstract_types.rs index 8beaac3..4f40298 100644 --- a/src/crates/primitives/src/traits/abstract_types.rs +++ b/src/crates/primitives/src/traits/abstract_types.rs @@ -88,6 +88,10 @@ pub trait HasScriptPubkey { fn output_type(&self) -> OutputType { classify_script_pubkey(&self.script_pubkey_bytes()) } + + fn is_spendable(&self) -> bool { + self.output_type().is_spendable() + } } /// Value (amount) of a transaction output