@@ -66,14 +66,12 @@ impl<T> Row<T> {
6666 self . estimate
6767 }
6868
69- /// Returns the upper bound for the frequency.
69+ /// Returns the guaranteed upper bound for the frequency.
7070 pub fn upper_bound ( & self ) -> u64 {
7171 self . upper_bound
7272 }
7373
7474 /// Returns the guaranteed lower bound for the frequency.
75- ///
76- /// This value is never negative.
7775 pub fn lower_bound ( & self ) -> u64 {
7876 self . lower_bound
7977 }
@@ -115,7 +113,11 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
115113 /// assert_eq!(sketch.num_active_items(), 2);
116114 /// ```
117115 pub fn new ( max_map_size : usize ) -> Self {
118- let lg_max_map_size = exact_log2 ( max_map_size) ;
116+ assert ! (
117+ max_map_size. is_power_of_two( ) ,
118+ "max_map_size must be power of 2"
119+ ) ;
120+ let lg_max_map_size = max_map_size. trailing_zeros ( ) as u8 ;
119121 Self :: with_lg_map_sizes ( lg_max_map_size, LG_MIN_MAP_SIZE )
120122 }
121123
@@ -155,16 +157,16 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
155157
156158 /// Returns the guaranteed lower bound frequency for an item.
157159 ///
158- /// This value is never negative and is guaranteed to be no larger than the true frequency.
159- /// If the item is not tracked, the lower bound is zero.
160+ /// This value is guaranteed to be no larger than the true frequency. If the item is not
161+ /// tracked, the lower bound is zero.
160162 pub fn lower_bound ( & self , item : & T ) -> u64 {
161163 self . hash_map . get ( item)
162164 }
163165
164166 /// Returns the guaranteed upper bound frequency for an item.
165167 ///
166- /// This value is guaranteed to be no smaller than the true frequency.
167- /// If the item is tracked, this is `item_count + offset`.
168+ /// This value is guaranteed to be no smaller than the true frequency. If the item is tracked,
169+ /// this is `item_count + offset`.
168170 pub fn upper_bound ( & self , item : & T ) -> u64 {
169171 self . hash_map . get ( item) + self . offset
170172 }
@@ -544,7 +546,14 @@ impl FrequentItemsSketch<i64> {
544546 /// assert!(decoded.estimate(&7) >= 2);
545547 /// ```
546548 pub fn serialize ( & self ) -> Vec < u8 > {
547- self . serialize_inner ( count_i64_items_bytes, serialize_i64_items)
549+ self . serialize_inner (
550+ |items| items. iter ( ) . map ( i64:: serialize_size) . sum ( ) ,
551+ |bytes, items| {
552+ for item in items {
553+ item. serialize_value ( bytes) ;
554+ }
555+ } ,
556+ )
548557 }
549558
550559 /// Deserializes a sketch from bytes.
@@ -560,7 +569,70 @@ impl FrequentItemsSketch<i64> {
560569 /// assert!(decoded.estimate(&7) >= 2);
561570 /// ```
562571 pub fn deserialize ( bytes : & [ u8 ] ) -> Result < Self , Error > {
563- Self :: deserialize_inner ( bytes, deserialize_i64_items)
572+ Self :: deserialize_inner ( bytes, |mut cursor, num_items| {
573+ let mut items = Vec :: with_capacity ( num_items) ;
574+ for i in 0 ..num_items {
575+ let item = i64:: deserialize_value ( & mut cursor) . map_err ( |_| {
576+ Error :: insufficient_data ( format ! (
577+ "expected {num_items} items, failed to read item at index {i}"
578+ ) )
579+ } ) ?;
580+ items. push ( item) ;
581+ }
582+ Ok ( items)
583+ } )
584+ }
585+ }
586+
587+ impl FrequentItemsSketch < u64 > {
588+ /// Serializes this sketch into a byte vector.
589+ ///
590+ /// # Examples
591+ ///
592+ /// ```
593+ /// # use datasketches::frequencies::FrequentItemsSketch;
594+ /// # let mut sketch = FrequentItemsSketch::<u64>::new(64);
595+ /// # sketch.update_with_count(7, 2);
596+ /// let bytes = sketch.serialize();
597+ /// let decoded = FrequentItemsSketch::<u64>::deserialize(&bytes).unwrap();
598+ /// assert!(decoded.estimate(&7) >= 2);
599+ /// ```
600+ pub fn serialize ( & self ) -> Vec < u8 > {
601+ self . serialize_inner (
602+ |items| items. iter ( ) . map ( u64:: serialize_size) . sum ( ) ,
603+ |bytes, items| {
604+ for item in items {
605+ item. serialize_value ( bytes) ;
606+ }
607+ } ,
608+ )
609+ }
610+
611+ /// Deserializes a sketch from bytes.
612+ ///
613+ /// # Examples
614+ ///
615+ /// ```
616+ /// # use datasketches::frequencies::FrequentItemsSketch;
617+ /// # let mut sketch = FrequentItemsSketch::<u64>::new(64);
618+ /// # sketch.update_with_count(7, 2);
619+ /// # let bytes = sketch.serialize();
620+ /// let decoded = FrequentItemsSketch::<u64>::deserialize(&bytes).unwrap();
621+ /// assert!(decoded.estimate(&7) >= 2);
622+ /// ```
623+ pub fn deserialize ( bytes : & [ u8 ] ) -> Result < Self , Error > {
624+ Self :: deserialize_inner ( bytes, |mut cursor, num_items| {
625+ let mut items = Vec :: with_capacity ( num_items) ;
626+ for i in 0 ..num_items {
627+ let item = u64:: deserialize_value ( & mut cursor) . map_err ( |_| {
628+ Error :: insufficient_data ( format ! (
629+ "expected {num_items} items, failed to read item at index {i}"
630+ ) )
631+ } ) ?;
632+ items. push ( item) ;
633+ }
634+ Ok ( items)
635+ } )
564636 }
565637}
566638
@@ -579,7 +651,14 @@ impl FrequentItemsSketch<String> {
579651 /// assert!(decoded.estimate(&apple) >= 2);
580652 /// ```
581653 pub fn serialize ( & self ) -> Vec < u8 > {
582- self . serialize_inner ( count_string_items_bytes, serialize_string_items)
654+ self . serialize_inner (
655+ |items| items. iter ( ) . map ( String :: serialize_size) . sum ( ) ,
656+ |bytes, items| {
657+ for item in items {
658+ item. serialize_value ( bytes) ;
659+ }
660+ } ,
661+ )
583662 }
584663
585664 /// Deserializes a sketch from bytes.
@@ -596,11 +675,17 @@ impl FrequentItemsSketch<String> {
596675 /// assert!(decoded.estimate(&apple) >= 2);
597676 /// ```
598677 pub fn deserialize ( bytes : & [ u8 ] ) -> Result < Self , Error > {
599- Self :: deserialize_inner ( bytes, deserialize_string_items)
678+ Self :: deserialize_inner ( bytes, |mut cursor, num_items| {
679+ let mut items = Vec :: with_capacity ( num_items) ;
680+ for i in 0 ..num_items {
681+ let item = String :: deserialize_value ( & mut cursor) . map_err ( |_| {
682+ Error :: insufficient_data ( format ! (
683+ "expected {num_items} items, failed to read item at index {i}"
684+ ) )
685+ } ) ?;
686+ items. push ( item) ;
687+ }
688+ Ok ( items)
689+ } )
600690 }
601691}
602-
603- fn exact_log2 ( value : usize ) -> u8 {
604- assert ! ( value. is_power_of_two( ) , "value must be power of 2" ) ;
605- value. trailing_zeros ( ) as u8
606- }
0 commit comments