@@ -99,7 +99,7 @@ type private AidCount = { FlattenedSum: float; Flattening: float; NoiseSD: float
9999let inline private aidFlattening
100100 ( executionContext : ExecutionContext )
101101 ( unaccountedFor : int64 )
102- ( aidContributions : ( AidHash * ^Contribution ) list )
102+ ( aidContributions : ( AidHash * ^Contribution ) array )
103103 : AidCount option =
104104 let anonParams = executionContext.AnonymizationParams
105105
@@ -109,30 +109,30 @@ let inline private aidFlattening
109109 let outlierInterval , topInterval =
110110 compactFlatteningIntervals anonParams.OutlierCount anonParams.TopCount aidContributions.Length
111111
112- let sortedAidContributions = aidContributions |> List .sortByDescending snd
112+ let sortedAidContributions = aidContributions |> Array .sortByDescending snd
113113
114114 let flatSeed =
115115 sortedAidContributions
116- |> List .take ( outlierInterval.Upper + topInterval.Upper)
116+ |> Seq .take ( outlierInterval.Upper + topInterval.Upper)
117117 |> Seq.map fst
118118 |> seedFromAidSet
119119 |> cryptoHashSaltedSeed anonParams.Salt
120120
121121 let outlierCount = flatSeed |> mixSeed " outlier" |> randomUniform outlierInterval
122122 let topCount = flatSeed |> mixSeed " top" |> randomUniform topInterval
123123
124- let outliersSummed = sortedAidContributions |> List .take outlierCount |> List .sumBy snd
124+ let outliersSummed = sortedAidContributions |> Seq .take outlierCount |> Seq .sumBy snd
125125
126126 let topGroupValuesSummed =
127127 sortedAidContributions
128- |> List .skip outlierCount
129- |> List .take topCount
130- |> List .sumBy snd
128+ |> Seq .skip outlierCount
129+ |> Seq .take topCount
130+ |> Seq .sumBy snd
131131
132132 let topGroupAverage = ( float topGroupValuesSummed) / ( float topCount)
133133 let outlierReplacement = topGroupAverage * ( float outlierCount)
134134
135- let summedContributions = aidContributions |> List .sumBy snd
135+ let summedContributions = aidContributions |> Array .sumBy snd
136136 let flattening = float outliersSummed - outlierReplacement
137137 let flattenedUnaccountedFor = float unaccountedFor - flattening |> max 0.
138138 let flattenedSum = float summedContributions - flattening
@@ -171,28 +171,48 @@ let private transposeToPerAid (aidsPerValue: KeyValuePair<Value, HashSet<AidHash
171171
172172 result
173173
174- let rec private distributeValues valuesByAID =
175- match valuesByAID with
176- | [] -> [] // Done :D
177- | (_ aid, []) :: restValuesByAID -> distributeValues restValuesByAID
178- | ( aid, value :: restValues) :: restValuesByAID ->
179- let restValuesByAID = // Drop current value from the remaining items.
180- List.map ( fun ( aid , values ) -> aid, values |> List.filter ((<>) value)) restValuesByAID
174+ let private distributeValues ( valuesByAID : seq < AidHash * array < Value >>) : seq < AidHash * Value > =
175+ let usedValues = HashSet< Value>()
181176
182- ( aid, value) :: distributeValues ( restValuesByAID @ [ aid, restValues ])
177+ let rec pickUnusedValue ( values : Stack < Value >) =
178+ match values.TryPop() with
179+ | true , value -> if usedValues.Contains( value) then pickUnusedValue values else Some value
180+ | false , _ -> None
181+
182+ let result = List< AidHash * Value>()
183+
184+ let mutable remainingItems =
185+ valuesByAID
186+ |> Seq.filter ( fun ( _aid , values ) -> values.Length > 0 )
187+ |> Seq.map ( fun ( aid , values ) -> aid, Stack< Value>( values))
188+ |> Seq.toArray
189+
190+ while remainingItems.Length > 0 do
191+ remainingItems <-
192+ remainingItems
193+ |> Array.filter ( fun ( aid , values ) ->
194+ match pickUnusedValue values with
195+ | Some value ->
196+ result.Add(( aid, value))
197+ usedValues.Add( value) |> ignore
198+ values.Count > 0
199+ | None -> false
200+ )
201+
202+ result :> seq< AidHash * Value>
183203
184204let private countDistinctFlatteningByAid
185205 ( executionContext : ExecutionContext )
186206 ( perAidContributions : Dictionary < AidHash , HashSet < Value >>)
187207 =
188208 perAidContributions
189209 // keep low count values in sorted order to ensure the algorithm is deterministic
190- |> Seq.map ( fun pair -> pair.Key, pair.Value |> Seq.toList )
210+ |> Seq.map ( fun pair -> pair.Key, pair.Value |> Seq.toArray )
191211 |> Seq.sortBy ( fun ( aid , values ) -> values.Length, aid)
192- |> Seq.toList
193212 |> distributeValues
194- |> List.countBy fst
195- |> List.map ( fun ( aid , count ) -> aid, int64 count)
213+ |> Seq.countBy fst
214+ |> Seq.map ( fun ( aid , count ) -> aid, int64 count)
215+ |> Seq.toArray
196216 |> aidFlattening executionContext 0 L
197217
198218let private anonymizedSum ( byAidSum : AidCount seq ) =
@@ -230,8 +250,8 @@ let countDistinct
230250 // without any additional noise.
231251 let lowCountValues , highCountValues =
232252 aidsPerValue
233- |> Seq.toList
234- |> List .partition ( fun pair -> isLowCount executionContext pair.Value)
253+ |> Seq.toArray
254+ |> Array .partition ( fun pair -> isLowCount executionContext pair.Value)
235255
236256 let byAid =
237257 [ 0 .. aidsCount - 1 ]
@@ -262,7 +282,7 @@ let count (executionContext: ExecutionContext) (perAidContributions: AidCountSta
262282 |> Array.map ( fun aidState ->
263283 aidState.AidContributions
264284 |> Seq.map ( fun pair -> pair.Key, pair.Value)
265- |> Seq.toList
285+ |> Seq.toArray
266286 |> aidFlattening executionContext aidState.UnaccountedFor
267287 )
268288
0 commit comments