@@ -99,7 +99,7 @@ type private AidCount = { FlattenedSum: float; Flattening: float; NoiseSD: float
 let inline private aidFlattening
   (executionContext: ExecutionContext)
   (unaccountedFor: int64)
-  (aidContributions: (AidHash * ^Contribution) list)
+  (aidContributions: (AidHash * ^Contribution) array)
   : AidCount option =
   let anonParams = executionContext.AnonymizationParams
 
@@ -109,30 +109,30 @@ let inline private aidFlattening
   let outlierInterval, topInterval =
     compactFlatteningIntervals anonParams.OutlierCount anonParams.TopCount aidContributions.Length
 
-  let sortedAidContributions = aidContributions |> List.sortByDescending snd
+  let sortedAidContributions = aidContributions |> Array.sortByDescending snd
 
   let flatSeed =
     sortedAidContributions
-    |> List.take (outlierInterval.Upper + topInterval.Upper)
+    |> Seq.take (outlierInterval.Upper + topInterval.Upper)
     |> Seq.map fst
     |> seedFromAidSet
     |> cryptoHashSaltedSeed anonParams.Salt
 
   let outlierCount = flatSeed |> mixSeed "outlier" |> randomUniform outlierInterval
   let topCount = flatSeed |> mixSeed "top" |> randomUniform topInterval
 
-  let outliersSummed = sortedAidContributions |> List.take outlierCount |> List.sumBy snd
+  let outliersSummed = sortedAidContributions |> Seq.take outlierCount |> Seq.sumBy snd
 
   let topGroupValuesSummed =
     sortedAidContributions
-    |> List.skip outlierCount
-    |> List.take topCount
-    |> List.sumBy snd
+    |> Seq.skip outlierCount
+    |> Seq.take topCount
+    |> Seq.sumBy snd
 
   let topGroupAverage = (float topGroupValuesSummed) / (float topCount)
   let outlierReplacement = topGroupAverage * (float outlierCount)
 
-  let summedContributions = aidContributions |> List.sumBy snd
+  let summedContributions = aidContributions |> Array.sumBy snd
   let flattening = float outliersSummed - outlierReplacement
   let flattenedUnaccountedFor = float unaccountedFor - flattening |> max 0.
   let flattenedSum = float summedContributions - flattening
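To make the flattening arithmetic in the hunk above concrete, here is a small hand-worked sketch in F#. The numbers are invented, the per-AID pairs are reduced to bare float contributions, and outlierCount / topCount are fixed constants rather than values drawn from the seeded RNG:

// Hypothetical sketch of the flattening arithmetic, with made-up numbers.
// Per-AID contributions, already sorted descending:
let sortedAidContributions = [| 90.0; 40.0; 10.0; 8.0; 7.0; 5.0 |]
// Counts the seeded RNG might have produced:
let outlierCount, topCount = 1, 2

let outliersSummed = sortedAidContributions |> Seq.take outlierCount |> Seq.sum        // 90.0
let topGroupValuesSummed =
  sortedAidContributions |> Seq.skip outlierCount |> Seq.take topCount |> Seq.sum      // 40.0 + 10.0 = 50.0
let topGroupAverage = topGroupValuesSummed / float topCount                            // 25.0
let outlierReplacement = topGroupAverage * float outlierCount                          // 25.0
let flattening = outliersSummed - outlierReplacement                                   // 65.0
let flattenedSum = (Array.sum sortedAidContributions) - flattening                     // 160.0 - 65.0 = 95.0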
@@ -171,28 +171,48 @@ let private transposeToPerAid (aidsPerValue: KeyValuePair<Value, HashSet<AidHash
 
   result
 
-let rec private distributeValues valuesByAID =
-  match valuesByAID with
-  | [] -> [] // Done :D
-  | (_aid, []) :: restValuesByAID -> distributeValues restValuesByAID
-  | (aid, value :: restValues) :: restValuesByAID ->
-    let restValuesByAID = // Drop current value from the remaining items.
-      List.map (fun (aid, values) -> aid, values |> List.filter ((<>) value)) restValuesByAID
+let private distributeValues (valuesByAID: seq<AidHash * array<Value>>) : seq<AidHash * Value> =
+  let usedValues = HashSet<Value>()
 
-    (aid, value) :: distributeValues (restValuesByAID @ [ aid, restValues ])
+  let rec pickUnusedValue (values: Stack<Value>) =
+    match values.TryPop() with
+    | true, value -> if usedValues.Contains(value) then pickUnusedValue values else ValueSome value
+    | false, _ -> ValueNone
+
+  let result = List<AidHash * Value>()
+
+  let mutable remainingItems =
+    valuesByAID
+    |> Seq.filter (fun (_aid, values) -> values.Length > 0)
+    |> Seq.map (fun (aid, values) -> aid, Stack<Value>(values))
+    |> Seq.toArray
+
+  while remainingItems.Length > 0 do
+    remainingItems <-
+      remainingItems
+      |> Array.filter (fun (aid, values) ->
+        match pickUnusedValue values with
+        | ValueSome value ->
+          result.Add((aid, value))
+          usedValues.Add(value) |> ignore
+          values.Count > 0
+        | ValueNone -> false
+      )
+
+  result :> seq<AidHash * Value>
 
 let private countDistinctFlatteningByAid
   (executionContext: ExecutionContext)
   (perAidContributions: Dictionary<AidHash, HashSet<Value>>)
   =
   perAidContributions
   // keep low count values in sorted order to ensure the algorithm is deterministic
-  |> Seq.map (fun pair -> pair.Key, pair.Value |> Seq.toList)
+  |> Seq.map (fun pair -> pair.Key, pair.Value |> Seq.toArray)
   |> Seq.sortBy (fun (aid, values) -> values.Length, aid)
-  |> Seq.toList
   |> distributeValues
-  |> List.countBy fst
-  |> List.map (fun (aid, count) -> aid, int64 count)
+  |> Seq.countBy fst
+  |> Seq.map (fun (aid, count) -> aid, int64 count)
+  |> Seq.toArray
   |> aidFlattening executionContext 0L
 
 let private anonymizedSum (byAidSum: AidCount seq) =
@@ -230,8 +250,8 @@ let countDistinct
   // without any additional noise.
   let lowCountValues, highCountValues =
     aidsPerValue
-    |> Seq.toList
-    |> List.partition (fun pair -> isLowCount executionContext pair.Value)
+    |> Seq.toArray
+    |> Array.partition (fun pair -> isLowCount executionContext pair.Value)
 
   let byAid =
     [ 0 .. aidsCount - 1 ]
@@ -262,7 +282,7 @@ let count (executionContext: ExecutionContext) (perAidContributions: AidCountSta
     |> Array.map (fun aidState ->
       aidState.AidContributions
       |> Seq.map (fun pair -> pair.Key, pair.Value)
-      |> Seq.toList
+      |> Seq.toArray
       |> aidFlattening executionContext aidState.UnaccountedFor
     )
 
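For a rough, self-contained illustration of the reworked distributeValues, the sketch below stubs AidHash and Value as int64 and string (assumptions for the sake of a runnable snippet; the real module defines these types elsewhere). Each pass over the remaining AIDs hands out at most one still-unused value per AID, so a value shared by several AIDs is credited to exactly one of them:

// Illustrative sketch only: AidHash and Value are stand-ins so the
// round-robin distribution can be exercised outside the real pipeline.
open System.Collections.Generic

type AidHash = int64
type Value = string

let distributeValues (valuesByAID: seq<AidHash * array<Value>>) : seq<AidHash * Value> =
  let usedValues = HashSet<Value>()

  // Pop entries until one is found that has not been handed out yet.
  let rec pickUnusedValue (values: Stack<Value>) =
    match values.TryPop() with
    | true, value -> if usedValues.Contains(value) then pickUnusedValue values else ValueSome value
    | false, _ -> ValueNone

  let result = List<AidHash * Value>()

  let mutable remainingItems =
    valuesByAID
    |> Seq.filter (fun (_aid, values) -> values.Length > 0)
    |> Seq.map (fun (aid, values) -> aid, Stack<Value>(values))
    |> Seq.toArray

  // Each pass assigns at most one unused value per AID; AIDs whose stacks
  // are exhausted drop out, until nothing is left to distribute.
  while remainingItems.Length > 0 do
    remainingItems <-
      remainingItems
      |> Array.filter (fun (aid, values) ->
        match pickUnusedValue values with
        | ValueSome value ->
          result.Add((aid, value))
          usedValues.Add(value) |> ignore
          values.Count > 0
        | ValueNone -> false)

  result :> seq<AidHash * Value>

// Two AIDs sharing the value "a": the result credits each value once,
// e.g. [(1L, "c"); (2L, "b"); (1L, "a")] with this input.
[ 1L, [| "a"; "c" |]; 2L, [| "a"; "b" |] ]
|> distributeValues
|> Seq.toList
|> printfn "%A"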