Skip to content

Commit ca08332

Browse files
Initial implementation of the Adaptive Buckets anonymization method.
1 parent 31abaa2 commit ca08332

24 files changed

+1290
-91
lines changed

queries-sample.json

+15
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
"layer_s_d": 1,
1818
"low_mean_gap": 2
1919
},
20+
"use_adaptive_buckets": false,
21+
"adaptive_buckets": {
22+
"singularity_low_threshold": 4,
23+
"range_low_threshold": 8
24+
},
2025
"outlier_count": {
2126
"lower": 1,
2227
"upper": 2
@@ -46,6 +51,11 @@
4651
"layer_s_d": 1,
4752
"low_mean_gap": 2
4853
},
54+
"use_adaptive_buckets": false,
55+
"adaptive_buckets": {
56+
"singularity_low_threshold": 4,
57+
"range_low_threshold": 8
58+
},
4959
"outlier_count": {
5060
"lower": 1,
5161
"upper": 2
@@ -75,6 +85,11 @@
7585
"layer_s_d": 1,
7686
"low_mean_gap": 2
7787
},
88+
"use_adaptive_buckets": false,
89+
"adaptive_buckets": {
90+
"singularity_low_threshold": 4,
91+
"range_low_threshold": 8
92+
},
7893
"outlier_count": {
7994
"lower": 1,
8095
"upper": 2

src/OpenDiffix.CLI/Program.fs

+2
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,12 @@ let constructAnonParameters (parsedArgs: ParseResults<CliArguments>) : Anonymiza
132132
AccessLevel = parsedArgs.TryGetResult Access_Level |> toAccessLevel
133133
Strict = parsedArgs.TryGetResult Strict |> Option.defaultValue true
134134
Suppression = suppression
135+
AdaptiveBuckets = AdaptiveBucketsParams.Default
135136
OutlierCount = parsedArgs.TryGetResult Outlier_Count |> toInterval
136137
TopCount = parsedArgs.TryGetResult Top_Count |> toInterval
137138
LayerNoiseSD = parsedArgs.TryGetResult Layer_Noise_SD |> toNoise
138139
RecoverOutliers = parsedArgs.TryGetResult Recover_Outliers |> Option.defaultValue true
140+
UseAdaptiveBuckets = false
139141
}
140142

141143
let getQuery (parsedArgs: ParseResults<CliArguments>) =
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
module OpenDiffix.Core.AdaptiveBuckets.CommonTests
2+
3+
open Xunit
4+
open FsUnit.Xunit
5+
6+
open System
7+
open System.Globalization
8+
9+
open OpenDiffix.Core
10+
11+
// Parses `str` with DateTime.TryParse using the invariant culture and the
// AdjustToUniversal ||| AssumeUniversal styles.
// Returns `Timestamp value` when parsing succeeds and `Null` otherwise.
// Per the .NET docs, AssumeUniversal treats input without an offset as UTC and
// AdjustToUniversal converts input that carries an offset to UTC.
let private parseTimestamp (str: string) =
12+
match
13+
DateTime.TryParse(
14+
str,
15+
CultureInfo.InvariantCulture,
16+
DateTimeStyles.AdjustToUniversal ||| DateTimeStyles.AssumeUniversal
17+
)
18+
with
19+
| true, value -> Timestamp value
20+
| _ -> Null
21+
22+
23+
// Checks parseTimestamp on three ISO 8601 strings: one with a `Z` suffix and
// two with explicit offsets (-05:00 and +01:00). The expected timestamps are
// the same instant expressed in UTC (14:25:47 -05:00 -> 19:25:47,
// 14:25:47 +01:00 -> 13:25:47).
[<Fact>]
24+
let ``Parse timestamp from ISO 8601`` () =
25+
parseTimestamp "2023-01-24T14:25:47.000Z"
26+
|> should equal (makeTimestamp (2023, 1, 24) (14, 25, 47))
27+
28+
parseTimestamp "2023-01-24T14:25:47-05:00"
29+
|> should equal (makeTimestamp (2023, 1, 24) (19, 25, 47))
30+
31+
parseTimestamp "2023-01-24T14:25:47+01:00"
32+
|> should equal (makeTimestamp (2023, 1, 24) (13, 25, 47))
33+
34+
// Checks parseTimestamp on non-ISO inputs without an offset: a date only,
// a date with hours and minutes, and a slash-separated date with seconds.
// Missing time components are expected to come back as zero, and the
// wall-clock values are expected unchanged.
[<Fact>]
35+
let ``Parse timestamp from loose string`` () =
36+
parseTimestamp "2023-01-24"
37+
|> should equal (makeTimestamp (2023, 1, 24) (0, 0, 0))
38+
39+
parseTimestamp "2023-01-24 14:25"
40+
|> should equal (makeTimestamp (2023, 1, 24) (14, 25, 0))
41+
42+
parseTimestamp "2023/01/24 14:25:47"
43+
|> should equal (makeTimestamp (2023, 1, 24) (14, 25, 47))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
module OpenDiffix.Core.AdaptiveBuckets.ForestTests
2+
3+
open Xunit
4+
open FsUnit.Xunit
5+
6+
open OpenDiffix.Core
7+
open OpenDiffix.Core.AdaptiveBuckets.Range
8+
open OpenDiffix.Core.AdaptiveBuckets.Forest
9+
10+
// Builds a forest from 21 rows, each an AID list followed by two integer
// values, using defaultAnonContext, and checks the SnappedRanges stored on the
// root node.
// The last two rows carry extreme values (100 in the second column, -5 in the
// first) that lie outside the expected root ranges [0, 2] and [0, 32].
// NOTE(review): the literal 2 passed to buildForest is presumably the number of
// value columns per row - confirm against buildForest's signature.
[<Fact>]
11+
let ``Root ranges are anonymized`` () =
12+
let root, _ =
13+
[
14+
[| List [ Integer 1 ]; Integer 0; Integer 1 |]
15+
[| List [ Integer 2 ]; Integer 0; Integer 5 |]
16+
[| List [ Integer 3 ]; Integer 0; Integer 2 |]
17+
[| List [ Integer 4 ]; Integer 0; Integer 7 |]
18+
[| List [ Integer 5 ]; Integer 0; Integer 21 |]
19+
[| List [ Integer 6 ]; Integer 0; Integer 4 |]
20+
[| List [ Integer 7 ]; Integer 0; Integer 21 |]
21+
[| List [ Integer 8 ]; Integer 0; Integer 28 |]
22+
[| List [ Integer 9 ]; Integer 0; Integer 19 |]
23+
[| List [ Integer 10 ]; Integer 0; Integer 2 |]
24+
[| List [ Integer 11 ]; Integer 1; Integer 1 |]
25+
[| List [ Integer 12 ]; Integer 1; Integer 13 |]
26+
[| List [ Integer 13 ]; Integer 1; Integer 25 |]
27+
[| List [ Integer 14 ]; Integer 1; Integer 30 |]
28+
[| List [ Integer 15 ]; Integer 1; Integer 6 |]
29+
[| List [ Integer 16 ]; Integer 1; Integer 2 |]
30+
[| List [ Integer 17 ]; Integer 1; Integer 15 |]
31+
[| List [ Integer 18 ]; Integer 1; Integer 24 |]
32+
[| List [ Integer 19 ]; Integer 1; Integer 9 |]
33+
[| List [ Integer 20 ]; Integer 0; Integer 100 |]
34+
[| List [ Integer 21 ]; Integer -5; Integer 0 |]
35+
]
36+
|> buildForest defaultAnonContext 2
37+
38+
(Tree.nodeData root).SnappedRanges
39+
|> should equal [| { Min = 0.0; Max = 2.0 }; { Min = 0.0; Max = 32.0 } |]
40+
41+
// Builds the same 17 single-column rows twice: once with a distinct AID per
// row (AIDs 1..17; nine rows with value 1, eight with value 0) and once with
// every row's AID list replaced by `List [ Integer 1L ]`.
// With distinct AIDs the root is expected to be a branch; with one shared AID
// the root is expected to be a leaf whose first contribution entry holds a
// single AID with a contribution equal to the number of rows.
[<Fact>]
42+
let ``Multiple rows per AID`` () =
43+
let rows =
44+
[
45+
[| List [ Integer 1 ]; Integer 1 |]
46+
[| List [ Integer 2 ]; Integer 1 |]
47+
[| List [ Integer 3 ]; Integer 1 |]
48+
[| List [ Integer 4 ]; Integer 1 |]
49+
[| List [ Integer 5 ]; Integer 1 |]
50+
[| List [ Integer 6 ]; Integer 1 |]
51+
[| List [ Integer 7 ]; Integer 1 |]
52+
[| List [ Integer 8 ]; Integer 1 |]
53+
[| List [ Integer 9 ]; Integer 1 |]
54+
[| List [ Integer 10 ]; Integer 0 |]
55+
[| List [ Integer 11 ]; Integer 0 |]
56+
[| List [ Integer 12 ]; Integer 0 |]
57+
[| List [ Integer 13 ]; Integer 0 |]
58+
[| List [ Integer 14 ]; Integer 0 |]
59+
[| List [ Integer 15 ]; Integer 0 |]
60+
[| List [ Integer 16 ]; Integer 0 |]
61+
[| List [ Integer 17 ]; Integer 0 |]
62+
]
63+
64+
let root, _ = rows |> buildForest defaultAnonContext 1
65+
66+
// Sanity check: there are enough AIDs to branch at least once.
67+
match root with
68+
| Tree.Branch _ -> ()
69+
| _ -> failwith "Expected a branch root"
70+
71+
// Drop each row's first element (its AID list) and insert the same AID list
// at index 0, so all rows belong to one AID.
let rowsAid =
72+
rows
73+
|> List.map (fun row -> row |> Array.tail |> Array.insertAt 0 (List [ Integer 1L ]))
74+
75+
let rootAid, _ = rowsAid |> buildForest defaultAnonContext 1
76+
77+
match rootAid with
78+
| Tree.Leaf l ->
79+
let aidContributions = l.Data.Contributions.[0].AidContributions.Values |> Seq.toList
80+
81+
// There's a single AID contributing all rows.
82+
aidContributions.Length |> should equal 1
83+
aidContributions.Head |> should equal (float rows.Length)
84+
| _ -> failwith "Expected a leaf root"
85+
86+
// Builds a forest from 21 single-column rows (one distinct AID each) using
// noiselessAnonContext, and checks that the root's noisy row count is exactly
// 21, i.e. every input row is counted, including the row with the extreme
// value 100.
[<Fact>]
87+
let ``Outliers are not dropped from 1-dim trees`` () =
88+
let root, _ =
89+
[
90+
[| List [ Integer 1 ]; Integer 1 |]
91+
[| List [ Integer 2 ]; Integer 5 |]
92+
[| List [ Integer 3 ]; Integer 2 |]
93+
[| List [ Integer 4 ]; Integer 7 |]
94+
[| List [ Integer 5 ]; Integer 21 |]
95+
[| List [ Integer 6 ]; Integer 4 |]
96+
[| List [ Integer 7 ]; Integer 21 |]
97+
[| List [ Integer 8 ]; Integer 28 |]
98+
[| List [ Integer 9 ]; Integer 19 |]
99+
[| List [ Integer 10 ]; Integer 2 |]
100+
[| List [ Integer 11 ]; Integer 1 |]
101+
[| List [ Integer 12 ]; Integer 13 |]
102+
[| List [ Integer 13 ]; Integer 25 |]
103+
[| List [ Integer 14 ]; Integer 30 |]
104+
[| List [ Integer 15 ]; Integer 6 |]
105+
[| List [ Integer 16 ]; Integer 2 |]
106+
[| List [ Integer 17 ]; Integer 15 |]
107+
[| List [ Integer 18 ]; Integer 24 |]
108+
[| List [ Integer 19 ]; Integer 9 |]
109+
[| List [ Integer 20 ]; Integer 100 |]
110+
[| List [ Integer 21 ]; Integer 0 |]
111+
]
112+
|> buildForest noiselessAnonContext 1
113+
114+
root |> Tree.nodeData |> Tree.noisyRowCount |> should equal 21L
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
module OpenDiffix.Core.AdaptiveBuckets.RangeTests
2+
3+
open System
4+
open Xunit
5+
open FsUnit.Xunit
6+
7+
open OpenDiffix.Core
8+
open OpenDiffix.Core.AdaptiveBuckets.Range
9+
open OpenDiffix.Core.AdaptiveBuckets.Forest
10+
11+
// Converts `value` to a float via Tree.castValueToFloat; when that returns
// None the result is Double.NaN.
let valueToFloat value =
12+
value |> Tree.castValueToFloat |> Option.defaultValue Double.NaN
13+
14+
// Test helper: converts `min` and `max` to floats with valueToFloat, builds a
// range from the pair with createRange, and returns the result of snapRange.
let createSnappedRange min max =
15+
(valueToFloat min, valueToFloat max) ||> createRange |> snapRange
16+
17+
// Table of integer min/max pairs and the exact snapped range expected for
// each. Every expected range contains its input pair and has a power-of-two
// width; the cases cover positive, negative and zero-crossing inputs, plus the
// degenerate 0..0 input, which is expected to snap to [0, 1].
[<Fact>]
18+
let ``Creates snapped ranges for ints`` () =
19+
(createSnappedRange (Integer 1) (Integer 2))
20+
|> should equal { Min = 1.0; Max = 2.0 }
21+
22+
(createSnappedRange (Integer 3) (Integer 7))
23+
|> should equal { Min = 0.0; Max = 8.0 }
24+
25+
(createSnappedRange (Integer 11) (Integer 21))
26+
|> should equal { Min = 8.0; Max = 24.0 }
27+
28+
(createSnappedRange (Integer 11) (Integer 14))
29+
|> should equal { Min = 10.0; Max = 14.0 }
30+
31+
(createSnappedRange (Integer -1) (Integer 2))
32+
|> should equal { Min = -2.0; Max = 2.0 }
33+
34+
(createSnappedRange (Integer -3) (Integer -2))
35+
|> should equal { Min = -3.0; Max = -2.0 }
36+
37+
(createSnappedRange (Integer -7) (Integer 0))
38+
|> should equal { Min = -8.0; Max = 0.0 }
39+
40+
(createSnappedRange (Integer 0) (Integer 5))
41+
|> should equal { Min = 0.0; Max = 8.0 }
42+
43+
(createSnappedRange (Integer -5) (Integer -2))
44+
|> should equal { Min = -6.0; Max = -2.0 }
45+
46+
(createSnappedRange (Integer -5) (Integer 7))
47+
|> should equal { Min = -8.0; Max = 8.0 }
48+
49+
(createSnappedRange (Integer -6) (Integer 2))
50+
|> should equal { Min = -8.0; Max = 8.0 }
51+
52+
(createSnappedRange (Integer 21) (Integer 23))
53+
|> should equal { Min = 21.0; Max = 23.0 }
54+
55+
(createSnappedRange (Integer 0) (Integer 0))
56+
|> should equal { Min = 0.0; Max = 1.0 }
57+
58+
59+
// Table of real-valued min/max pairs and the exact snapped range expected for
// each. Expected widths are powers of two, including fractional ones
// (0.5, 0.125, 0.015625, ...).
// The last three cases probe very small magnitudes: 1e-17, and values around
// 2^-1073, which is below the smallest normal double (2^-1022), i.e. subnormal.
[<Fact>]
60+
let ``Creates snapped ranges for floats`` () =
61+
(createSnappedRange (Real 0.200000) (Real 0.400000))
62+
|> should equal { Min = 0.0; Max = 0.5 }
63+
64+
(createSnappedRange (Real 0.010000) (Real 0.100000))
65+
|> should equal { Min = 0.0; Max = 0.125 }
66+
67+
(createSnappedRange (Real -1.400000) (Real -0.300000))
68+
|> should equal { Min = -2.0; Max = -0.0 }
69+
70+
(createSnappedRange (Real 0.333000) (Real 0.780000))
71+
|> should equal { Min = 0.0; Max = 1.0 }
72+
73+
(createSnappedRange (Real 0.002000) (Real 0.010000))
74+
|> should equal { Min = 0.0; Max = 0.015625 }
75+
76+
(createSnappedRange (Real 0.660000) (Real 0.900000))
77+
|> should equal { Min = 0.5; Max = 1.0 }
78+
79+
(createSnappedRange (Real 10.001) (Real 10.002))
80+
|> should equal { Min = 10.0009765625; Max = 10.0029296875 }
81+
82+
(createSnappedRange (Real 158.88434124351295) (Real 158.94684124353768))
83+
|> should equal { Min = 158.875; Max = 159.0 }
84+
85+
(createSnappedRange (Real 0.0) (Real 1e-17))
86+
|> should equal { Min = 0.0; Max = Math.Pow(2.0, -56) }
87+
88+
(createSnappedRange (Real 0) (Real(Math.Pow(2.0, -1073))))
89+
|> should equal { Min = 0.0; Max = Math.Pow(2.0, -1073) }
90+
91+
(createSnappedRange (Real 0) (Real(Math.Pow(2.0, -1073) + Math.Pow(2.0, -1074))))
92+
|> should equal { Min = 0.0; Max = Math.Pow(2.0, -1072) }
93+
94+
// Checks snapping for a pair of timestamps (2004-01-03 13:25:47 and
// 2005-03-05 21:13:07). The expected range has width 67108864 (2^26).
// NOTE(review): the unit and epoch of the float representation come from
// Tree.castValueToFloat, which is not visible here - confirm before relying on
// the absolute values.
[<Fact>]
95+
let ``Creates snapped ranges for timestamps`` () =
96+
(createSnappedRange (makeTimestamp (2004, 1, 3) (13, 25, 47)) (makeTimestamp (2005, 3, 5) (21, 13, 7)))
97+
|> should equal { Min = 6408896512.0; Max = 6476005376.0 }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
[<AutoOpen>]
2+
module OpenDiffix.Core.AdaptiveBuckets.TestHelpers
3+
4+
open System
5+
6+
open OpenDiffix.Core
7+
open OpenDiffix.Core.AdaptiveBuckets.Forest
8+
open OpenDiffix.Core.AdaptiveBuckets.Bucket
9+
open OpenDiffix.Core.AdaptiveBuckets.Microdata
10+
11+
// Anonymization context shared by the tests: default anonymization
// parameters, a bucket seed of 0 and no base labels.
let defaultAnonContext =
12+
{
13+
AnonymizationParams = AnonymizationParams.Default
14+
BucketSeed = 0UL
15+
BaseLabels = []
16+
}
17+
18+
// Copy of defaultAnonContext with every standard-deviation setting zeroed
// (LayerNoiseSD, Suppression.LayerSD), the suppression mean gap set to 0 with
// a low threshold of 3, and the OutlierCount / TopCount intervals collapsed to
// the single value 1. All other parameters keep their defaults.
let noiselessAnonContext =
19+
{ defaultAnonContext with
20+
AnonymizationParams =
21+
{ AnonymizationParams.Default with
22+
LayerNoiseSD = 0.
23+
Suppression = { LowThreshold = 3; LayerSD = 0.; LowMeanGap = 0. }
24+
OutlierCount = { Lower = 1; Upper = 1 }
25+
TopCount = { Lower = 1; Upper = 1 }
26+
}
27+
}
28+
29+
// Builds a `Timestamp` value from a (year, month, day) tuple and an
// (hour, minute, second) tuple; the underlying DateTime is created with
// DateTimeKind.Utc.
let makeTimestamp (year, month, day) (hour, minute, second) =
30+
Timestamp(DateTime(year, month, day, hour, minute, second, DateTimeKind.Utc))
31+
32+
// Runs the adaptive-buckets pipeline on `rows` and returns the generated
// microdata as a list of rows.
// Steps: extract value maps from the column types and rows, build the forest
// (passing the number of columns), harvest buckets from it, generate
// microdata, then drop the first field of every generated row.
//   anonContext - anonymization context passed to buildForest
//   columns     - column descriptors; only `.Type` and the list length are read
//   rows        - input rows passed to extractValueMaps and buildForest
let processDataWithParams anonContext columns rows =
33+
let columnTypes = columns |> List.map (fun column -> column.Type)
34+
let microdataColumns = extractValueMaps columnTypes rows
35+
let forest, nullMappings = rows |> buildForest anonContext columns.Length
36+
37+
forest
38+
|> harvestBuckets
39+
|> generateMicrodata microdataColumns nullMappings
40+
|> Seq.map Array.tail // Drop the dummy AID instances field.
41+
|> Seq.toList
42+
43+
// Convenience wrapper: runs processDataWithParams with defaultAnonContext.
let processData columns rows =
44+
processDataWithParams defaultAnonContext columns rows

src/OpenDiffix.Core.Tests/Analyzer.Tests.fs

+2
Original file line numberDiff line numberDiff line change
@@ -337,10 +337,12 @@ type Tests(db: DBFixture) =
337337
AccessLevel = PublishTrusted
338338
Strict = false
339339
Suppression = { LowThreshold = 2; LowMeanGap = 0.0; LayerSD = 0. }
340+
AdaptiveBuckets = AdaptiveBucketsParams.Default
340341
OutlierCount = { Lower = 1; Upper = 1 }
341342
TopCount = { Lower = 1; Upper = 1 }
342343
LayerNoiseSD = 0.
343344
RecoverOutliers = true
345+
UseAdaptiveBuckets = false
344346
}
345347

346348
let queryContext accessLevel =

src/OpenDiffix.Core.Tests/Anonymizer.Tests.fs

+2
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,12 @@ let anonParams =
3838
AccessLevel = Direct
3939
Strict = false
4040
Suppression = { LowThreshold = 2; LowMeanGap = 0.0; LayerSD = 0. }
41+
AdaptiveBuckets = AdaptiveBucketsParams.Default
4142
OutlierCount = { Lower = 1; Upper = 1 }
4243
TopCount = { Lower = 1; Upper = 1 }
4344
LayerNoiseSD = 0.
4445
RecoverOutliers = true
46+
UseAdaptiveBuckets = false
4547
}
4648

4749
let aggContext = { GroupingLabels = [||]; Aggregators = [||] }

src/OpenDiffix.Core.Tests/HookTestHelpers.fs

+2
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@ let private noiselessAnonParams: AnonymizationParams =
99
AccessLevel = Direct
1010
Strict = false
1111
Suppression = { LowThreshold = 3; LowMeanGap = 0.; LayerSD = 0. }
12+
AdaptiveBuckets = AdaptiveBucketsParams.Default
1213
OutlierCount = { Lower = 1; Upper = 1 }
1314
TopCount = { Lower = 1; Upper = 1 }
1415
LayerNoiseSD = 0.
1516
RecoverOutliers = true
17+
UseAdaptiveBuckets = false
1618
}
1719

1820
let private csvReader (csv: string) =

0 commit comments

Comments
 (0)