Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: lxuechen/private-transformers
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: v0.2.3
Choose a base ref
...
head repository: lxuechen/private-transformers
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: main
Choose a head ref
  • 10 commits
  • 48 files changed
  • 1 contributor

Commits on Aug 15, 2022

  1. Copy the full SHA
    900a9a9 View commit details
  2. Fix scripts for plotting.

    lxuechen committed Aug 15, 2022
    Copy the full SHA
    6f3a1e5 View commit details

Commits on Sep 13, 2022

  1. Clean lora utils.

    lxuechen committed Sep 13, 2022
    Copy the full SHA
    a43c1be View commit details
  2. Lora easy access.

    lxuechen committed Sep 13, 2022
    Copy the full SHA
    0b275b5 View commit details

Commits on Nov 4, 2022

  1. Add figure.

    lxuechen committed Nov 4, 2022
    Copy the full SHA
    f9ec3eb View commit details
  2. Update visuals.

    lxuechen committed Nov 4, 2022
    Copy the full SHA
    3f265d6 View commit details

Commits on Nov 8, 2022

  1. Refine visuals.

    lxuechen committed Nov 8, 2022
    Copy the full SHA
    42b7d2d View commit details

Commits on Dec 9, 2022

  1. Patch licenses.

    lxuechen committed Dec 9, 2022
    Copy the full SHA
    d6e4cc4 View commit details
  2. Patch license.

    lxuechen committed Dec 9, 2022
    Copy the full SHA
    ef97da7 View commit details
  3. Citation.

    lxuechen committed Dec 9, 2022
    Copy the full SHA
    18ccc4e View commit details
Showing with 983 additions and 12 deletions.
  1. +10 −0 README.md
  2. +13 −0 examples/__init__.py
  3. +13 −0 examples/classification/__init__.py
  4. +14 −0 examples/classification/data/make_k_shot_without_dev.py
  5. +14 −0 examples/classification/data/make_valid_data.py
  6. +14 −0 examples/classification/run_classification.py
  7. +14 −0 examples/classification/run_wrapper.py
  8. BIN examples/classification/spectral_analysis/3d_surface.png
  9. +13 −0 examples/classification/spectral_analysis/__init__.py
  10. +167 −0 examples/classification/spectral_analysis/density.py
  11. +14 −0 examples/classification/spectral_analysis/geometric_median.py
  12. +17 −2 examples/classification/spectral_analysis/rebuttal_neurips_2022.py
  13. +19 −4 examples/classification/spectral_analysis/rebuttal_plots_neurips_2022.py
  14. +126 −0 examples/classification/spectral_analysis/visuals.ipynb
  15. +13 −0 examples/classification/src/__init__.py
  16. +14 −0 examples/classification/src/common.py
  17. +14 −0 examples/classification/src/compiled_args.py
  18. +14 −0 examples/classification/src/dataset.py
  19. +14 −0 examples/classification/src/label_search.py
  20. +14 −0 examples/classification/src/models.py
  21. +14 −0 examples/classification/src/processors.py
  22. +1 −2 examples/classification/src/trainer.py
  23. +13 −0 examples/image_classification/__init__.py
  24. +14 −0 examples/image_classification/main.py
  25. +13 −0 examples/table2text/__init__.py
  26. +14 −0 examples/table2text/compiled_args.py
  27. +13 −0 examples/table2text/data_utils/__init__.py
  28. +14 −0 examples/table2text/data_utils/data_collator.py
  29. +14 −0 examples/table2text/data_utils/language_modeling.py
  30. +14 −0 examples/table2text/decoding_utils.py
  31. +1 −2 examples/table2text/density.py
  32. +14 −0 examples/table2text/misc.py
  33. +14 −0 examples/table2text/models.py
  34. +1 −0 examples/table2text/run_language_modeling.py
  35. +14 −0 examples/table2text/trainer.py
  36. +15 −0 private_transformers/__init__.py
  37. +13 −0 private_transformers/accounting/__init__.py
  38. +14 −0 private_transformers/accounting/accounting_manager.py
  39. +16 −2 private_transformers/accounting/rdp_accounting.py
  40. +15 −0 private_transformers/autograd_grad_sample.py
  41. +113 −0 private_transformers/lora_utils.py
  42. +15 −0 private_transformers/privacy_engine.py
  43. +14 −0 private_transformers/settings.py
  44. +14 −0 private_transformers/supported_layers_grad_samplers.py
  45. +14 −0 private_transformers/transformers_support.py
  46. +14 −0 setup.py
  47. +13 −0 tests/__init__.py
  48. +14 −0 tests/test_privacy_engine.py
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -185,4 +185,14 @@ If you found this codebase useful in your research, please consider citing:
year={2022},
url={https://openreview.net/forum?id=bVuP3ltATMz}
}
@inproceedings{
li2022when,
title={When Does Differentially Private Learning Not Suffer in High Dimensions?},
author={Xuechen Li and Daogao Liu and Tatsunori Hashimoto and Huseyin A Inan and Janardhan Kulkarni and YinTat Lee and Abhradeep Guha Thakurta},
booktitle={Advances in Neural Information Processing Systems},
editor={Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},
year={2022},
url={https://openreview.net/forum?id=FR--mkQu0dw}
}
```
13 changes: 13 additions & 0 deletions examples/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) Xuechen Li. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
13 changes: 13 additions & 0 deletions examples/classification/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) Xuechen Li. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 changes: 14 additions & 0 deletions examples/classification/data/make_k_shot_without_dev.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) Xuechen Li. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""The datasets in the k-shot folder contain dev.tsv; we make the test set the dev set in the new k-shot.
python -m classification.data.make_k_shot_without_dev
14 changes: 14 additions & 0 deletions examples/classification/data/make_valid_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) Xuechen Li. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Make the separate validation data, so that we don't tune on dev set.
python -m classification.data.make_valid_data
14 changes: 14 additions & 0 deletions examples/classification/run_classification.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) Xuechen Li. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Finetuning the library models for sequence classification on GLUE."""

import collections
14 changes: 14 additions & 0 deletions examples/classification/run_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) Xuechen Li. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Wrapper launcher script."""

import os
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions examples/classification/spectral_analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) Xuechen Li. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
167 changes: 167 additions & 0 deletions examples/classification/spectral_analysis/density.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# Copyright (c) Xuechen Li. All Rights Reserved.
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Code for converting Lanczos outputs to densities."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np


def eigv_to_density(eig_vals, all_weights=None, grids=None,
                    grid_len=10000, sigma_squared=None, grid_expand=1e-2):
    """Compute the smoothed spectral density from a set of eigenvalues.

    Convolves the given eigenvalues with a Gaussian kernel, weighting the
    values by all_weights (or uniform weighting if all_weights is None).
    Example output can be seen in Figure 1 of
    https://arxiv.org/pdf/1901.10159.pdf. Visualizing the estimated density
    can be done by calling plt.plot(grids, density). There is likely not a
    best value of sigma_squared that works for all use cases, so it is
    recommended to try multiple values in the range [1e-5, 1e-1].

    Args:
        eig_vals: Array of shape [num_draws, order].
        all_weights: Array of shape [num_draws, order]. If None, every
            eigenvalue of a draw gets the uniform weight 1 / order.
        grids: Array of shape [grid_len] holding the points the density is
            evaluated on. If None, grid_len evenly spaced points between
            lambda_min and lambda_max (see grid_expand) are used. The
            normalization uses grids[1] - grids[0] as the cell width, so the
            grid needs at least two points and is treated as evenly spaced.
        grid_len: Integer number of grid points; only used if grids is None.
        sigma_squared: Scalar controlling the smoothing. The value handed to
            the kernel as its variance is
            sigma_squared * max(1, lambda_max - lambda_min); if None, 1e-5
            is used in place of sigma_squared.
        grid_expand: Margin added around the eigenvalue range.
            lambda_min = mean over draws of each draw's smallest eigenvalue,
            minus grid_expand; lambda_max = mean over draws of each draw's
            largest eigenvalue, plus grid_expand (NaN draws are ignored by
            the mean).

    Returns:
        density: Array of shape [grid_len], the estimated density averaged
            over all draws (NaN-aware mean) and normalized so that
            sum(density) * (grids[1] - grids[0]) == 1.
        grids: Array of shape [grid_len]. The values the density is
            estimated on.

    Raises:
        ValueError: If the first eigenvalue of any draw is NaN.
        AssertionError: If both grids and grid_len are None.
    """
    if all_weights is None:
        all_weights = np.ones(eig_vals.shape) * 1.0 / float(eig_vals.shape[1])
    num_draws = eig_vals.shape[0]

    lambda_max = np.nanmean(np.max(eig_vals, axis=1), axis=0) + grid_expand
    lambda_min = np.nanmean(np.min(eig_vals, axis=1), axis=0) - grid_expand

    if grids is None:
        assert grid_len is not None, 'grid_len is required if grids is None.'
        grids = np.linspace(lambda_min, lambda_max, num=grid_len)

    grid_len = grids.shape[0]
    if sigma_squared is None:
        sigma = 10 ** -5 * max(1, (lambda_max - lambda_min))
    else:
        sigma = sigma_squared * max(1, (lambda_max - lambda_min))

    # The NaN check does not depend on the grid, so do it once for all draws
    # before any density is computed.
    if np.isnan(eig_vals[:, 0]).any():
        raise ValueError('tridiag has nan values.')

    # _kernel only performs elementwise arithmetic on its inputs, so a column
    # of grid points broadcasts against a row of eigenvalues and yields a
    # [grid_len, order] table in one call instead of one call per grid point.
    grid_column = grids[:, np.newaxis]
    density_each_draw = np.zeros((num_draws, grid_len))
    for i in range(num_draws):
        vals = _kernel(eig_vals[i, :], grid_column, sigma)
        density_each_draw[i, :] = np.sum(vals * all_weights[i, :], axis=1)

    density = np.nanmean(density_each_draw, axis=0)
    # Riemann-sum normalization using the spacing of the first grid cell.
    norm_fact = np.sum(density) * (grids[1] - grids[0])
    density = density / norm_fact
    return density, grids


def tridiag_to_eigv(tridiag_list):
    """Preprocess the tridiagonal matrices for density estimation.

    Each matrix is eigendecomposed with np.linalg.eigh. The eigenvalues
    become the nodes and the squared first component of each eigenvector
    becomes the weight of the corresponding node. The output of this
    function can be fed directly into eigv_to_density.

    Args:
        tridiag_list: Array of shape [num_draws, order, order], or a list of
            num_draws matrices of shape [order, order]: the tridiagonal
            matrices computed from num_draws independent runs of Lanczos.
            Must contain at least one matrix, because the order is read
            from the first one.

    Returns:
        eig_vals: Array of shape [num_draws, order]. The eigenvalues of the
            tridiagonal matrices, ascending within each draw.
        all_weights: Array of shape [num_draws, order]. The weights
            associated with each eigenvalue. These weights are to be used in
            the kernel density estimate.
    """
    # Calculating the nodes / weights from Jacobi matrices.
    num_draws = len(tridiag_list)
    num_lanczos = tridiag_list[0].shape[0]
    eig_vals = np.zeros((num_draws, num_lanczos))
    all_weights = np.zeros((num_draws, num_lanczos))
    for i, tridiag in enumerate(tridiag_list):
        # eigh returns eigenvalues in ascending order with the eigenvector
        # columns in matching order, so no extra argsort/re-index is needed.
        nodes, evecs = np.linalg.eigh(tridiag)
        eig_vals[i, :] = nodes
        # Row 0 of evecs holds the first component of every eigenvector.
        all_weights[i, :] = evecs[0] ** 2
    return eig_vals, all_weights


def tridiag_to_density(tridiag_list, sigma_squared=1e-5, grid_len=10000):
    """Estimate the smoothed spectral density from the output of Lanczos.

    Chains tridiag_to_eigv (tridiagonal matrices -> eigenvalues and weights)
    with eigv_to_density (eigenvalues and weights -> smoothed density).

    Args:
        tridiag_list: Array of shape [num_draws, order, order]. The
            tridiagonal matrices computed from running num_draws independent
            runs of Lanczos.
        sigma_squared: Controls the smoothing of the density.
        grid_len: Controls the granularity of the density.

    Returns:
        density: Array of size [grid_len]. The smoothed density estimate
            averaged over all num_draws.
        grids: Array of size [grid_len]. The values the density estimate is
            evaluated on.
    """
    nodes, weights = tridiag_to_eigv(tridiag_list)
    smoothed, grid_points = eigv_to_density(
        nodes,
        weights,
        grid_len=grid_len,
        sigma_squared=sigma_squared,
    )
    return smoothed, grid_points


def _kernel(x, x0, variance):
    """Evaluate a normalized Gaussian kernel.

    Computes C * exp(-(x - x0)^2 / (2 * variance)) with
    C = 1 / sqrt(2 * pi * variance).

    Args:
        x: Scalar or array. Points to evaluate the kernel on.
        x0: Scalar. Mean of the kernel.
        variance: Scalar. Variance of the kernel.

    Returns:
        point_estimate: The kernel value(s); a scalar for scalar inputs,
            otherwise an array following the shape of x0 - x.
    """
    normalizer = 1.0 / np.sqrt(2 * math.pi * variance)
    exponent = -(x0 - x) ** 2 / (2.0 * variance)
    return normalizer * np.exp(exponent)
14 changes: 14 additions & 0 deletions examples/classification/spectral_analysis/geometric_median.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) Xuechen Li. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Toy example on geometric median estimation in the paper.
19 changes: 17 additions & 2 deletions examples/classification/spectral_analysis/rebuttal_neurips_2022.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
# Copyright (c) Xuechen Li. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Experiments ran pre- and post-rebuttals."""
import logging
import os
from typing import Optional

import fire
import torch
import tqdm
@@ -47,6 +62,7 @@ def run_pca(
eval_steps=5, # Evaluate PCA accuracy once this many iterations.
save_steps=5, # Save eigenvalue and eigenvector tensors once this many iterations.
disable_tqdm=False,
dtype="float", # String repr of dtype.
):
utils.manual_seed(seed)

@@ -57,7 +73,7 @@ def run_pca(
tgt_ckpts = all_ckpts[start_index:start_index + n]
dataset = torch.stack([
torch.load(ckpt_path)["flat_grad"] for ckpt_path in tqdm.tqdm(tgt_ckpts, desc="load data")
])
]).to(utils.get_dtype(dtype))
input_mat = DataLoader(dataset=TensorDataset(dataset), batch_size=batch_size)

def callback(global_step, eigenvalues, eigenvectors):
@@ -80,7 +96,6 @@ def callback(global_step, eigenvalues, eigenvectors):
input_mat=input_mat,
k=k,
num_power_iteration=num_power_iteration,
dtype=torch.get_default_dtype(),
callback=callback,
disable_tqdm=disable_tqdm,
)
Loading