
Commit a221b2a

Make scalar bias term optional in _batchnorm.py (e3nn#416)
1 parent bb71b3c commit a221b2a

File tree

6 files changed: +17 -16 lines

ChangeLog.md

Lines changed: 2 additions & 0 deletions
@@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
+### Added
+- Optional scalar bias term in `_batchnorm.py`
 
 ## [0.5.1] - 2022-12-12
 ### Added

e3nn/nn/_batchnorm.py

Lines changed: 15 additions & 4 deletions
@@ -32,6 +32,12 @@ class BatchNorm(nn.Module):
 
     instance : bool
         apply instance norm instead of batch norm
+
+    include_bias : bool
+        include a bias term for batch norm of scalars
+
+    normalization : str
+        which normalization method to apply (i.e., `norm` or `component`)
     """
 
     __constants__ = ["instance", "normalization", "irs", "affine"]
@@ -44,6 +50,7 @@ def __init__(
         affine: bool = True,
         reduce: str = "mean",
         instance: bool = False,
+        include_bias: bool = True,
         normalization: str = "component",
     ) -> None:
         super().__init__()
@@ -53,6 +60,7 @@ def __init__(
         self.momentum = momentum
         self.affine = affine
         self.instance = instance
+        self.include_bias = include_bias
 
         num_scalar = sum(mul for mul, ir in self.irreps if ir.is_scalar())
         num_features = self.irreps.num_irreps
@@ -67,10 +75,12 @@ def __init__(
 
         if affine:
             self.weight = nn.Parameter(torch.ones(num_features))
-            self.bias = nn.Parameter(torch.zeros(num_scalar))
+            if self.include_bias:
+                self.bias = nn.Parameter(torch.zeros(num_scalar))
         else:
             self.register_parameter("weight", None)
-            self.register_parameter("bias", None)
+            if self.include_bias:
+                self.register_parameter("bias", None)
 
         assert isinstance(reduce, str), "reduce should be passed as a string value"
         assert reduce in ["mean", "max"], "reduce needs to be 'mean' or 'max'"
@@ -171,7 +181,7 @@ def forward(self, input) -> torch.Tensor:
 
             field = field * field_norm.reshape(-1, 1, mul, 1)  # [batch, sample, mul, repr]
 
-            if self.affine and is_scalar:
+            if self.affine and self.include_bias and is_scalar:
                 bias = self.bias[ib : ib + mul]  # [mul]
                 ib += mul
                 field += bias.reshape(mul, 1)  # [batch, sample, mul, repr]
@@ -185,7 +195,8 @@ def forward(self, input) -> torch.Tensor:
             torch._assert(irv == self.running_var.size(0), "irv == self.running_var.size(0)")
         if self.affine:
            torch._assert(iw == self.weight.size(0), "iw == self.weight.size(0)")
-            torch._assert(ib == self.bias.numel(), "ib == self.bias.numel()")
+            if self.include_bias:
+                torch._assert(ib == self.bias.numel(), "ib == self.bias.numel()")
 
         if self.training and not self.instance:
             if len(new_means) > 0:
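A minimal usage sketch of the new flag (the irreps string and tensor shapes below are hypothetical; it assumes `BatchNorm` is imported from `e3nn.nn` as in this file):

    import torch
    from e3nn import o3
    from e3nn.nn import BatchNorm

    # Hypothetical irreps: 4 scalar (0e) channels and 2 vector (1o) channels.
    irreps = o3.Irreps("4x0e + 2x1o")

    # With include_bias=False, no bias parameter is created, so the scalar
    # channels are normalized and (if affine=True) rescaled, but never shifted.
    bn = BatchNorm(irreps, include_bias=False)

    x = torch.randn(10, irreps.dim)  # [batch, irreps.dim] = [10, 4 + 2*3]
    out = bn(x)
    print(out.shape)  # torch.Size([10, 10])

The default `include_bias=True` keeps the previous behavior, so existing code is unaffected.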

e3nn/o3/_tensor_product/_sub.py

Lines changed: 0 additions & 7 deletions
@@ -102,7 +102,6 @@ class ElementwiseTensorProduct(TensorProduct):
     """
 
     def __init__(self, irreps_in1, irreps_in2, filter_ir_out=None, irrep_normalization: str = None, **kwargs) -> None:
-
         irreps_in1 = o3.Irreps(irreps_in1).simplify()
         irreps_in2 = o3.Irreps(irreps_in2).simplify()
         if filter_ir_out is not None:
@@ -135,7 +134,6 @@ def __init__(self, irreps_in1, irreps_in2, filter_ir_out=None, irrep_normalizati
         for i, ((mul, ir_1), (mul_2, ir_2)) in enumerate(zip(irreps_in1, irreps_in2)):
             assert mul == mul_2
             for ir in ir_1 * ir_2:
-
                 if filter_ir_out is not None and ir not in filter_ir_out:
                     continue
 
@@ -179,7 +177,6 @@ def __init__(
         irrep_normalization: str = None,
         **kwargs,
     ) -> None:
-
        irreps_in1 = o3.Irreps(irreps_in1).simplify()
        irreps_in2 = o3.Irreps(irreps_in2).simplify()
        if filter_ir_out is not None:
@@ -193,7 +190,6 @@ def __init__(
        for i_1, (mul_1, ir_1) in enumerate(irreps_in1):
            for i_2, (mul_2, ir_2) in enumerate(irreps_in2):
                for ir_out in ir_1 * ir_2:
-
                    if filter_ir_out is not None and ir_out not in filter_ir_out:
                        continue
 
@@ -238,7 +234,6 @@ def _square_instructions_full(irreps_in, filter_ir_out=None, irrep_normalization
    for i_1, (mul_1, ir_1) in enumerate(irreps_in):
        for i_2, (mul_2, ir_2) in enumerate(irreps_in):
            for ir_out in ir_1 * ir_2:
-
                if filter_ir_out is not None and ir_out not in filter_ir_out:
                    continue
 
@@ -311,7 +306,6 @@ def _square_instructions_fully_connected(irreps_in, irreps_out, irrep_normalizat
        for i_2, (_mul_2, ir_2) in enumerate(irreps_in):
            for i_out, (_mul_out, ir_out) in enumerate(irreps_out):
                if ir_out in ir_1 * ir_2:
-
                    if irrep_normalization == "component":
                        alpha = ir_out.dim
                    if irrep_normalization == "norm":
@@ -374,7 +368,6 @@ def __init__(
        irrep_normalization: str = None,
        **kwargs,
    ) -> None:
-
        if irrep_normalization is None:
            irrep_normalization = "component"
 

examples/s2cnn/mnist/gendata.py

Lines changed: 0 additions & 2 deletions
@@ -199,15 +199,13 @@ def main() -> None:
    no_rotate = {"train": args.no_rotate_train, "test": args.no_rotate_test}
 
    for label, data in zip(["train", "test"], [mnist_train, mnist_test]):
-
        print(f"projecting {label} data set")
        current = 0
        signals = data["images"].reshape(-1, 28, 28).astype(np.float64)
        n_signals = signals.shape[0]
        projections = np.ndarray((signals.shape[0], 2 * args.bandwidth, 2 * args.bandwidth), dtype=np.uint8)
 
        while current < n_signals:
-
            if not no_rotate[label]:
                rot = rand_rotation_matrix(deflection=args.noise)
                rotated_grid = rotate_grid(rot, grid)

examples/s2cnn/mnist/train.py

Lines changed: 0 additions & 2 deletions
@@ -135,7 +135,6 @@ def forward(self, x):
 
 
 def load_data(path, batch_size):
-
    with gzip.open(path, "rb") as f:
        dataset = pickle.load(f)
 
@@ -192,7 +191,6 @@ def main() -> None:
    correct = 0
    total = 0
    for images, labels in test_loader:
-
        classifier.eval()
 
        with torch.no_grad():

tests/o3/cartesian_spherical_harmonics_test.py

Lines changed: 0 additions & 1 deletion
@@ -77,7 +77,6 @@ def func(pos):
 
 @pytest.mark.parametrize("l", range(10 + 1))
 def test_normalization(float_tolerance, l) -> None:
-
    n = o3.spherical_harmonics(l, torch.randn(3), normalize=True, normalization="integral").pow(2).mean()
    assert abs(n - 1 / (4 * math.pi)) < float_tolerance
 
