Use torch.accelerator API in Siamese Network example #1337

Merged: 1 commit on May 2, 2025

30 changes: 30 additions & 0 deletions siamese_network/README.md
@@ -1,7 +1,37 @@
# Siamese Network Example
Siamese network for image similarity estimation.
The network is composed of two identical sub-networks, one for each input.
The outputs of the two sub-networks are concatenated and passed to a linear layer.
The output of the linear layer is passed through a sigmoid function.
[FaceNet](https://arxiv.org/pdf/1503.03832.pdf) is a variant of the Siamese network.
This implementation varies from FaceNet as we use the `ResNet-18` model from
[Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf) as our feature extractor.
In addition, we aren't using `TripletLoss` as the MNIST dataset is simple, so `BCELoss` can do the trick.
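
Below is a minimal, hedged sketch of the architecture described above (a shared `ResNet-18` feature extractor, concatenated features, a linear layer, and a sigmoid trained with `BCELoss`). The class and layer names are illustrative assumptions, not the exact code in `main.py`:

```python
# Illustrative sketch only; see main.py for the actual implementation.
import torch
import torch.nn as nn
import torchvision

class SiameseSketch(nn.Module):
    def __init__(self):
        super().__init__()
        # Shared ResNet-18 backbone used as the feature extractor.
        self.backbone = torchvision.models.resnet18(weights=None)
        # MNIST images are single-channel, so the first conv layer is replaced.
        self.backbone.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        feat_dim = self.backbone.fc.in_features
        self.backbone.fc = nn.Identity()
        # Concatenated features -> linear layer -> sigmoid.
        self.head = nn.Sequential(nn.Linear(feat_dim * 2, 1), nn.Sigmoid())

    def forward(self, x1, x2):
        # The same weights are applied to both inputs.
        f1, f2 = self.backbone(x1), self.backbone(x2)
        return self.head(torch.cat([f1, f2], dim=1)).squeeze(1)

# Example: BCELoss on a dummy batch of image pairs.
model = SiameseSketch()
criterion = nn.BCELoss()
x1, x2 = torch.randn(4, 1, 28, 28), torch.randn(4, 1, 28, 28)
target = torch.randint(0, 2, (4,)).float()
loss = criterion(model(x1, x2), target)
```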

```bash
pip install -r requirements.txt
python main.py
# CUDA_VISIBLE_DEVICES=2 python main.py  # to specify a particular GPU id, e.g. 2
```
Optionally, you can add the following arguments to customize your execution.

```bash
--batch-size input batch size for training (default: 64)
--test-batch-size input batch size for testing (default: 1000)
--epochs number of epochs to train (default: 14)
--lr learning rate (default: 1.0)
--gamma learning rate step gamma (default: 0.7)
--accel use accelerator
--dry-run quickly check a single pass
--seed random seed (default: 1)
--log-interval how many batches to wait before logging training status
--save-model Saving the current Model
```
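
For example, several of these flags can be combined in a single run (the values below are only illustrative):

```bash
python main.py --epochs 5 --batch-size 128 --lr 0.5 --save-model
```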

To execute on a GPU, add the `--accel` argument to the command. For example:

```bash
python main.py --accel
```

This command will execute the example on the detected GPU.
27 changes: 14 additions & 13 deletions siamese_network/main.py
```diff
@@ -105,7 +105,7 @@ def group_examples(self):
         """

         # get the targets from MNIST dataset
-        np_arr = np.array(self.dataset.targets.clone())
+        np_arr = np.array(self.dataset.targets.clone(), dtype=None, copy=None)

         # group examples based on class
         self.grouped_examples = {}
```
```diff
@@ -247,10 +247,8 @@ def main():
                         help='learning rate (default: 1.0)')
     parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                         help='Learning rate step gamma (default: 0.7)')
-    parser.add_argument('--no-cuda', action='store_true', default=False,
-                        help='disables CUDA training')
-    parser.add_argument('--no-mps', action='store_true', default=False,
-                        help='disables macOS GPU training')
+    parser.add_argument('--accel', action='store_true',
+                        help='use accelerator')
     parser.add_argument('--dry-run', action='store_true', default=False,
                         help='quickly check a single pass')
     parser.add_argument('--seed', type=int, default=1, metavar='S',
```

Contributor review comment (left on the new `--accel` argument):

Similar to #1338 (comment). @eromomon: I did not see this PR before it got merged, but you've changed the default in this example. Previously, acceleration was always used (if available) and there was a flag to disable it. Now the default is to not use acceleration and there is a flag to enable it. I personally like the new behavior better, but we need to change the CI script to reflect that, and that was not done:

```bash
uv run main.py --epochs 1 --dry-run || error "siamese network example failed"
```
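
If the CI is meant to keep exercising the accelerator path under the new default, one plausible update (not taken from this PR) would be to pass the new flag:

```bash
uv run main.py --epochs 1 --dry-run --accel || error "siamese network example failed"
```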
```diff
@@ -260,22 +258,25 @@ def main():
     parser.add_argument('--save-model', action='store_true', default=False,
                         help='For Saving the current Model')
     args = parser.parse_args()
-
-    use_cuda = not args.no_cuda and torch.cuda.is_available()
-    use_mps = not args.no_mps and torch.backends.mps.is_available()

     torch.manual_seed(args.seed)

-    if use_cuda:
-        device = torch.device("cuda")
-    elif use_mps:
-        device = torch.device("mps")
+    if args.accel and not torch.accelerator.is_available():
+        print("ERROR: accelerator is not available, try running on CPU")
+        sys.exit(1)
+    if not args.accel and torch.accelerator.is_available():
+        print("WARNING: accelerator is available, run with --accel to enable it")
+
+    if args.accel:
+        device = torch.accelerator.current_accelerator()
     else:
         device = torch.device("cpu")

+    print(f"Using device: {device}")
+
     train_kwargs = {'batch_size': args.batch_size}
     test_kwargs = {'batch_size': args.test_batch_size}
-    if use_cuda:
+    if device=="cuda":
         cuda_kwargs = {'num_workers': 1,
                        'pin_memory': True,
                        'shuffle': True}
```
2 changes: 1 addition & 1 deletion siamese_network/requirements.txt
```diff
@@ -1,2 +1,2 @@
 torch
-torchvision==0.20.0
+torchvision
```