FocoosAI · CuriousDolphin · Jun 6, 2025 · Jun 5, 2025 · Jun 5, 2025 · Jun 5, 2025
diff --git a/README.md b/README.md
@@ -72,7 +72,7 @@ valid_dataset = auto_dataset.get_split(augs=val_augs.get_augmentations(), split=
 model = ModelManager.get("fai-detr-l-obj365")
 
 args = TrainerArgs(
-    run_name=f"{ds_name}-{model.model_info.name}",
+    run_name=f"{model.name}_{train_dataset.name}",
     batch_size=16,
     max_iters=50,
     eval_period=50,

diff --git a/docs/training.md b/docs/training.md
@@ -90,14 +90,14 @@ Optionally, if you are using the hub, you can specify `sync_to_hub=True` to trac
 from focoos.ports import TrainerArgs
 
 args = TrainerArgs(
-    run_name="football-tutorial",  # the name of the experiment
-    output_dir="./experiments",  # the folder where the model is saved
+    run_name=f"{model.name}_{train_dataset.name}",  # the name of the experiment
+    output_dir="./experiments",  # the folder where the model is saved (default: ~/FocoosAI/models)
     batch_size=16,  # how many images in each iteration
     max_iters=500,  # how many iterations lasts the training
     eval_period=100,  # period after we eval the model on the validation (in iterations)
     learning_rate=0.0001,  # learning rate
     weight_decay=0.0001,  # regularization strenght (set it properly to avoid under/over fitting)
-    sync_to_hub=True,  # Use this to see the model under training on the platform
+    sync_to_hub=True,  # Use this to sync model info, weights and metrics to the platform
 )
 ```
 

diff --git a/focoos/data/datasets/map_dataset.py b/focoos/data/datasets/map_dataset.py
@@ -68,6 +68,14 @@ def __getitem__(self, idx):
             if retry_count >= 3:
                 self.logger.warning("Failed to apply `_map_func` for idx: {}, retry count: {}".format(idx, retry_count))
 
+    @property
+    def name(self):
+        return self.dataset.metadata.name
+
+    @property
+    def task(self):
+        return self.dataset.metadata.task
+
     def preview(self, index=None, use_augmentations=True):
         if not use_augmentations:
             current_augmentations = self.mapper.augmentations

diff --git a/focoos/models/focoos_model.py b/focoos/models/focoos_model.py
@@ -152,6 +152,7 @@ def _setup_model_for_training(self, train_args: TrainerArgs, data_train: MapData
         self.model_info.config["num_classes"] = len(data_train.dataset.metadata.classes)
         self._reload_model()
         self.model_info.name = train_args.run_name.strip()
+        self.processor = ProcessorManager.get_processor(self.model_info.model_family, self.model_info.config)
         assert self.model_info.task == data_train.dataset.metadata.task, "Task mismatch between model and dataset."
 
     def train(self, args: TrainerArgs, data_train: MapDataset, data_val: MapDataset, hub: Optional[FocoosHUB] = None):
@@ -252,6 +253,10 @@ def test(self, args: TrainerArgs, data_test: MapDataset):
         else:
             run_test(args, data_test, self.model, self.processor, self.model_info)
 
+    @property
+    def name(self):
+        return self.model_info.name
+
     @property
     def device(self):
         """Get the device where the model is located.
@@ -299,7 +304,7 @@ def task(self):
 
     def export(
         self,
-        runtime_type: RuntimeType = RuntimeType.ONNX_CUDA32,
+        runtime_type: RuntimeType = RuntimeType.TORCHSCRIPT_32,
         onnx_opset: int = 17,
         out_dir: Optional[str] = None,
         device: Literal["cuda", "cpu"] = "cuda",

diff --git a/focoos/trainer/hooks/sync_to_hub.py b/focoos/trainer/hooks/sync_to_hub.py
@@ -67,8 +67,8 @@ def after_step(self):
             )
 
     def after_train(self):
+        # If training raised, sync the training info here; final weights are synced by the main trainer function
         exc_type, exc_value, exc_traceback = sys.exc_info()
-        status = ModelStatus.TRAINING_COMPLETED
         if exc_type is not None:
             logger.error(
                 f"Exception during training, status set to TRAINING_ERROR: {str(exc_type.__name__)} {str(exc_value)}"
@@ -88,23 +88,20 @@ def after_train(self):
                         detail=f"{str(exc_type.__name__)}:  {str(exc_value)}",
                     )
                 )
-
-        self.model_info.dump_json(os.path.join(self.output_dir, ArtifactName.INFO))
-        self._sync_train_job(
-            sync_info=HubSyncLocalTraining(
-                status=status,
-                iterations=self.iteration,
-                training_info=self.model_info.training_info,
-            ),
-            upload_artifacts=[
-                ArtifactName.WEIGHTS,
-                ArtifactName.LOGS,
-                ArtifactName.PT,
-                ArtifactName.ONNX,
-                ArtifactName.INFO,
-                ArtifactName.METRICS,
-            ],
-        )
+            self.model_info.dump_json(os.path.join(self.output_dir, ArtifactName.INFO))
+            self._sync_train_job(
+                sync_info=HubSyncLocalTraining(
+                    status=status,
+                    iterations=self.iteration,
+                    training_info=self.model_info.training_info,
+                ),
+                upload_artifacts=[
+                    ArtifactName.WEIGHTS,
+                    ArtifactName.LOGS,
+                    ArtifactName.INFO,
+                    ArtifactName.METRICS,
+                ],
+            )
 
     def _sync_train_job(self, sync_info: HubSyncLocalTraining, upload_artifacts: Optional[List[ArtifactName]] = None):
         try:

diff --git a/focoos/trainer/hooks/visualization.py b/focoos/trainer/hooks/visualization.py
@@ -125,75 +125,78 @@ def _create_mosaic(self, images):
     def _visualize(self):
         training_mode = self.model.training
 
-        with ExitStack() as stack:
-            stack.enter_context(torch.no_grad())
-            stack.enter_context(inference_context(self.model))
-            stack.enter_context(inference_context(self.processor))
-
-            storage = get_event_storage()
-            self.model.eval()
-
-            all_visualized_images = []
-
-            for i in range(self.n_sample):
-                sample = self.samples[i]
-                sample["height"], sample["width"] = sample["image"].shape[-2:]
-
-                samples = [sample]
-                images, _ = self.processor.preprocess(samples, device=self.model.device, dtype=self.model.dtype)
-                outputs = self.model(images)
-                prediction = self.processor.eval_postprocess(outputs, samples)[0]
-
-                visualizer = Visualizer(
-                    sample["image"].permute(1, 2, 0).cpu().numpy(),
-                    self.metadata,
-                    instance_mode=ColorMode.IMAGE,
-                )
-                if "panoptic_seg" in prediction:
-                    panoptic_seg, segments_info = prediction["panoptic_seg"]
-                    vis_output = visualizer.draw_panoptic_seg_predictions(
-                        panoptic_seg.to(self.cpu_device), segments_info
+        try:
+            with ExitStack() as stack:
+                stack.enter_context(torch.no_grad())
+                stack.enter_context(inference_context(self.model))
+                stack.enter_context(inference_context(self.processor))
+
+                storage = get_event_storage()
+                self.model.eval()
+
+                all_visualized_images = []
+
+                for i in range(self.n_sample):
+                    sample = self.samples[i]
+                    sample["height"], sample["width"] = sample["image"].shape[-2:]
+
+                    samples = [sample]
+                    images, _ = self.processor.preprocess(samples, device=self.model.device, dtype=self.model.dtype)
+                    outputs = self.model(images)
+                    prediction = self.processor.eval_postprocess(outputs, samples)[0]
+
+                    visualizer = Visualizer(
+                        sample["image"].permute(1, 2, 0).cpu().numpy(),
+                        self.metadata,
+                        instance_mode=ColorMode.IMAGE,
                     )
-                elif "sem_seg" in prediction:
-                    vis_output = visualizer.draw_sem_seg(prediction["sem_seg"].argmax(dim=0).to(self.cpu_device))
-                elif "instances" in prediction:
-                    instances = prediction["instances"].to(self.cpu_device)
-                    # filter based on confidence - fixed at 0.5
-                    instances = instances[instances.scores > 0.5]
-                    vis_output = visualizer.draw_instance_predictions(predictions=instances)
-                else:
-                    vis_output = None
-
-                if vis_output is not None:
-                    pred_img = vis_output.get_image()
-                    # Non salviamo più i singoli samples nello storage
-                    all_visualized_images.append(pred_img)
-
-            # Create and save mosaic if we have images and output directory
-            if all_visualized_images:
-                # Get current iteration for filename
-                try:
-                    current_iter = self.trainer.iter
-                except (AttributeError, TypeError):
-                    current_iter = 0
-
-                # Create mosaic
-                mosaic = self._create_mosaic(all_visualized_images)
-
-                if mosaic is not None:
-                    # Salva il mosaico nello storage invece dei singoli samples
-                    mosaic_transposed = mosaic.transpose(2, 0, 1)  # HWC -> CHW
-                    storage.put_image("Samples_Mosaic", mosaic_transposed)
-
-                    # Save to disk if output_dir is provided
-                    if self.output_dir is not None:
-                        preview_dir = os.path.join(self.output_dir, "preview")
-                        os.makedirs(preview_dir, exist_ok=True)
-
-                        # Include iteration in filename
-                        output_path = os.path.join(preview_dir, f"samples_iter_{current_iter}.jpg")
-                        encode_params = [cv2.IMWRITE_JPEG_QUALITY, 80]
-                        cv2.imwrite(output_path, mosaic, encode_params)
+                    if "panoptic_seg" in prediction:
+                        panoptic_seg, segments_info = prediction["panoptic_seg"]
+                        vis_output = visualizer.draw_panoptic_seg_predictions(
+                            panoptic_seg.to(self.cpu_device), segments_info
+                        )
+                    elif "sem_seg" in prediction:
+                        vis_output = visualizer.draw_sem_seg(prediction["sem_seg"].argmax(dim=0).to(self.cpu_device))
+                    elif "instances" in prediction:
+                        instances = prediction["instances"].to(self.cpu_device)
+                        # filter based on confidence - fixed at 0.5
+                        instances = instances[instances.scores > 0.5]
+                        vis_output = visualizer.draw_instance_predictions(predictions=instances)
+                    else:
+                        vis_output = None
+
+                    if vis_output is not None:
+                        pred_img = vis_output.get_image()
+                        # Individual samples are no longer saved to the storage
+                        all_visualized_images.append(pred_img)
+
+                # Create and save mosaic if we have images and output directory
+                if all_visualized_images:
+                    # Get current iteration for filename
+                    try:
+                        current_iter = self.trainer.iter
+                    except (AttributeError, TypeError):
+                        current_iter = 0
+
+                    # Create mosaic
+                    mosaic = self._create_mosaic(all_visualized_images)
+
+                    if mosaic is not None:
+                        # Save the mosaic to the storage instead of the individual samples
+                        mosaic_transposed = mosaic.transpose(2, 0, 1)  # HWC -> CHW
+                        storage.put_image("Samples_Mosaic", mosaic_transposed)
+
+                        # Save to disk if output_dir is provided
+                        if self.output_dir is not None:
+                            preview_dir = os.path.join(self.output_dir, "preview")
+                            os.makedirs(preview_dir, exist_ok=True)
+
+                            # Include iteration in filename
+                            output_path = os.path.join(preview_dir, f"samples_iter_{current_iter}.jpg")
+                            encode_params = [cv2.IMWRITE_JPEG_QUALITY, 80]
+                            cv2.imwrite(output_path, mosaic, encode_params)
+        except Exception as e:
+            logger.warning(f"Exception during visualization hook: {e}")
 
         # set model back to training mode
         self.model.train(training_mode)

diff --git a/focoos/trainer/trainer.py b/focoos/trainer/trainer.py
@@ -21,7 +21,7 @@
 from focoos.hub.remote_model import RemoteModel
 from focoos.models.focoos_model import BaseModelNN
 from focoos.nn.layers.norm import FrozenBatchNorm2d
-from focoos.ports import ArtifactName, ModelInfo, ModelStatus, Task, TrainerArgs, TrainingInfo
+from focoos.ports import ArtifactName, HubSyncLocalTraining, ModelInfo, ModelStatus, Task, TrainerArgs, TrainingInfo
 from focoos.processor.base_processor import Processor
 from focoos.trainer.checkpointer import Checkpointer
 from focoos.trainer.evaluation.evaluator import inference_on_dataset
@@ -478,6 +478,19 @@ def train(self):
         trainer_loop.train(start_iter=start_iter, max_iter=args.max_iters)
         self.finished = True
         self.finish()
+        if comm.is_main_process() and self.remote_model and self.args.sync_to_hub:
+            self.remote_model.sync_local_training_job(
+                local_training_info=HubSyncLocalTraining(
+                    status=ModelStatus.TRAINING_COMPLETED,
+                    iterations=self.args.max_iters,
+                    training_info=self.model_info.training_info,
+                ),
+                dir=self.output_dir,
+                upload_artifacts=[
+                    ArtifactName.WEIGHTS,
+                    ArtifactName.METRICS,
+                ],
+            )
 
     def test(self, restore_best: bool = False):
         """Run model evaluation on test set.
@@ -516,8 +529,6 @@ def test(self, restore_best: bool = False):
             ):
                 self.model_info.val_metrics = raw_metrics
 
-        self.finished = True
-        self.finish()
         return eval_result
 
     def _update_training_info_and_dump(self, new_status: ModelStatus, detail: Optional[str] = None):

diff --git a/focoos/utils/visualizer.py b/focoos/utils/visualizer.py
@@ -487,11 +487,10 @@ def _create_text_labels(classes, scores, class_names, is_crowd=None):
         list[str] or None
     """
     labels = None
-    if classes is not None:
-        if class_names is not None and len(class_names) > 0:
-            labels = [class_names[i] for i in classes]
-        else:
-            labels = [str(i) for i in classes]
+    if classes is not None and class_names is not None:
+        labels = [class_names[i] if i < len(class_names) else str(i) for i in classes]
+    else:
+        labels = [str(i) for i in classes]
     if scores is not None:
         if labels is None:
             labels = ["{:.0f}%".format(s * 100) for s in scores]

diff --git a/pyproject.toml b/pyproject.toml
@@ -51,7 +51,6 @@ dependencies = [
     "pycocotools~=2.0.8",
     "faster_coco_eval~=1.6.6",
     "tensorboard~=2.19.0",
-
     "orjson~=3.10.18",
     "gradio~=5.31.0",
     "torch~=2.7.0",
@@ -70,7 +69,6 @@ keywords = [
 tensorrt = ["tensorrt==10.5.0"]
 onnx = ["onnxruntime-gpu==1.22.0", "onnx>=1.17.0", "onnxslim~=0.1.54", "onnxscript~=0.2.7"]
 onnx-cpu = ["onnxruntime==1.22.0","onnx>=1.18.0", "onnxslim~=0.1.54", "onnxscript~=0.2.7"]
-
 dev = [
     "pytest",
     "pytest-cov",

diff --git a/tutorials/hub.ipynb b/tutorials/hub.ipynb
@@ -207,7 +207,7 @@
     "    model = ModelManager.get(\"fai-detr-l-obj365\")\n",
     "\n",
     "    args = TrainerArgs(\n",
-    "        run_name=f\"{remote_dataset.name}-{model.model_info.name}\",\n",
+    "        run_name=f\"{model.name}_{train_dataset.name}\",\n",
     "        output_dir=\"./experiments\",\n",
     "        amp_enabled=True,\n",
     "        batch_size=16,\n",
@@ -221,7 +221,9 @@
     "        sync_to_hub=True,  # use this to sync model info, weights and metrics on the hub\n",
     "    )\n",
     "\n",
-    "    model.train(args, train_dataset, valid_dataset, hub=hub)"
+    "    model.train(\n",
+    "        args, train_dataset, valid_dataset, hub=hub\n",
+    "    )  # hub is optional: if it is not provided and sync_to_hub is True, one is created automatically"
    ]
   }
  ],

diff --git a/tutorials/inference.ipynb b/tutorials/inference.ipynb
@@ -15,12 +15,10 @@
    ]
   },
   {
-   "cell_type": "raw",
-   "metadata": {
-    "vscode": {
-     "languageId": "raw"
-    }
-   },
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "%pip install 'focoos @ git+https://github.com/FocoosAI/focoos.git'"
    ]