Skip to content

Commit 2669e2f

Browse files
Revert "Multi Target Support (#651)" (#673)
This reverts commit a063c55.
1 parent: ee51962 · commit: 2669e2f

File tree

3 files changed

+9
-18
lines changed

3 files changed

+9
-18
lines changed

silnlp/nmt/config.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,6 @@ def translate_test_files(
320320
self,
321321
input_paths: List[Path],
322322
translation_paths: List[Path],
323-
src_trg_isos: List[Tuple[str, str]],
324323
produce_multiple_translations: bool = False,
325324
vref_paths: Optional[List[Path]] = None,
326325
ckpt: Union[CheckpointType, str, int] = CheckpointType.LAST,

silnlp/nmt/hugging_face_config.py

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,29 +1093,25 @@ def translate_test_files(
10931093
self,
10941094
input_paths: List[Path],
10951095
translation_paths: List[Path],
1096-
src_trg_isos: List[Tuple[str, str]],
10971096
produce_multiple_translations: bool = False,
10981097
vref_paths: Optional[List[Path]] = None,
10991098
ckpt: Union[CheckpointType, str, int] = CheckpointType.LAST,
11001099
) -> None:
1101-
lang_codes: Dict[str, str] = self._config.data["lang_codes"]
11021100
tokenizer = self._config.get_tokenizer()
11031101
model = self._create_inference_model(ckpt, tokenizer)
1104-
model.to(0)
1105-
model = torch.compile(model)
1106-
for input_path, translation_path, src_trg_iso, vref_path in zip(
1102+
pipeline = PretokenizedTranslationPipeline(
1103+
model=model,
1104+
tokenizer=tokenizer,
1105+
src_lang=self._config.test_src_lang,
1106+
tgt_lang=self._config.test_trg_lang,
1107+
device=0,
1108+
)
1109+
pipeline.model = torch.compile(pipeline.model)
1110+
for input_path, translation_path, vref_path in zip(
11071111
input_paths,
11081112
translation_paths,
1109-
src_trg_isos,
11101113
cast(Iterable[Optional[Path]], repeat(None) if vref_paths is None else vref_paths),
11111114
):
1112-
pipeline = PretokenizedTranslationPipeline(
1113-
model=model,
1114-
tokenizer=tokenizer,
1115-
src_lang=lang_codes.get(src_trg_iso[0]),
1116-
tgt_lang=lang_codes.get(src_trg_iso[1]),
1117-
device=0,
1118-
)
11191115
length = count_lines(input_path)
11201116
with ExitStack() as stack:
11211117
src_file = stack.enter_context(input_path.open("r", encoding="utf-8-sig"))

silnlp/nmt/test.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,6 @@ def test_checkpoint(
369369
translation_file_names: List[str] = []
370370
refs_patterns: List[str] = []
371371
translation_detok_file_names: List[str] = []
372-
src_trg_isos: List[Tuple[str, str]] = []
373372
suffix_str = "_".join(map(lambda n: book_number_to_id(n), sorted(books.keys())))
374373
if len(suffix_str) > 0:
375374
suffix_str += "-"
@@ -383,7 +382,6 @@ def test_checkpoint(
383382
translation_file_names.append(f"test.trg-predictions.txt.{suffix_str}")
384383
refs_patterns.append("test.trg.detok*.txt")
385384
translation_detok_file_names.append(f"test.trg-predictions.detok.txt.{suffix_str}")
386-
src_trg_isos.append((config.default_test_src_iso, config.default_test_trg_iso))
387385
else:
388386
# test data is split into separate files
389387
for src_iso in sorted(config.test_src_isos):
@@ -398,7 +396,6 @@ def test_checkpoint(
398396
translation_file_names.append(f"{prefix}.trg-predictions.txt.{suffix_str}")
399397
refs_patterns.append(f"{prefix}.trg.detok*.txt")
400398
translation_detok_file_names.append(f"{prefix}.trg-predictions.detok.txt.{suffix_str}")
401-
src_trg_isos.append((src_iso, trg_iso))
402399

403400
checkpoint_name = "averaged checkpoint" if step == -1 else f"checkpoint {step}"
404401

@@ -417,7 +414,6 @@ def test_checkpoint(
417414
model.translate_test_files(
418415
source_paths,
419416
translation_paths,
420-
src_trg_isos,
421417
produce_multiple_translations,
422418
vref_paths,
423419
step if checkpoint_type is CheckpointType.OTHER else checkpoint_type,

0 commit comments

Comments (0)