This repository was archived by the owner on Jun 3, 2025. It is now read-only.

Commit fe598cb

Cherry pick fix transformers prediction (#717)
* Fix for prediction step when teacher model has more inputs than student.
* Updated signature of prediction_step method.
1 parent 32e6d84 commit fe598cb

File tree

1 file changed: 19 additions, 0 deletions
  • src/sparseml/transformers/sparsification


src/sparseml/transformers/sparsification/trainer.py

Lines changed: 19 additions & 0 deletions

```diff
@@ -365,6 +365,25 @@ def compute_loss(
 
         return (loss, student_outputs) if return_outputs else loss
 
+    def prediction_step(
+        self,
+        model: Module,
+        inputs: Dict[str, Union[torch.Tensor, Any]],
+        prediction_loss_only: bool,
+        ignore_keys: Optional[List[str]] = None,
+    ) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
+        """
+        Wraps the prediction step from the original trainer to remove any input entry
+        that should not be passed to model.
+        This situation may arise when distillation is used and the teacher model
+        contains more inputs than the student model.
+        """
+        self._check_super_defined("prediction_step")
+
+        inputs = {k: inputs[k] for k in inputs if k in self._model_signature_columns}
+
+        return super().prediction_step(model, inputs, prediction_loss_only, ignore_keys)
+
     def save_model(
         self,
         output_dir: Optional[str] = None,
```
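The core of the fix is the dict comprehension that drops any batch entry the student model's forward signature does not accept, so teacher-only inputs never reach the student during evaluation. A minimal standalone sketch of that filtering idea, using Python's inspect module (the TinyStudent class and signature_columns helper are hypothetical illustrations, not part of SparseML):

```python
import inspect


class TinyStudent:
    """Hypothetical student model whose forward only accepts two inputs."""

    def forward(self, input_ids, attention_mask=None):
        return sum(input_ids)


def signature_columns(model):
    # Parameter names that the model's forward() actually accepts
    return set(inspect.signature(model.forward).parameters)


# A distillation batch may carry extra entries consumed only by the teacher
batch = {
    "input_ids": [1, 2, 3],
    "attention_mask": [1, 1, 1],
    "teacher_layer_ids": [0, 5, 11],  # hypothetical teacher-only input
}

cols = signature_columns(TinyStudent())
filtered = {k: batch[k] for k in batch if k in cols}
print(sorted(filtered))  # prints ['attention_mask', 'input_ids']
```

SparseML's trainer caches these accepted names in `_model_signature_columns`; the comprehension in the diff is then exactly this kind of key filter applied to the evaluation inputs.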

0 commit comments
