3838from ..config .autoconfig import AutoConfigResolver
3939from ..data_processing .assembler import TrainingExampleAssembler
4040from ..data_processing .dataset import make_json_schema
41- from ..data_processing .validation import validate_groupby_column
41+ from ..data_processing .validation import validate_groupby_column , validate_orderby_column
4242from ..defaults import (
4343 DEFAULT_VALID_RECORD_EVAL_BATCH_SIZE ,
4444 EVAL_STEPS ,
@@ -529,19 +529,6 @@ def prepare_params(self, **training_args):
529529 self .trainer = self ._create_trainer (self .train_args , data_collator )
530530 self ._configure_trainer_callbacks (self .trainer , training_args )
531531
532- def _validate_groupby_column (self , df ) -> None :
533- """Validate the groupby column exists and has no missing values.
534-
535- Args:
536- df: The DataFrame to validate.
537-
538- Raises:
539- ParameterError: If the groupby column doesn't exist.
540- DataError: If the groupby column has missing values.
541- """
542- col = self .params .data .group_training_examples_by
543- validate_groupby_column (df , col )
544-
545532 def _validate_orderby_column (self , df ) -> None :
546533 """Validate the orderby column exists in the dataset.
547534
@@ -558,10 +545,7 @@ def _validate_orderby_column(self, df) -> None:
558545 if self .params .time_series .is_timeseries and self .params .time_series .timestamp_column is None :
559546 return
560547
561- if orderby_col and orderby_col not in df .columns :
562- msg = f"Order by column '{ orderby_col } ' not found in the input data."
563- logger .error (msg )
564- raise ParameterError (msg )
548+ validate_orderby_column (df , orderby_col )
565549
566550 def _apply_preprocessing (self , df ):
567551 """Apply action_executor preprocessing if available.
@@ -642,8 +626,8 @@ def _log_dataset_statistics(self, assembler) -> None:
642626 def prepare_training_data (self ):
643627 """Validate, preprocess, and tokenize the training dataset.
644628
645- Runs auto-config resolution, time-series processing, groupby /
646- orderby validation , and assembles tokenized training examples.
629+ Validates groupby/orderby columns, resolves auto-config values,
630+ runs time-series preprocessing , and assembles tokenized training examples.
647631 Populates ``training_examples``, ``dataset_schema``,
648632 ``df_train``, and ``data_fraction``.
649633
@@ -660,7 +644,7 @@ def prepare_training_data(self):
660644 raise DataError ("Expected DataFrame from to_pandas(), got an iterator" )
661645
662646 # Validate groupby/orderby parameters as a preprocessing step.
663- self . _validate_groupby_column (df_all )
647+ validate_groupby_column (df_all , self . params . data . group_training_examples_by )
664648 self ._validate_orderby_column (df_all )
665649 self .params = AutoConfigResolver (df_all , self .params ).resolve ()
666650
0 commit comments