
Commit 379033b

[refactor] Refactor __init__ of abstract evaluator
[refactor] Collect shared variables in NamedTuples
[fix] Copy the budget passed to the evaluator params
[refactor] Add cross validation result manager for separate management
[refactor] Separate pipeline classes from abstract evaluator
[refactor] Refactor tae.py
[refactor] Increase the safety level of pipeline config
[test] Fix test_evaluation.py
[test] Fix test_abstract_evaluator.py 1 -- 3
[test] Add default pipeline config
[test] Modify the queue.empty check in a safer way
[test] Fix test_api.py
[test] Fix test_train_evaluator.py
[refactor] Refactor test_api.py before adding new tests
[refactor] Refactor test_tabular_xxx
[fix] Find the error in test_tabular_xxx
    Since the pipeline is updated after the evaluations and the previous code updated self.pipeline in the predict method, the dummy class only needed to override that method. The new code does this separately, so get_pipeline is overridden instead to reproduce the same results.
[fix] Fix the shape issue in regression and add a bug comment in a test
[refactor] Use keyword args to avoid unexpected bugs
[fix] Fix the ground truth of test_cv
    The weighting strategy for cross validation in the validation phase was changed so that the performance of each model is weighted proportionally to the size of its VALIDATION split, so the expected value had to change (see the weighting sketch below). The previous code weighted performance proportionally to the TRAINING split sizes in both the training and validation phases.
[fix] Change qsize --> Empty since qsize might not be reliable (see the queue-draining sketch below)
[refactor] Add cost for crash in autoPyTorchMetrics
[test] Remove self.pipeline since it duplicates self.pipelines
[fix] Fix attribute errors caused by the last change in curve extraction
[fix] Fix the issue when taking num_classes from a regression task
[fix] Deactivate saving the cv model in the holdout case
1 parent 1431980 commit 379033b
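For the test_cv change above, here is a minimal sketch of the described weighting, assuming per-split validation scores and validation-split sizes are already available; the names weighted_cv_score, split_scores and val_split_sizes are illustrative, not autoPyTorch API.

    import numpy as np

    def weighted_cv_score(split_scores: np.ndarray, val_split_sizes: np.ndarray) -> float:
        """Aggregate per-split validation scores, weighting each split
        proportionally to its VALIDATION size (the behaviour described in the
        commit message) rather than its TRAINING size."""
        weights = val_split_sizes / val_split_sizes.sum()
        return float(np.sum(weights * split_scores))

    # Example: three folds with validation sizes 100, 50 and 50
    print(weighted_cv_score(np.array([0.80, 0.70, 0.90]), np.array([100, 50, 50])))  # 0.80

The qsize --> Empty item follows the standard library's caveat that Queue.qsize() and empty() are only approximate, particularly for multiprocessing queues; draining with get_nowait() and catching queue.Empty is the safer pattern. A generic illustration, not the actual test code:

    import queue

    def drain(q: queue.Queue) -> list:
        """Collect everything currently in the queue without consulting qsize()."""
        items = []
        while True:
            try:
                items.append(q.get_nowait())
            except queue.Empty:
                return items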

File tree

14 files changed: +1615 −2019 lines changed


autoPyTorch/api/base_task.py (+16 −15)
@@ -48,8 +48,9 @@
 )
 from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager
 from autoPyTorch.ensemble.singlebest_ensemble import SingleBest
-from autoPyTorch.evaluation.abstract_evaluator import fit_and_suppress_warnings
-from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash
+from autoPyTorch.evaluation.abstract_evaluator import fit_pipeline
+from autoPyTorch.evaluation.pipeline_class_collection import get_default_pipeline_config
+from autoPyTorch.evaluation.tae import TargetAlgorithmQuery
 from autoPyTorch.evaluation.utils import DisableFileOutputParameters
 from autoPyTorch.optimizer.smbo import AutoMLSMBO
 from autoPyTorch.pipeline.base_pipeline import BasePipeline
@@ -685,22 +686,23 @@ def _do_dummy_prediction(self) -> None:
         # already be generated here!
         stats = Stats(scenario_mock)
         stats.start_timing()
-        ta = ExecuteTaFuncWithQueue(
+        taq = TargetAlgorithmQuery(
             pynisher_context=self._multiprocessing_context,
             backend=self._backend,
             seed=self.seed,
             metric=self._metric,
             logger_port=self._logger_port,
-            cost_for_crash=get_cost_of_crash(self._metric),
+            cost_for_crash=self._metric._cost_of_crash,
             abort_on_first_run_crash=False,
             initial_num_run=num_run,
+            pipeline_config=get_default_pipeline_config(choice='dummy'),
             stats=stats,
             memory_limit=memory_limit,
             disable_file_output=self._disable_file_output,
             all_supported_metrics=self._all_supported_metrics
         )

-        status, _, _, additional_info = ta.run(num_run, cutoff=self._time_for_task)
+        status, _, _, additional_info = taq.run(num_run, cutoff=self._time_for_task)
         if status == StatusType.SUCCESS:
             self._logger.info("Finished creating dummy predictions.")
         else:
@@ -769,13 +771,13 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs:
         # already be generated here!
         stats = Stats(scenario_mock)
         stats.start_timing()
-        ta = ExecuteTaFuncWithQueue(
+        taq = TargetAlgorithmQuery(
             pynisher_context=self._multiprocessing_context,
             backend=self._backend,
             seed=self.seed,
             metric=self._metric,
             logger_port=self._logger_port,
-            cost_for_crash=get_cost_of_crash(self._metric),
+            cost_for_crash=self._metric._cost_of_crash,
             abort_on_first_run_crash=False,
             initial_num_run=self._backend.get_next_num_run(),
             stats=stats,
@@ -786,7 +788,7 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs:
             dask_futures.append([
                 classifier,
                 self._dask_client.submit(
-                    ta.run, config=classifier,
+                    taq.run, config=classifier,
                     cutoff=func_eval_time_limit_secs,
                 )
             ])
@@ -1076,7 +1078,7 @@ def _search(

         # Here the budget is set to max because the SMAC intensifier can be:
         # Hyperband: in this case the budget is determined on the fly and overwritten
-        # by the ExecuteTaFuncWithQueue
+        # by the TargetAlgorithmQuery
         # SimpleIntensifier (and others): in this case, we use max_budget as a target
         # budget, and hece the below line is honored
         self.pipeline_options[budget_type] = max_budget
@@ -1360,7 +1362,7 @@ def refit(
                 dataset_properties=dataset_properties,
                 dataset=dataset,
                 split_id=split_id)
-            fit_and_suppress_warnings(self._logger, model, X, y=None)
+            fit_pipeline(self._logger, model, X, y=None)

         self._clean_logger()

@@ -1571,27 +1573,26 @@ def fit_pipeline(

         stats.start_timing()

-        tae = ExecuteTaFuncWithQueue(
+        taq = TargetAlgorithmQuery(
             backend=self._backend,
             seed=self.seed,
             metric=metric,
             logger_port=self._logger_port,
-            cost_for_crash=get_cost_of_crash(metric),
+            cost_for_crash=metric._cost_of_crash,
             abort_on_first_run_crash=False,
             initial_num_run=self._backend.get_next_num_run(),
             stats=stats,
             memory_limit=memory_limit,
             disable_file_output=disable_file_output,
             all_supported_metrics=all_supported_metrics,
-            budget_type=budget_type,
             include=include_components,
             exclude=exclude_components,
             search_space_updates=search_space_updates,
             pipeline_config=pipeline_options,
             pynisher_context=self._multiprocessing_context
         )

-        run_info, run_value = tae.run_wrapper(
+        run_info, run_value = taq.run_wrapper(
             RunInfo(config=configuration,
                     budget=budget,
                     seed=self.seed,
@@ -1603,7 +1604,7 @@ def fit_pipeline(

         fitted_pipeline = self._get_fitted_pipeline(
             dataset_name=dataset.dataset_name,
-            pipeline_idx=run_info.config.config_id + tae.initial_num_run,
+            pipeline_idx=run_info.config.config_id + taq.initial_num_run,
             run_info=run_info,
             run_value=run_value,
             disable_file_output=disable_file_output
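The diff above also swaps the free function get_cost_of_crash(metric) for a _cost_of_crash attribute carried by the metric itself ("[refactor] Add cost for crash in autoPyTorchMetrics"). A minimal sketch of the idea with a toy metric class; the attribute semantics used here (the worst achievable loss is charged to a crashed run) are an assumption, not a quote of the autoPyTorch implementation:

    from dataclasses import dataclass

    @dataclass
    class ToyMetric:
        """Illustrative stand-in for an autoPyTorch metric, not the real class."""
        name: str
        optimum: float
        worst_possible_result: float

        @property
        def _cost_of_crash(self) -> float:
            # Charge a crashed run the largest possible loss: the distance
            # between the best and worst achievable values of the metric.
            return abs(self.optimum - self.worst_possible_result)

    print(ToyMetric("accuracy", optimum=1.0, worst_possible_result=0.0)._cost_of_crash)  # 1.0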
New file (+5 −0)

@@ -0,0 +1,5 @@
+{
+    "budget_type": "epochs",
+    "epochs": 1,
+    "runtime": 1
+}
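The five added lines above define a pipeline config with a one-epoch budget, presumably the kind of entry returned by the get_default_pipeline_config(choice='dummy') call introduced in base_task.py. A hypothetical way to consume such a file; the file name and the real lookup in pipeline_class_collection.py are not shown in this excerpt:

    import json

    # Assumed file name, for illustration only.
    with open("dummy_pipeline_config.json") as f:
        pipeline_config = json.load(f)

    # One cheap consistency check: budget_type ('epochs') should name another
    # key in the config that carries the actual budget value.
    assert pipeline_config["budget_type"] in pipeline_config
    print(pipeline_config)  # {'budget_type': 'epochs', 'epochs': 1, 'runtime': 1}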
