Skip to content

Commit 113df5c

Browse files
authored
Merge branch 'main' into feature/bump-aqua-version
2 parents c32464e + 3dd0dba commit 113df5c

File tree

10 files changed

+79
-30
lines changed

10 files changed

+79
-30
lines changed

ads/jobs/builders/infrastructure/dsc_job.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1751,6 +1751,7 @@ def is_multi_node_job(runtime):
17511751
return (
17521752
MULTI_NODE_JOB_SUPPORT
17531753
and isinstance(runtime, MultiNodeRuntime)
1754+
and runtime.replica
17541755
and runtime.replica > 1
17551756
)
17561757

ads/jobs/builders/infrastructure/dsc_job_runtime.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -365,6 +365,11 @@ def _get_node_group(self, dsc_job):
365365
dsc_job,
366366
"job_node_configuration_details.job_node_group_configuration_details_list",
367367
)
368+
if node_groups is None:
369+
node_groups = get_value(
370+
dsc_job,
371+
"job_node_configuration_details.jobNodeGroupConfigurationDetailsList",
372+
)
368373
if node_groups and len(node_groups) == 1:
369374
return node_groups[0]
370375
return None
@@ -373,6 +378,7 @@ def _get_replica(self, dsc_job, envs):
373378
node_group = self._get_node_group(dsc_job)
374379
if node_group:
375380
replica = get_value(node_group, "replicas")
381+
envs.pop(self.CONST_NODE_COUNT, None)
376382
elif not envs:
377383
replica = None
378384
elif self.CONST_WORKER_COUNT in envs:
@@ -399,7 +405,9 @@ def _extract_envs(self, dsc_job):
399405
env_attr = "job_configuration_details.environment_variables"
400406
node_group = self._get_node_group(dsc_job)
401407
if node_group:
402-
envs = get_value(node_group, env_attr)
408+
envs = get_value(node_group, env_attr) or get_value(
409+
node_group, "jobConfigurationDetails.environment_variables"
410+
)
403411
else:
404412
envs = get_value(dsc_job, env_attr)
405413
if envs:

ads/opctl/operator/lowcode/forecast/const.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,3 +91,4 @@ class ForecastOutputColumns(ExtendedEnum):
9191
AUTO_SELECT = "auto-select"
9292
AUTO_SELECT_SERIES = "auto-select-series"
9393
BACKTEST_REPORT_NAME = "back_test.csv"
94+
TROUBLESHOOTING_GUIDE = "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-operators/troubleshooting.md"

ads/opctl/operator/lowcode/forecast/errors.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,9 @@
44
# Copyright (c) 2023 Oracle and/or its affiliates.
55
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
66

7+
from ads.opctl.operator.lowcode.forecast.const import TROUBLESHOOTING_GUIDE
8+
9+
710
class ForecastSchemaYamlError(Exception):
811
"""Exception raised when there is an issue with the schema."""
912

@@ -12,6 +15,7 @@ def __init__(self, error: str):
1215
"Invalid forecast operator specification. Check the YAML structure and ensure it "
1316
"complies with the required schema for forecast operator. \n"
1417
f"{error}"
18+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
1519
)
1620

1721

@@ -23,4 +27,5 @@ def __init__(self, error: str):
2327
"Invalid input data. Check the input data and ensure it "
2428
"complies with the validation criteria. \n"
2529
f"{error}"
30+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
2631
)

ads/opctl/operator/lowcode/forecast/model/base_model.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@
5151
SpeedAccuracyMode,
5252
SupportedMetrics,
5353
SupportedModels,
54+
TROUBLESHOOTING_GUIDE,
5455
)
5556
from ..operator_config import ForecastOperatorConfig, ForecastOperatorSpec
5657
from .forecast_datasets import ForecastDatasets, ForecastResults
@@ -743,6 +744,7 @@ def _validate_automlx_explanation_mode(self):
743744
raise ValueError(
744745
"AUTOMLX explanation accuracy mode is only supported for AutoMLX models. "
745746
"Please select mode other than AUTOMLX from the available explanations_accuracy_mode options"
747+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
746748
)
747749

748750
@runtime_dependency(

ads/opctl/operator/lowcode/forecast/model/factory.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,14 @@
44
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
55

66
from ads.opctl.operator.lowcode.common.transformations import Transformations
7-
from ..const import AUTO_SELECT, SpeedAccuracyMode, SupportedModels, AUTO_SELECT_SERIES
7+
8+
from ..const import (
9+
AUTO_SELECT,
10+
AUTO_SELECT_SERIES,
11+
TROUBLESHOOTING_GUIDE,
12+
SpeedAccuracyMode,
13+
SupportedModels,
14+
)
815
from ..meta_selector import MetaSelector
916
from ..model_evaluator import ModelEvaluator
1017
from ..operator_config import ForecastOperatorConfig
@@ -23,6 +30,7 @@ def __init__(self, model_type: str):
2330
super().__init__(
2431
f"Model: `{model_type}` "
2532
f"is not supported. Supported models: {SupportedModels.values()}"
33+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
2634
)
2735

2836

ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
get_frequency_of_datetime,
1919
)
2020

21-
from ..const import ForecastOutputColumns, SupportedModels
21+
from ..const import ForecastOutputColumns, SupportedModels, TROUBLESHOOTING_GUIDE
2222
from ..operator_config import ForecastOperatorConfig
2323

2424

@@ -49,7 +49,8 @@ def _verify_dt_col(self, spec):
4949
f"{SupportedModels.AutoMLX} requires data with a frequency of at least one hour. Please try using a different model,"
5050
" or select the 'auto' option."
5151
)
52-
raise InvalidParameterError(message)
52+
raise InvalidParameterError(f"{message}"
53+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps.")
5354

5455

5556
class AdditionalData(AbstractData):
@@ -65,10 +66,12 @@ def __init__(self, spec, historical_data, additional_data=None, subset=None):
6566
if historical_data.get_max_time() > add_dates[-spec.horizon]:
6667
raise DataMismatchError(
6768
f"The Historical Data ends on {historical_data.get_max_time()}. The additional data horizon starts on {add_dates[-spec.horizon]}. The horizon should have exactly {spec.horizon} dates after the Historical at a frequency of {historical_data.freq}"
69+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
6870
)
6971
elif historical_data.get_max_time() != add_dates[-(spec.horizon + 1)]:
7072
raise DataMismatchError(
7173
f"The Additional Data must be present for all historical data and the entire horizon. The Historical Data ends on {historical_data.get_max_time()}. The additonal data horizon starts after {add_dates[-(spec.horizon+1)]}. These should be the same date."
74+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
7275
)
7376
else:
7477
self.name = "additional_data"
@@ -215,6 +218,7 @@ def get_data_at_series(self, s_id, include_horizon=True):
215218
except Exception as e:
216219
raise InvalidParameterError(
217220
f"Unable to retrieve series id: {s_id} from data. Available series ids are: {self.list_series_ids()}"
221+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
218222
) from e
219223

220224
def get_horizon_at_series(self, s_id):
@@ -296,6 +300,7 @@ def add_series_id(
296300
if not overwrite and series_id in self.series_id_map:
297301
raise ValueError(
298302
f"Attempting to update ForecastOutput for series_id {series_id} when this already exists. Set overwrite to True."
303+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
299304
)
300305
forecast = self._check_forecast_format(forecast)
301306
self.series_id_map[series_id] = forecast
@@ -336,6 +341,7 @@ def populate_series_output(
336341
except KeyError as e:
337342
raise ValueError(
338343
f"Attempting to update output for series: {series_id}, however no series output has been initialized."
344+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
339345
) from e
340346

341347
if (output_i.shape[0] - self.horizon) == len(fit_val):
@@ -356,18 +362,21 @@ def populate_series_output(
356362
if len(forecast_val) != self.horizon:
357363
raise ValueError(
358364
f"Attempting to set forecast along horizon ({self.horizon}) for series: {series_id}, however forecast is only length {len(forecast_val)}"
365+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
359366
)
360367
output_i["forecast_value"].iloc[-self.horizon :] = forecast_val
361368

362369
if len(upper_bound) != self.horizon:
363370
raise ValueError(
364371
f"Attempting to set upper_bound along horizon ({self.horizon}) for series: {series_id}, however upper_bound is only length {len(upper_bound)}"
372+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
365373
)
366374
output_i[self.upper_bound_name].iloc[-self.horizon :] = upper_bound
367375

368376
if len(lower_bound) != self.horizon:
369377
raise ValueError(
370378
f"Attempting to set lower_bound along horizon ({self.horizon}) for series: {series_id}, however lower_bound is only length {len(lower_bound)}"
379+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
371380
)
372381
output_i[self.lower_bound_name].iloc[-self.horizon :] = lower_bound
373382

ads/opctl/operator/lowcode/forecast/model_evaluator.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,10 @@
1010
from ads.opctl import logger
1111
from ads.opctl.operator.lowcode.common.const import DataColumns
1212
from ads.opctl.operator.lowcode.common.errors import InsufficientDataError
13-
from ads.opctl.operator.lowcode.forecast.const import BACKTEST_REPORT_NAME
13+
from ads.opctl.operator.lowcode.forecast.const import (
14+
BACKTEST_REPORT_NAME,
15+
TROUBLESHOOTING_GUIDE,
16+
)
1417
from ads.opctl.operator.lowcode.forecast.model.factory import SupportedModels
1518

1619
from .model.forecast_datasets import ForecastDatasets
@@ -79,6 +82,7 @@ def generate_k_fold_data(
7982
raise InsufficientDataError(
8083
"Insufficient data to evaluate multiple models. Please specify a model "
8184
"instead of using auto-select."
85+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
8286
)
8387
training_datasets = [
8488
sampled_historical_data[sampled_historical_data[date_col] <= cut_off_date]
@@ -223,6 +227,7 @@ def find_best_model(
223227
model = SupportedModels.Prophet
224228
logger.error(
225229
f"Running {model} model as auto-select failed with the following error: {e.message}"
230+
f"\nPlease refer to the troubleshooting guide at {TROUBLESHOOTING_GUIDE} for resolution steps."
226231
)
227232
return model
228233
nonempty_metrics = {

ads/pipeline/ads_pipeline.py

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1728,15 +1728,19 @@ def __step_details(self, pipeline_details: Dict) -> list:
17281728

17291729
def __step_infrastructure_configuration_details(self, step) -> dict:
17301730
step_infrastructure_configuration_details = {}
1731-
step_infrastructure_configuration_details[
1732-
"blockStorageSizeInGBs"
1733-
] = step.infrastructure.block_storage_size
1734-
step_infrastructure_configuration_details[
1735-
"shapeName"
1736-
] = step.infrastructure.shape_name
1737-
step_infrastructure_configuration_details[
1738-
"shapeConfigDetails"
1739-
] = step.infrastructure.shape_config_details
1731+
step_infrastructure_configuration_details["blockStorageSizeInGBs"] = (
1732+
step.infrastructure.block_storage_size
1733+
)
1734+
step_infrastructure_configuration_details["shapeName"] = (
1735+
step.infrastructure.shape_name
1736+
)
1737+
step_infrastructure_configuration_details["shapeConfigDetails"] = (
1738+
step.infrastructure.shape_config_details
1739+
)
1740+
if getattr(step.infrastructure, "subnet_id", ""):
1741+
step_infrastructure_configuration_details["subnetId"] = (
1742+
step.infrastructure.subnet_id
1743+
)
17401744
return step_infrastructure_configuration_details
17411745

17421746
def __step_configuration_details(self, pipeline_details: Dict, step) -> dict:

tests/unitary/default_setup/jobs/test_jobs_pytorch_ddp.py

Lines changed: 22 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,9 @@
1010
from unittest import mock
1111

1212
from ads.jobs import DataScienceJob, DataScienceJobRun, PyTorchDistributedRuntime
13+
from ads.jobs.builders.infrastructure.dsc_job_runtime import (
14+
MULTI_NODE_JOB_SUPPORT,
15+
)
1316
from ads.jobs.builders.infrastructure.dsc_job_runtime import (
1417
PyTorchDistributedRuntimeHandler as Handler,
1518
)
@@ -133,23 +136,26 @@ def test_create_job_runs(self, patched_run, *args):
133136
self.assertIsInstance(main_run, DataScienceJobRun)
134137
self.assertEqual(main_run.id, test_ocid)
135138
kwarg_list = [call_args.kwargs for call_args in patched_run.call_args_list]
136-
self.assertEqual(
137-
kwarg_list,
138-
[
139-
{
140-
"display_name": "None-0",
141-
"environment_variables": {"NODE_RANK": "0", "NODE_COUNT": "2"},
142-
},
143-
{
144-
"display_name": "None-1",
145-
"environment_variables": {
146-
"NODE_RANK": "1",
147-
"NODE_COUNT": "2",
148-
"MAIN_JOB_RUN_OCID": test_ocid,
139+
if MULTI_NODE_JOB_SUPPORT:
140+
self.assertEqual(kwarg_list, [{}])
141+
else:
142+
self.assertEqual(
143+
kwarg_list,
144+
[
145+
{
146+
"display_name": "None-0",
147+
"environment_variables": {"NODE_RANK": "0", "NODE_COUNT": "2"},
149148
},
150-
},
151-
],
152-
)
149+
{
150+
"display_name": "None-1",
151+
"environment_variables": {
152+
"NODE_RANK": "1",
153+
"NODE_COUNT": "2",
154+
"MAIN_JOB_RUN_OCID": test_ocid,
155+
},
156+
},
157+
],
158+
)
153159

154160
@mock.patch.dict(
155161
os.environ, {utils.CONST_ENV_INPUT_MAPPINGS: json.dumps({INPUT_SRC: INPUT_DST})}

0 commit comments

Comments
 (0)