|
4 | 4 | # Copyright (c) 2020, 2022 Oracle and/or its affiliates.
|
5 | 5 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6 | 6 |
|
7 |
| -from __future__ import print_function, absolute_import, division |
| 7 | +from __future__ import absolute_import, division, print_function |
8 | 8 |
|
| 9 | +import numpy as np |
| 10 | +import pandas as pd |
| 11 | +import scipy |
9 | 12 | import copy
|
10 |
| - |
| 13 | +from ads.common import logger, utils |
11 | 14 | from ads.common.model import ADSModel
|
12 |
| -from ads.common import logger |
13 | 15 | from ads.dataset import helper
|
14 |
| -from ads.dataset.dataset_with_target import ADSDatasetWithTarget |
15 | 16 | from ads.dataset.classification_dataset import (
|
16 | 17 | BinaryClassificationDataset,
|
| 18 | + BinaryTextClassificationDataset, |
17 | 19 | MultiClassClassificationDataset,
|
18 | 20 | MultiClassTextClassificationDataset,
|
19 |
| - BinaryTextClassificationDataset, |
20 | 21 | )
|
21 |
| -from ads.dataset.regression_dataset import RegressionDataset |
22 |
| - |
| 22 | +from ads.dataset.dataset_with_target import ADSDatasetWithTarget |
23 | 23 | from ads.dataset.pipeline import TransformerPipeline
|
| 24 | +from ads.dataset.regression_dataset import RegressionDataset |
24 | 25 | from ads.type_discovery.type_discovery_driver import TypeDiscoveryDriver
|
25 | 26 | from ads.type_discovery.typed_feature import (
|
26 | 27 | ContinuousTypedFeature,
|
27 | 28 | DiscreteTypedFeature,
|
28 | 29 | )
|
29 |
| -from ads.common import utils |
30 |
| - |
31 | 30 |
|
32 | 31 | dataset_task_map = {
|
33 | 32 | BinaryClassificationDataset: utils.ml_task_types.BINARY_CLASSIFICATION,
|
@@ -114,7 +113,7 @@ def __init__(
|
114 | 113 | >>> olabs_automl = OracleAutoMLProvider()
|
115 | 114 | >>> model, baseline = AutoML(train, provider=olabs_automl).train()
|
116 | 115 | """
|
117 |
| - from ads.automl.provider import OracleAutoMLProvider, BaselineAutoMLProvider |
| 116 | + from ads.automl.provider import BaselineAutoMLProvider, OracleAutoMLProvider |
118 | 117 |
|
119 | 118 | if hasattr(training_data, "transformer_pipeline"):
|
120 | 119 | self.transformer_pipeline = training_data.transformer_pipeline
|
@@ -154,7 +153,23 @@ def __init__(
|
154 | 153 | or utils._is_dask_series(training_data.y)
|
155 | 154 | else training_data.y
|
156 | 155 | )
|
157 |
| - self.target_name = y.name |
| 156 | + |
| 157 | + if isinstance(y, pd.DataFrame): |
| 158 | + if len(y.columns) != 1: |
| 159 | + raise ValueError("Data must be 1-dimensional.") |
| 160 | + y = y.squeeze() |
| 161 | + elif isinstance(y, np.ndarray): |
| 162 | + y = pd.Series(y) |
| 163 | + if y.name: |
| 164 | + self.target_name = str(y.name) |
| 165 | + else: |
| 166 | + y.name = str(0) |
| 167 | + self.target_name = str(0) |
| 168 | + |
| 169 | + if isinstance(X, np.ndarray): |
| 170 | + X = pd.DataFrame(X) |
| 171 | + elif isinstance(X, scipy.sparse.csr.csr_matrix): |
| 172 | + X = pd.DataFrame(X.todense()) |
158 | 173 | self.feature_names = X.columns.values
|
159 | 174 | self.client = client
|
160 | 175 | class_names = y.unique()
|
@@ -256,8 +271,8 @@ def train(self, **kwargs):
|
256 | 271 | avail_n_cores = utils.get_cpu_count()
|
257 | 272 |
|
258 | 273 | warn_params = [
|
259 |
| - (10 ** 5, 4, "VM.Standard.E2.4"), |
260 |
| - (10 ** 6, 16, "VM.Standard.2.16"), |
| 274 | + (10**5, 4, "VM.Standard.E2.4"), |
| 275 | + (10**6, 16, "VM.Standard.2.16"), |
261 | 276 | ]
|
262 | 277 |
|
263 | 278 | # train using automl and baseline
|
|
0 commit comments