Skip to content

Commit

Permalink
Update change logs history (#1049)
Browse files Browse the repository at this point in the history
* Investigate issue 1047 about LabelEncoder

Signed-off-by: Xavier Dupre <[email protected]>

* add another unit test

Signed-off-by: Xavier Dupre <[email protected]>

* update change logs

Signed-off-by: Xavier Dupre <[email protected]>

* disable a test for old version of scikit-learn

Signed-off-by: Xavier Dupre <[email protected]>

---------

Signed-off-by: Xavier Dupre <[email protected]>
  • Loading branch information
xadupre authored Dec 11, 2023
1 parent ae29a33 commit fdb52ce
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 3 deletions.
5 changes: 4 additions & 1 deletion CHANGELOGS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@

* Supports cosine distance (LocalOutlierFactor, ...)
[#1050](https://github.com/onnx/sklearn-onnx/pull/1050),
* Supports multiple columns for OrdinalEncoder
[#1044](https://github.com/onnx/sklearn-onnx/pull/1044) (by @max-509)
* Add an example on how to handle FunctionTransformer
[#1042](https://github.com/onnx/sklearn-onnx/pull/1042),
Versions of `scikit-learn < 1.0` are not tested any more.
* FeatureHasher, raise an error when the delimiter length is > 1,
* Supports lists of strings as inputs for FeatureHasher
[#1025](https://github.com/onnx/sklearn-onnx/pull/1025),
[#1036](https://github.com/onnx/sklearn-onnx/pull/1036)
* skl2onnx works with onnx==1.15.0,
[#1034](https://github.com/onnx/sklearn-onnx/pull/1034)
Expand Down
96 changes: 94 additions & 2 deletions tests/test_sklearn_ordinal_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,21 @@

"""Tests scikit-learn's OrdinalEncoder converter."""
import unittest
from numpy.testing import assert_almost_equal
import packaging.version as pv
import numpy as np
import pandas as pd
import onnxruntime
from sklearn import __version__ as sklearn_version
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestRegressor

try:
from sklearn.preprocessing import OrdinalEncoder
except ImportError:
pass
from skl2onnx import convert_sklearn
from skl2onnx import convert_sklearn, to_onnx
from skl2onnx.common.data_types import (
Int64TensorType,
StringTensorType,
Expand All @@ -30,6 +34,11 @@ def ordinal_encoder_support():
return pv.Version(vers) >= pv.Version("0.20.0")


def set_output_support():
    """Tell whether the installed scikit-learn is version 1.2 or newer.

    Only the first two dot-separated components of ``sklearn_version`` are
    compared. The tests in this file use the result to skip cases that call
    ``ColumnTransformer.set_output``.
    """
    major_minor = sklearn_version.split(".")[:2]
    installed = pv.Version(".".join(major_minor))
    return installed >= pv.Version("1.2")


class TestSklearnOrdinalEncoderConverter(unittest.TestCase):
@unittest.skipIf(
not ordinal_encoder_support(),
Expand Down Expand Up @@ -172,6 +181,89 @@ def test_model_ordinal_encoder_cat_list(self):
data, model, model_onnx, basename="SklearnOrdinalEncoderCatList"
)

@unittest.skipIf(
    not set_output_support(),
    reason="'ColumnTransformer' object has no attribute 'set_output'",
)
@unittest.skipIf(
    not ordinal_encoder_support(),
    reason="OrdinalEncoder was not available before 0.20",
)
def test_ordinal_encoder_pipeline_int64(self):
    # Pipeline under test: an OrdinalEncoder producing int64 on one string
    # column, a float32 column passed through, then a small random forest.
    from onnxruntime import InferenceSession

    frame = pd.DataFrame({"cat": ["cat2", "cat1"], "num": [0, 1]})
    frame["num"] = frame["num"].astype(np.float32)
    target = np.array([0, 1], dtype=np.float32)

    column_steps = [
        ("cat", OrdinalEncoder(dtype=np.int64), ["cat"]),
        ("num", "passthrough", ["num"]),
    ]
    transformer = ColumnTransformer(
        transformers=column_steps,
        sparse_threshold=1,
        verbose_feature_names_out=False,
    )
    preprocessor = transformer.set_output(transform="pandas")
    forest = RandomForestRegressor(n_estimators=3, max_depth=2)
    model = make_pipeline(preprocessor, forest)
    model.fit(frame, target)
    expected = model.predict(frame)

    # The first row of the training frame is handed over as sample input.
    model_onnx = to_onnx(model, frame[:1], target_opset=TARGET_OPSET)
    serialized = model_onnx.SerializeToString()
    sess = InferenceSession(serialized, providers=["CPUExecutionProvider"])

    # Each dataframe column is fed as its own (n, 1) array.
    feeds = {
        name: frame[name].values.reshape((-1, 1)) for name in ("cat", "num")
    }
    got = sess.run(None, feeds)
    assert_almost_equal(expected, got[0].ravel())

@unittest.skipIf(
    not set_output_support(),
    reason="'ColumnTransformer' object has no attribute 'set_output'",
)
@unittest.skipIf(
    not ordinal_encoder_support(),
    reason="OrdinalEncoder was not available before 0.20",
)
def test_ordinal_encoder_pipeline_string_int64(self):
    # Same pipeline shape as the single-column case, but the OrdinalEncoder
    # receives two columns at once: one of strings (C1), one of integers (C2).
    from onnxruntime import InferenceSession

    frame = pd.DataFrame(
        {"C1": ["cat2", "cat1", "cat3"], "C2": [1, 0, 1], "num": [0, 1, 1]}
    )
    frame["num"] = frame["num"].astype(np.float32)
    target = np.array([0, 1, 2], dtype=np.float32)

    column_steps = [
        ("cat", OrdinalEncoder(dtype=np.int64), ["C1", "C2"]),
        ("num", "passthrough", ["num"]),
    ]
    transformer = ColumnTransformer(
        transformers=column_steps,
        sparse_threshold=1,
        verbose_feature_names_out=False,
    )
    preprocessor = transformer.set_output(transform="pandas")
    forest = RandomForestRegressor(n_estimators=3, max_depth=2)
    model = make_pipeline(preprocessor, forest)
    model.fit(frame, target)
    expected = model.predict(frame)

    # The first row of the training frame is handed over as sample input.
    model_onnx = to_onnx(model, frame[:1], target_opset=TARGET_OPSET)
    serialized = model_onnx.SerializeToString()
    sess = InferenceSession(serialized, providers=["CPUExecutionProvider"])

    # Each dataframe column is fed as its own (n, 1) array.
    feeds = {
        name: frame[name].values.reshape((-1, 1))
        for name in ("C1", "C2", "num")
    }
    got = sess.run(None, feeds)
    assert_almost_equal(expected, got[0].ravel())


if __name__ == "__main__":
    # unittest.main() exits the interpreter when the run finishes (exit=True
    # by default), so only one call can ever execute; keep the verbose one.
    unittest.main(verbosity=2)

0 comments on commit fdb52ce

Please sign in to comment.