Skip to content

Commit aea995d

Browse files
authored
Add whitebox test (#4)
Add CI scripts. Update examples. Write fix for OpenML tasks with NaN columns. Add codecov.
1 parent ca5e81b commit aea995d

File tree

12 files changed

+221
-10
lines changed

12 files changed

+221
-10
lines changed

.travis.yml

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
language: python
2+
dist: xenial
3+
os: linux
4+
5+
jobs:
6+
include:
7+
- python: "3.6"
8+
env: RUN_TESTS="true"
9+
- python: "3.7"
10+
env: RUN_TESTS="true"
11+
- python: "3.8"
12+
env:
13+
- RUN_TESTS="true"
14+
- USE_SINGULARITY="true"
15+
- python: "3.8"
16+
env: RUN_CODESTYLE="true"
17+
- python: "3.8"
18+
dist: bionic
19+
env:
20+
- RUN_EXAMPLES="true"
21+
- USE_SINGULARITY="true"
22+
- python: "3.8"
23+
dist: xenial
24+
env:
25+
- RUN_EXAMPLES="true"
26+
- USE_SINGULARITY="true"
27+
28+
before_cache:
29+
- rm -f $HOME/.cache/pip/log/debug.log
30+
31+
cache:
32+
pip
33+
34+
git:
35+
depth: 5
36+
37+
install:
38+
- chmod +x ci_scripts/install.sh && source ./ci_scripts/install.sh
39+
40+
script:
41+
- chmod +x ci_scripts/script.sh && source ./ci_scripts/script.sh

ci_scripts/codestyle.sh

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/usr/bin/env sh
2+
3+
if [[ "$RUN_CODESTYLE" == "true" ]]; then
4+
echo "Performing codestyle checking"
5+
pycodestyle --max-line-length=120 ./hpolib
6+
flake8 --max-line-length=120 ./hpolib
7+
else
8+
echo "Skip code style checking"
9+
fi

ci_scripts/examples.sh

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/usr/bin/env sh
2+
3+
cd examples
4+
5+
for script in *.py
6+
do
7+
python $script
8+
rval=$?
9+
if [ "$rval" != 0 ]; then
10+
echo "Error running example $script"
11+
exit $rval
12+
fi
13+
done

ci_scripts/install.sh

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/usr/bin/env sh
2+
3+
install_packages=""
4+
5+
if [[ "$RUN_TESTS" == "true" ]]; then
6+
echo "Install tools for testing"
7+
install_packages="${install_packages}pytest,"
8+
pip install codecov
9+
else
10+
echo "Skip installing tools for testing"
11+
fi
12+
13+
if [[ "$RUN_CODESTYLE" == "true" ]]; then
14+
echo "Install tools for codestyle checking"
15+
install_packages="${install_packages}codestyle,"
16+
else
17+
echo "Skip installing tools for codestyle checking"
18+
fi
19+
20+
if [[ "$USE_SINGULARITY" == "true" ]]; then
21+
echo "Install Singularity"
22+
gimme force 1.14
23+
eval "$(gimme 1.14)"
24+
25+
sudo apt-get update && sudo apt-get install -y \
26+
build-essential \
27+
libssl-dev \
28+
uuid-dev \
29+
libgpgme11-dev \
30+
squashfs-tools \
31+
libseccomp-dev \
32+
wget \
33+
pkg-config \
34+
git \
35+
cryptsetup
36+
37+
export VERSION=3.5.2 && # adjust this as necessary \
38+
wget https://github.com/sylabs/singularity/releases/download/v${VERSION}/singularity-${VERSION}.tar.gz && \
39+
tar -xzf singularity-${VERSION}.tar.gz && \
40+
cd singularity
41+
42+
./mconfig && \
43+
make -C builddir && \
44+
sudo make -C builddir install
45+
46+
cd ..
47+
install_packages="${install_packages}singularity,"
48+
else
49+
echo "Skip installing Singularity"
50+
fi
51+
52+
install_packages="${install_packages}xgboost"
53+
echo "Install HPOlib3 with options: ${install_packages}"
54+
pip install .["${install_packages}"]

ci_scripts/script.sh

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/usr/bin/env sh
2+
3+
if [[ "$RUN_TESTS" == "true" ]]; then
4+
if [[ "$USE_SINGULARITY" == "true" ]]; then
5+
echo "Run tests with singularity support"
6+
# Create the coverage report for the singularity example, since it covers more tests.
7+
pytest -sv --cov=hpolib tests/
8+
codecov
9+
else
10+
echo "Run tests without singularity support"
11+
pytest -sv tests/
12+
fi
13+
fi
14+
15+
if [[ "$RUN_CODESTYLE" == "true" ]]; then
16+
echo "Run codestyle"
17+
chmod +x ci_scripts/codestyle.sh && source ./ci_scripts/codestyle.sh
18+
fi
19+
20+
if [[ "$RUN_EXAMPLES" == "true" ]]; then
21+
echo "Run all examples"
22+
chmod +x ci_scripts/examples.sh && source ./ci_scripts/examples.sh
23+
fi

examples/XGBoost_local.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,27 @@
44

55
task_ids = get_openmlcc18_taskids()
66
for task_no, task_id in enumerate(task_ids):
7+
78
print(f'###################### TASK {task_no + 1} of {len(task_ids)}: Task-Id: {task_id} ######################')
9+
if task_id == 167204:
10+
continue # due to memory limits
11+
812
b = Benchmark(task_id=task_id)
913
cs = b.get_configuration_space()
1014
start = time()
11-
for i in range(5):
15+
num_configs = 1
16+
for i in range(num_configs):
1217
configuration = cs.sample_configuration()
1318
print(configuration)
14-
for n_estimator in [2, 4, 8, 16, 32]:
15-
for subsample in [0.1, 0.2, 0.4, 0.8, 1]:
19+
for n_estimator in [8, 64]:
20+
for subsample in [0.4, 1]:
1621
result_dict = b.objective_function(configuration, n_estimators=n_estimator, subsample=subsample)
1722
valid_loss = result_dict['function_value']
1823
train_loss = result_dict['train_loss']
1924

2025
result_dict = b.objective_function_test(configuration, n_estimators=n_estimator)
2126
test_loss = result_dict['function_value']
2227

23-
print(f'[{i+1}|5] No Estimator: {n_estimator:3d} - Subsample Rate: {subsample:.1f} - Test {test_loss:.4f} '
28+
print(f'[{i+1}|{num_configs}] No Estimator: {n_estimator:3d} - Subsample Rate: {subsample:.1f} - Test {test_loss:.4f} '
2429
f'- Valid {valid_loss:.4f} - Train {train_loss:.4f}')
25-
2630
print(f'Done, took totally {time()-start:.2f}')

examples/XGBoost_with_container.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ def run_benchmark(task_id):
4949
cs = b.get_configuration_space()
5050
configuration = cs.get_default_configuration()
5151

52-
n_estimators = [2, 4, 8, 16, 32, 64]
53-
subsample_ratios = [0.1, 0.2, 0.4, 0.8, 1]
52+
n_estimators = [8, 64]
53+
subsample_ratios = [0.4, 1]
5454

5555
result_per_data_set = []
5656
num_configs = 10
@@ -84,7 +84,7 @@ def run_benchmark(task_id):
8484

8585
if __name__ == "__main__":
8686
parser = argparse.ArgumentParser(prog='HPOlib CC Datasets', description='HPOlib3', usage='%(prog)s <task_id>')
87-
parser.add_argument('--array_id', type=int, help='Defines which data set to use. Values from 0 to 71')
87+
parser.add_argument('--array_id', default=0, type=int, help='Defines which data set to use. Values from 0 to 71')
8888

8989
args = parser.parse_args()
9090
task_ids = get_openmlcc18_taskids()

extra_requirements/tests.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
2-
"py": ["pycodestyle", "flake8"],
2+
"codestyle": ["pycodestyle","flake8"],
33
"pytest": ["pytest","pytest-cov"]
44
}

extra_requirements/xgboost.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
{
22
"xgboost": ["xgboost==0.90","json_tricks==3.14.0"]
3-
}
3+
}

hpolib/benchmarks/ml/xgboost_benchmark.py

+3
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1,
4949
self.X_valid = self.X_valid[:, sorting]
5050
self.X_test = self.X_test[:, sorting]
5151

52+
nan_columns = np.all(np.isnan(self.X_train), axis=0)
53+
self.categorical_data = self.categorical_data[~nan_columns]
54+
5255
mean_imputer = SimpleImputer(strategy='mean')
5356
self.X_train = mean_imputer.fit_transform(self.X_train)
5457
self.X_valid = mean_imputer.transform(self.X_valid)

tests/__init__.py

Whitespace-only changes.

tests/test_whitebox.py

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import numpy as np
2+
import pytest
3+
from time import time
4+
5+
import logging
6+
logging.basicConfig(level=logging.DEBUG)
7+
8+
try:
9+
import Pyro4
10+
skip_container_test = False
11+
except ImportError:
12+
skip_container_test = True
13+
14+
15+
def test_whitebox_without_container():
16+
from hpolib.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark
17+
b = Benchmark(task_id=167199, rng=0)
18+
cs = b.get_configuration_space(seed=0)
19+
20+
start = time()
21+
configuration = cs.get_default_configuration()
22+
assert configuration['colsample_bylevel'] == 1.0
23+
assert len(configuration.keys()) == 6
24+
25+
n_estimator = 32
26+
subsample = 1
27+
result_dict = b.objective_function(configuration, n_estimators=n_estimator, subsample=subsample, rng=0)
28+
valid_loss = result_dict['function_value']
29+
train_loss = result_dict['train_loss']
30+
31+
result_dict = b.objective_function_test(configuration, n_estimators=n_estimator, rng=0)
32+
test_loss = result_dict['function_value']
33+
34+
assert np.isclose(train_loss, 0.1071, atol=0.001)
35+
assert np.isclose(valid_loss, 0.3873, atol=0.001)
36+
assert np.isclose(test_loss, 0.38181, atol=0.001)
37+
38+
39+
@pytest.mark.skipif(skip_container_test, reason="Requires singularity and flask")
40+
def test_whitebox_with_container():
41+
from hpolib.container.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark as Benchmark
42+
b = Benchmark(container_source='library://keggensperger/automl/',
43+
container_name='xgboost_benchmark',
44+
task_id=167199,
45+
rng=0)
46+
47+
cs = b.get_configuration_space()
48+
configuration = cs.get_default_configuration()
49+
assert configuration['colsample_bylevel'] == 1.0
50+
assert len(configuration.keys()) == 6
51+
52+
n_estimator = 32
53+
subsample = 1
54+
result_dict = b.objective_function(configuration, n_estimators=n_estimator, subsample=subsample)
55+
valid_loss = result_dict['function_value']
56+
train_loss = result_dict['train_loss']
57+
result_dict = b.objective_function_test(configuration, n_estimators=n_estimator)
58+
test_loss = result_dict['function_value']
59+
60+
print(train_loss, valid_loss, test_loss)
61+
assert np.isclose(train_loss, 0.1071, atol=0.001)
62+
assert np.isclose(valid_loss, 0.3873, atol=0.001)
63+
assert np.isclose(test_loss, 0.38181, atol=0.001)
64+

0 commit comments

Comments
 (0)