diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 593d48bd7..e7aab739a 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -43,34 +43,6 @@ concurrency: cancel-in-progress: true jobs: - cpp-build: - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14 - with: - build_type: ${{ inputs.build_type || 'branch' }} - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - script: ci/build_cpp.sh - python-build: - needs: [cpp-build] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14 - with: - build_type: ${{ inputs.build_type || 'branch' }} - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - script: ci/build_python.sh - upload-conda: - needs: [cpp-build, python-build] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@python-3.14 - with: - build_type: ${{ inputs.build_type || 'branch' }} - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} wheel-build-cuopt-mps-parser: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 @@ -169,7 +141,6 @@ jobs: package-name: cuopt_server package-type: python docs-build: - needs: [python-build] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14 with: @@ -213,7 +184,6 @@ jobs: # Docker image build / tests aren't necessary for the 'test.yaml' workflow, # so 'test.yaml' can be triggered without waiting for those. needs: - - upload-conda - wheel-publish-cuopt - wheel-publish-cuopt-mps-parser - wheel-publish-cuopt-server diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 95741c1fb..cab5c3213 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -20,10 +20,6 @@ jobs: - compute-matrix-filters - changed-files - checks - - conda-cpp-build - - conda-cpp-tests - - conda-python-build - - conda-python-tests - docs-build - wheel-build-libcuopt - wheel-build-cuopt @@ -80,8 +76,6 @@ jobs: needs: check-lean-ci runs-on: ubuntu-latest outputs: - conda_lean_filter: ${{ steps.set-filters.outputs.conda_lean_filter }} - conda_test_filter: ${{ steps.set-filters.outputs.conda_test_filter }} wheel_lean_filter: ${{ steps.set-filters.outputs.wheel_lean_filter }} mps_parser_filter: ${{ steps.set-filters.outputs.mps_parser_filter }} libcuopt_filter: ${{ steps.set-filters.outputs.libcuopt_filter }} @@ -92,16 +86,12 @@ jobs: id: set-filters run: | if [ "${{ needs.check-lean-ci.outputs.lean_ci_enabled }}" == "true" ]; then - echo "conda_lean_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.11\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT - echo "conda_test_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.13\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT echo "wheel_lean_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT echo "mps_parser_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT echo "libcuopt_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT echo "cuopt_server_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT echo "cuopt_sh_client_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT else - echo "conda_lean_filter=." >> $GITHUB_OUTPUT - echo "conda_test_filter=." >> $GITHUB_OUTPUT echo "wheel_lean_filter=." >> $GITHUB_OUTPUT echo "mps_parser_filter=group_by([.ARCH, (.PY_VER |split(\".\") | map(tonumber))])|map(max_by([(.CUDA_VER|split(\".\")|map(tonumber))]))" >> $GITHUB_OUTPUT echo "libcuopt_filter=group_by([.ARCH, (.CUDA_VER|split(\".\")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(\".\")|map(tonumber)))" >> $GITHUB_OUTPUT @@ -282,55 +272,8 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@python-3.14 with: enable_check_generated_files: false - conda-cpp-build: - needs: [checks, compute-matrix-filters] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14 - with: - build_type: pull-request - script: ci/build_cpp.sh - matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_lean_filter }} - conda-cpp-tests: - needs: [conda-cpp-build, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.14 - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp - with: - build_type: pull-request - script: ci/test_cpp.sh - matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} - secrets: - script-env-secret-1-key: CUOPT_DATASET_S3_URI - script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} - script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID - script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} - script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY - script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - conda-python-build: - needs: [conda-cpp-build, compute-matrix-filters] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14 - with: - build_type: pull-request - script: ci/build_python.sh - matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} - conda-python-tests: - needs: [conda-python-build, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.14 - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_conda - with: - run_codecov: false - build_type: pull-request - script: ci/test_python.sh - matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} - secrets: - script-env-secret-1-key: CUOPT_DATASET_S3_URI - script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} - script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID - script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} - script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY - script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} docs-build: - needs: [conda-python-build, changed-files] + needs: [checks, changed-files] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).build_docs diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index e88b7829f..d2cad3722 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -26,37 +26,6 @@ on: default: nightly jobs: - conda-cpp-tests: - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main - with: - build_type: ${{ inputs.build_type }} - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - script: ci/test_cpp.sh - secrets: - script-env-secret-1-key: CUOPT_DATASET_S3_URI - script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} - script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID - script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} - script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY - script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - conda-python-tests: - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main - with: - run_codecov: false - build_type: ${{ inputs.build_type }} - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - script: ci/test_python.sh - secrets: - script-env-secret-1-key: CUOPT_DATASET_S3_URI - script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} - script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID - script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} - script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY - script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} wheel-tests-cuopt: uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: @@ -87,15 +56,3 @@ jobs: script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - conda-notebook-tests: - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main - with: - build_type: ${{ inputs.build_type }} - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - node_type: "gpu-l4-latest-1" - arch: "amd64" - container_image: "rapidsai/ci-conda:26.04-latest" - script: ci/test_notebooks.sh diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index ba2b63983..8aa949816 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -2056,6 +2056,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut nonbasic_list, edge_norms_); } + std::cout << "\n FINISHED SOLVE ROOT RELAXATION in BB\n" << std::endl; solving_root_relaxation_ = false; exploration_stats_.total_lp_iters = root_relax_soln_.iterations; exploration_stats_.total_lp_solve_time = toc(exploration_stats_.start_time); @@ -2102,10 +2103,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } assert(root_vstatus_.size() == original_lp_.num_cols); + std::cout << "\n SETTING UNINITIALIZED STEEPEST EDGE NORMS in BB\n" << std::endl; set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_); root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); + std::cout << "\n UNCRUSHING PRIMAL AND DUAL SOLUTION in BB\n" << std::endl; if (settings_.set_simplex_solution_callback != nullptr) { std::vector original_x; uncrush_primal_solution(original_problem_, original_lp_, root_relax_soln_.x, original_x); @@ -2117,6 +2120,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_relax_soln_.z, original_dual, original_z); + std::cout << "\n UNCRUSHING PRIMAL AND DUAL SOLUTION DONE in BB\n" << std::endl; settings_.set_simplex_solution_callback( original_x, original_dual, compute_user_objective(original_lp_, root_objective_)); } diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 0ded8337d..0990c517f 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -443,6 +443,7 @@ solution_t diversity_manager_t::run_solver() { std::lock_guard guard(relaxed_solution_mutex); if (!simplex_solution_exists.load()) { + std::cout << "\n NO SIMPLEXSOLUTION EXISTS \n"<< std::endl; cuopt_assert(lp_result.get_primal_solution().size() == lp_optimal_solution.size(), "LP optimal solution size mismatch"); cuopt_assert(lp_result.get_dual_solution().size() == lp_dual_optimal_solution.size(), diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index f25c093af..84d452cd6 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -70,7 +70,10 @@ struct branch_and_bound_solution_helper_t { std::vector& dual_solution, f_t objective) { + std::cout << "\n SETTING SIMPLEX SOLUTION \n" << std::endl; dm->set_simplex_solution(solution, dual_solution, objective); + std::cout << "\n SETTING SIMPLEX SOLUTION DONE \n" << std::endl; + } void node_processed_callback(const std::vector& solution, f_t objective) diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 2fc9ec08d..1e669dd6c 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -53,7 +53,8 @@ #include -#include // For std::thread +#include +#include #define CUOPT_LOG_CONDITIONAL_INFO(condition, ...) \ if ((condition)) { CUOPT_LOG_INFO(__VA_ARGS__); } @@ -1149,13 +1150,11 @@ optimization_problem_solution_t run_concurrent( auto barrier_handle = raft::handle_t(barrier_stream); auto barrier_problem = dual_simplex_problem; barrier_problem.handle_ptr = &barrier_handle; - run_barrier_thread(std::ref(barrier_problem), std::ref(settings_pdlp), std::ref(sol_barrier_ptr), std::ref(timer)); }; - if (settings.num_gpus > 1) { problem.handle_ptr->sync_stream(); raft::device_setter device_setter(1); // Scoped variable @@ -1169,8 +1168,29 @@ optimization_problem_solution_t run_concurrent( if (settings.num_gpus > 1) { CUOPT_LOG_DEBUG("PDLP device: %d", raft::device_setter::get_current_device()); } - // Run pdlp in the main thread - auto sol_pdlp = run_pdlp(problem, settings_pdlp, timer, is_batch_mode); + + // Run pdlp in the main thread. + // Must join all spawned threads before leaving this scope, even on exception, + // because destroying a joinable std::thread calls std::terminate(). + std::exception_ptr pdlp_exception; + optimization_problem_solution_t sol_pdlp{pdlp_termination_status_t::NumericalError, + problem.handle_ptr->get_stream()}; + try { + sol_pdlp = run_pdlp(problem, settings_pdlp, timer, is_batch_mode); + } catch (...) { + std::cout << "\n DEBUGGING: CAUGHT PDLP EXCEPTION \n" << std::endl; + pdlp_exception = std::current_exception(); + *settings_pdlp.concurrent_halt = 1; + try { + std::rethrow_exception(pdlp_exception); + } catch (const std::exception& e) { + std::cout << "\n DEBUGGING: CAUGHT PDLP EXCEPTION RETHROW 1\n" << std::endl; + CUOPT_LOG_ERROR("PDLP exception in concurrent mode: %s", e.what()); + } catch (...) { + std::cout << "\n DEBUGGING: CAUGHT PDLP EXCEPTION RETHROW 2\n" << std::endl; + } + } + std::cout << "\n DEBUGGING:AFTER TRY CATCH BLOCK \n" << std::endl; // Wait for dual simplex thread to finish if (!settings.inside_mip) { dual_simplex_thread.join(); }