Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 0 additions & 30 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,34 +43,6 @@ concurrency:
cancel-in-progress: true

jobs:
cpp-build:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/build_cpp.sh
python-build:
needs: [cpp-build]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/build_python.sh
upload-conda:
needs: [cpp-build, python-build]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@python-3.14
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
wheel-build-cuopt-mps-parser:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14
Expand Down Expand Up @@ -169,7 +141,6 @@ jobs:
package-name: cuopt_server
package-type: python
docs-build:
needs: [python-build]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14
with:
Expand Down Expand Up @@ -213,7 +184,6 @@ jobs:
# Docker image build / tests aren't necessary for the 'test.yaml' workflow,
# so 'test.yaml' can be triggered without waiting for those.
needs:
- upload-conda
- wheel-publish-cuopt
- wheel-publish-cuopt-mps-parser
- wheel-publish-cuopt-server
Expand Down
59 changes: 1 addition & 58 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ jobs:
- compute-matrix-filters
- changed-files
- checks
- conda-cpp-build
- conda-cpp-tests
- conda-python-build
- conda-python-tests
- docs-build
- wheel-build-libcuopt
- wheel-build-cuopt
Expand Down Expand Up @@ -80,8 +76,6 @@ jobs:
needs: check-lean-ci
runs-on: ubuntu-latest
outputs:
conda_lean_filter: ${{ steps.set-filters.outputs.conda_lean_filter }}
conda_test_filter: ${{ steps.set-filters.outputs.conda_test_filter }}
wheel_lean_filter: ${{ steps.set-filters.outputs.wheel_lean_filter }}
mps_parser_filter: ${{ steps.set-filters.outputs.mps_parser_filter }}
libcuopt_filter: ${{ steps.set-filters.outputs.libcuopt_filter }}
Expand All @@ -92,16 +86,12 @@ jobs:
id: set-filters
run: |
if [ "${{ needs.check-lean-ci.outputs.lean_ci_enabled }}" == "true" ]; then
echo "conda_lean_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.11\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT
echo "conda_test_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.13\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT
echo "wheel_lean_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT
echo "mps_parser_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT
echo "libcuopt_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT
echo "cuopt_server_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT
echo "cuopt_sh_client_filter=[map(select(.ARCH == \"amd64\" and .PY_VER == \"3.12\")) | max_by(.CUDA_VER | split(\".\") | map(tonumber))]" >> $GITHUB_OUTPUT
else
echo "conda_lean_filter=." >> $GITHUB_OUTPUT
echo "conda_test_filter=." >> $GITHUB_OUTPUT
echo "wheel_lean_filter=." >> $GITHUB_OUTPUT
echo "mps_parser_filter=group_by([.ARCH, (.PY_VER |split(\".\") | map(tonumber))])|map(max_by([(.CUDA_VER|split(\".\")|map(tonumber))]))" >> $GITHUB_OUTPUT
echo "libcuopt_filter=group_by([.ARCH, (.CUDA_VER|split(\".\")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(\".\")|map(tonumber)))" >> $GITHUB_OUTPUT
Expand Down Expand Up @@ -282,55 +272,8 @@ jobs:
uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@python-3.14
with:
enable_check_generated_files: false
conda-cpp-build:
needs: [checks, compute-matrix-filters]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14
with:
build_type: pull-request
script: ci/build_cpp.sh
matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_lean_filter }}
conda-cpp-tests:
needs: [conda-cpp-build, changed-files, compute-matrix-filters]
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.14
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp
with:
build_type: pull-request
script: ci/test_cpp.sh
matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }}
secrets:
script-env-secret-1-key: CUOPT_DATASET_S3_URI
script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }}
script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID
script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY
script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
conda-python-build:
needs: [conda-cpp-build, compute-matrix-filters]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14
with:
build_type: pull-request
script: ci/build_python.sh
matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }}
conda-python-tests:
needs: [conda-python-build, changed-files, compute-matrix-filters]
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.14
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_conda
with:
run_codecov: false
build_type: pull-request
script: ci/test_python.sh
matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }}
secrets:
script-env-secret-1-key: CUOPT_DATASET_S3_URI
script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }}
script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID
script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY
script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
docs-build:
needs: [conda-python-build, changed-files]
needs: [checks, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14
if: fromJSON(needs.changed-files.outputs.changed_file_groups).build_docs
Expand Down
43 changes: 0 additions & 43 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,37 +26,6 @@ on:
default: nightly

jobs:
conda-cpp-tests:
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main
with:
build_type: ${{ inputs.build_type }}
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/test_cpp.sh
secrets:
script-env-secret-1-key: CUOPT_DATASET_S3_URI
script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }}
script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID
script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY
script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
conda-python-tests:
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main
with:
run_codecov: false
build_type: ${{ inputs.build_type }}
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/test_python.sh
secrets:
script-env-secret-1-key: CUOPT_DATASET_S3_URI
script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }}
script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID
script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY
script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
wheel-tests-cuopt:
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main
with:
Expand Down Expand Up @@ -87,15 +56,3 @@ jobs:
script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY
script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
conda-notebook-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main
with:
build_type: ${{ inputs.build_type }}
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
node_type: "gpu-l4-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:26.04-latest"
script: ci/test_notebooks.sh
4 changes: 4 additions & 0 deletions cpp/src/branch_and_bound/branch_and_bound.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2056,6 +2056,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
nonbasic_list,
edge_norms_);
}
std::cout << "\n FINISHED SOLVE ROOT RELAXATION in BB\n" << std::endl;
solving_root_relaxation_ = false;
exploration_stats_.total_lp_iters = root_relax_soln_.iterations;
exploration_stats_.total_lp_solve_time = toc(exploration_stats_.start_time);
Expand Down Expand Up @@ -2102,10 +2103,12 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
}

assert(root_vstatus_.size() == original_lp_.num_cols);
std::cout << "\n SETTING UNINITIALIZED STEEPEST EDGE NORMS in BB\n" << std::endl;
set_uninitialized_steepest_edge_norms<i_t, f_t>(original_lp_, basic_list, edge_norms_);

root_objective_ = compute_objective(original_lp_, root_relax_soln_.x);

std::cout << "\n UNCRUSHING PRIMAL AND DUAL SOLUTION in BB\n" << std::endl;
if (settings_.set_simplex_solution_callback != nullptr) {
std::vector<f_t> original_x;
uncrush_primal_solution(original_problem_, original_lp_, root_relax_soln_.x, original_x);
Expand All @@ -2117,6 +2120,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
root_relax_soln_.z,
original_dual,
original_z);
std::cout << "\n UNCRUSHING PRIMAL AND DUAL SOLUTION DONE in BB\n" << std::endl;
settings_.set_simplex_solution_callback(
original_x, original_dual, compute_user_objective(original_lp_, root_objective_));
}
Expand Down
1 change: 1 addition & 0 deletions cpp/src/mip_heuristics/diversity/diversity_manager.cu
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,7 @@ solution_t<i_t, f_t> diversity_manager_t<i_t, f_t>::run_solver()
{
std::lock_guard<std::mutex> guard(relaxed_solution_mutex);
if (!simplex_solution_exists.load()) {
std::cout << "\n NO SIMPLEXSOLUTION EXISTS \n"<< std::endl;
cuopt_assert(lp_result.get_primal_solution().size() == lp_optimal_solution.size(),
"LP optimal solution size mismatch");
cuopt_assert(lp_result.get_dual_solution().size() == lp_dual_optimal_solution.size(),
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/mip_heuristics/solver.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ struct branch_and_bound_solution_helper_t {
std::vector<f_t>& dual_solution,
f_t objective)
{
std::cout << "\n SETTING SIMPLEX SOLUTION \n" << std::endl;
dm->set_simplex_solution(solution, dual_solution, objective);
std::cout << "\n SETTING SIMPLEX SOLUTION DONE \n" << std::endl;

}

void node_processed_callback(const std::vector<f_t>& solution, f_t objective)
Expand Down
30 changes: 25 additions & 5 deletions cpp/src/pdlp/solve.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@

#include <rmm/cuda_stream.hpp>

#include <thread> // For std::thread
#include <exception>
#include <thread>

#define CUOPT_LOG_CONDITIONAL_INFO(condition, ...) \
if ((condition)) { CUOPT_LOG_INFO(__VA_ARGS__); }
Expand Down Expand Up @@ -1149,13 +1150,11 @@ optimization_problem_solution_t<i_t, f_t> run_concurrent(
auto barrier_handle = raft::handle_t(barrier_stream);
auto barrier_problem = dual_simplex_problem;
barrier_problem.handle_ptr = &barrier_handle;

run_barrier_thread<i_t, f_t>(std::ref(barrier_problem),
std::ref(settings_pdlp),
std::ref(sol_barrier_ptr),
std::ref(timer));
};

if (settings.num_gpus > 1) {
problem.handle_ptr->sync_stream();
raft::device_setter device_setter(1); // Scoped variable
Expand All @@ -1169,8 +1168,29 @@ optimization_problem_solution_t<i_t, f_t> run_concurrent(
if (settings.num_gpus > 1) {
CUOPT_LOG_DEBUG("PDLP device: %d", raft::device_setter::get_current_device());
}
// Run pdlp in the main thread
auto sol_pdlp = run_pdlp(problem, settings_pdlp, timer, is_batch_mode);

// Run pdlp in the main thread.
// Must join all spawned threads before leaving this scope, even on exception,
// because destroying a joinable std::thread calls std::terminate().
std::exception_ptr pdlp_exception;
optimization_problem_solution_t<i_t, f_t> sol_pdlp{pdlp_termination_status_t::NumericalError,
problem.handle_ptr->get_stream()};
try {
sol_pdlp = run_pdlp(problem, settings_pdlp, timer, is_batch_mode);
} catch (...) {
std::cout << "\n DEBUGGING: CAUGHT PDLP EXCEPTION \n" << std::endl;
pdlp_exception = std::current_exception();
*settings_pdlp.concurrent_halt = 1;
try {
std::rethrow_exception(pdlp_exception);
} catch (const std::exception& e) {
std::cout << "\n DEBUGGING: CAUGHT PDLP EXCEPTION RETHROW 1\n" << std::endl;
CUOPT_LOG_ERROR("PDLP exception in concurrent mode: %s", e.what());
} catch (...) {
std::cout << "\n DEBUGGING: CAUGHT PDLP EXCEPTION RETHROW 2\n" << std::endl;
}
}
Comment on lines +1175 to +1192
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

pdlp_exception is captured but never rethrown after thread cleanup.

The exception is stored in pdlp_exception and logged, but nothing reads the variable once the catch block ends (line 1190). After the threads are joined (lines 1193-1195), the function simply continues and returns sol_pdlp with NumericalError status instead of rethrowing.

If the intent (per PR description: "rethrowing exceptions") is to propagate the exception after ensuring threads are cleaned up, add a rethrow after the joins:

🛡️ Proposed fix to rethrow after thread cleanup
   barrier_thread.join();
+
+  // Rethrow captured exception after threads are safely joined
+  if (pdlp_exception) {
+    std::rethrow_exception(pdlp_exception);
+  }

   // copy the dual simplex solution to the device

If the current behavior (graceful degradation returning NumericalError status) is intentional, consider dropping the pdlp_exception variable, which is only read inside the catch block, and logging directly there, or add a comment clarifying that the exception is intentionally swallowed.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@cpp/src/pdlp/solve.cu` around lines 1175 - 1190, the try/catch around run_pdlp
captures exceptions into pdlp_exception but never rethrows them after thread
cleanup. The error is therefore swallowed and sol_pdlp is returned with
pdlp_termination_status_t::NumericalError. Fix this by rethrowing pdlp_exception
after the concurrent threads are joined (i.e., after the thread-join/cleanup
section that follows this block) so the caller observes the original exception;
keep the existing settings_pdlp.concurrent_halt update and logging in the catch.
Alternatively, if swallowing is intentional, remove pdlp_exception and add a
clarifying comment. Refer to pdlp_exception, run_pdlp,
settings_pdlp.concurrent_halt, and sol_pdlp when making the change.

std::cout << "\n DEBUGGING:AFTER TRY CATCH BLOCK \n" << std::endl;

// Wait for dual simplex thread to finish
if (!settings.inside_mip) { dual_simplex_thread.join(); }
Expand Down
Loading