From feead09d8259f9e05bf6276fee51c913d4be95b9 Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Thu, 11 Sep 2025 17:49:43 +0200 Subject: [PATCH 01/46] WCNF parser --- cpmpy/tools/wcnf/__init__.py | 90 ++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 cpmpy/tools/wcnf/__init__.py diff --git a/cpmpy/tools/wcnf/__init__.py b/cpmpy/tools/wcnf/__init__.py new file mode 100644 index 000000000..3446f0906 --- /dev/null +++ b/cpmpy/tools/wcnf/__init__.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +## +## __init__.py +## +""" +Set of utilities for working with WCNF-formatted CP models. + + +================= +List of functions +================= + +.. autosummary:: + :nosignatures: + + read_wcnf +""" + + +import os +import lzma +import cpmpy as cp +from io import StringIO +from typing import Union + + +def _get_var(i, vars_dict): + """ + Returns CPMpy boolean decision variable matching to index `i` if exists, else creates a new decision variable. + + Arguments: + i: index + vars_dict (dict): dictionary to keep track of previously generated decision variables + """ + if i not in vars_dict: + vars_dict[i] = cp.boolvar(name=f"x{i}") # <- be carefull that name doesn't clash with generated variables during transformations / user variables + return vars_dict[i] + + +def read_wcnf(wcnf: Union[str, os.PathLike]) -> cp.Model: + """ + Parser for WCNF format. Reads in an instance and returns its matching CPMpy model. + + Arguments: + wcnf (str or os.PathLike): A string containing a WCNF-formatted model, or a path to a file containing containing the same. + + Returns: + cp.Model: The CPMpy model of the WCNF instance. 
+ """ + # If wcnf is a path to a file -> open file + if isinstance(wcnf, (str, os.PathLike)) and os.path.exists(wcnf): + f_open = lzma.open if str(wcnf).endswith(".xz") else open + f = f_open(wcnf, "rt") + # If wcnf is a string containing a model -> create a memory-mapped file + else: + f = StringIO(wcnf) + + model = cp.Model() + vars = {} + soft_terms = [] + + for raw in f: + line = raw.strip() + + # Empty line or a comment -> skip + if not line or line.startswith("c"): + continue + + # Hard clause + if line[0] == "h": + literals = map(int, line[1:].split()) + clause = [_get_var(i, vars) if i > 0 else ~_get_var(-i, vars) + for i in literals if i != 0] + model.add(cp.any(clause)) + + # Soft clause (weight first) + else: + parts = line.split() + weight = int(parts[0]) + literals = map(int, parts[1:]) + clause = [_get_var(i, vars) if i > 0 else ~_get_var(-i, vars) + for i in literals if i != 0] + soft_terms.append(weight * cp.any(clause)) + + # Objective = sum of soft clause terms + if soft_terms: + model.maximize(sum(soft_terms)) + + return model \ No newline at end of file From 5ade48ec7a661123688a79979d262e591c44e21c Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Thu, 11 Sep 2025 18:02:32 +0200 Subject: [PATCH 02/46] Small docstring change --- cpmpy/tools/wcnf/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpmpy/tools/wcnf/__init__.py b/cpmpy/tools/wcnf/__init__.py index 3446f0906..5ce83a146 100644 --- a/cpmpy/tools/wcnf/__init__.py +++ b/cpmpy/tools/wcnf/__init__.py @@ -43,7 +43,9 @@ def read_wcnf(wcnf: Union[str, os.PathLike]) -> cp.Model: Parser for WCNF format. Reads in an instance and returns its matching CPMpy model. Arguments: - wcnf (str or os.PathLike): A string containing a WCNF-formatted model, or a path to a file containing containing the same. 
+ wcnf (str or os.PathLike): + - A file path to an WCNF file (optionally LZMA-compressed with `.xz`) + - OR a string containing the WCNF content directly Returns: cp.Model: The CPMpy model of the WCNF instance. From 7f52f5fc7694d6736c877889835ca4266db95109 Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Thu, 11 Sep 2025 18:02:54 +0200 Subject: [PATCH 03/46] OPB parser --- cpmpy/tools/opb/__init__.py | 179 ++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 cpmpy/tools/opb/__init__.py diff --git a/cpmpy/tools/opb/__init__.py b/cpmpy/tools/opb/__init__.py new file mode 100644 index 000000000..963c021cb --- /dev/null +++ b/cpmpy/tools/opb/__init__.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +## +## __init__.py +## +""" +Set of utilities for working with OPB-formatted CP models. + +Currently only the restricted OPB PB24 format is supported (without WBO). + + +================= +List of functions +================= + +.. autosummary:: + :nosignatures: + + read_opb +""" + + +import os +import re +import lzma +import cpmpy as cp +from io import StringIO +from typing import Union +from functools import reduce +from operator import mul + +# Regular expressions +HEADER_RE = re.compile(r'(.*)\s*#variable=\s*(\d+)\s*#constraint=\s*(\d+).*') +TERM_RE = re.compile(r"([+-]?\d+)((?:\s+~?x\d+)+)") +OBJ_TERM_RE = re.compile(r'^min:') +IND_TERM_RE = re.compile(r'([>=|<=|=]+)\s+([+-]?\d+)') +IND_TERM_RE = re.compile(r'(>=|<=|=)\s*([+-]?\d+)') + + +def _parse_term(line, vars): + """ + Parse a line containing OPB terms into a CPMpy expression. + + Supports: + - Linear terms (e.g., +2 x1) + - Non-linear terms (e.g., -1 x1 x14) + - Negated variables using '~' (e.g., ~x5) + + Arguments: + line (str): A string containing one or more terms. + vars (list[cp.boolvar]): List or array of CPMpy Boolean variables. + + Returns: + cp.Expression: A CPMpy expression representing the sum of all parsed terms. 
+ + Example: + >>> _parse_term("2 x2 x3 +3 x4 ~x5", vars) + sum([2, 3] * [(IV2*IV3), (IV4*~IV5)]) + """ + + terms = [] + for w, vars_str in TERM_RE.findall(line): + factors = [] + + for v in vars_str.split(): + if v.startswith("~x"): + idx = int(v[2:]) # remove "~x" + factors.append(~vars[idx]) + else: + idx = int(v[1:]) # remove "x" + factors.append(vars[idx]) + + term = int(w) * reduce(mul, factors, 1) # create weighted term + terms.append(term) + + return cp.sum(terms) + +def _parse_constraint(line, vars): + """ + Parse a single OPB constraint line into a CPMpy comparison expression. + + Arguments: + line (str): A string representing a single OPB constraint. + vars (list[cp.boolvar]): List or array of CPMpy Boolean variables. Will be index to get the variables for the constraint. + + Returns: + cp.expressions.core.Comparison: A CPMpy comparison expression representing + the constraint. + + Example: + >>> _parse_constraint("-1 x1 x14 -1 x1 ~x17 >= -1", vars) + sum([-1, -1] * [(IV1*IV14), (IV1*~IV17)]) >= -1 + """ + + op, ind_term = IND_TERM_RE.search(line).groups() + lhs = _parse_term(line, vars) + + rhs = int(ind_term) if ind_term.lstrip("+-").isdigit() else vars[int(ind_term)] + + return cp.expressions.core.Comparison( + name="==" if op == "=" else ">=", + left=lhs, + right=rhs + ) + +def read_opb(opb: Union[str, os.PathLike]) -> cp.Model: + """ + Parser for OPB (Pseudo-Boolean) format. Reads in an instance and returns its matching CPMpy model. 
+ + Based on PyPBLib's example parser: https://hardlog.udl.cat/static/doc/pypblib/html/library/index.html#example-from-opb-to-cnf-file + + Supports: + - Linear and non-linear terms (e.g., -1 x1 x14 +2 x2) + - Negated variables using '~' (e.g., ~x5) + - Minimisation objective + - Comparison operators in constraints: '=', '>=' + + Arguments: + opb (str or os.PathLike): + - A file path to an OPB file (optionally LZMA-compressed with `.xz`) + - OR a string containing the OPB content directly + + Returns: + cp.Model: The CPMpy model of the OPB instance. + + Example: + >>> opb_text = ''' + ... * #variable= 5 #constraint= 2 #equal= 1 intsize= 64 #product= 5 sizeproduct= 13 + ... min: 2 x2 x3 +3 x4 ~x5 +2 ~x1 x2 +3 ~x1 x2 x3 ~x4 ~x5 ; + ... 2 x2 x3 -1 x1 ~x3 = 5 ; + ... ''' + >>> model = read_opb(opb_text) + >>> print(model) + Model(...) + + Notes: + - Comment lines starting with '*' are ignored. + - Only "min:" objectives are supported; "max:" is not recognized. + """ + + + # If opb is a path to a file -> open file + if isinstance(opb, (str, os.PathLike)) and os.path.exists(opb): + f_open = lzma.open if str(opb).endswith(".xz") else open + f = f_open(opb, 'rt') + # If opb is a string containing a model -> create a memory-mapped file + else: + f = StringIO(opb) + + # Look for header on first line + line = f.readline() + header = HEADER_RE.match(line) + if not header: # If not found on first line, look on second (happens when passing multi line string) + _line = f.readline() + header = HEADER_RE.match(_line) + if not header: + raise ValueError(f"Missing or incorrect header: \n0: {line}1: {_line}2: ...") + nr_vars = int(header.group(2)) + 1 + + # Generator without comment lines + reader = (l for l in map(str.strip, f) if l and l[0] != '*') + + # CPMpy objects + vars = cp.boolvar(shape=nr_vars, name="x") + model = cp.Model() + + # Special case for first line -> might contain objective function + first_line = next(reader) + if OBJ_TERM_RE.match(first_line): + obj_expr = 
_parse_term(first_line, vars) + model.minimize(obj_expr) + else: # no objective found, parse as a constraint instead + model.add(_parse_constraint(first_line, vars)) + + # Start parsing line by line + for line in reader: + model.add(_parse_constraint(line, vars)) + + return model \ No newline at end of file From 548de8e13dd30137fd84031ccafc66bdb9f85bf1 Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Fri, 12 Sep 2025 09:35:49 +0200 Subject: [PATCH 04/46] Move parser out of init and add cli --- cpmpy/tools/opb/__init__.py | 168 +------------------------- cpmpy/tools/opb/parser.py | 221 +++++++++++++++++++++++++++++++++++ cpmpy/tools/wcnf/__init__.py | 82 +------------ cpmpy/tools/wcnf/parser.py | 133 +++++++++++++++++++++ 4 files changed, 364 insertions(+), 240 deletions(-) create mode 100644 cpmpy/tools/opb/parser.py create mode 100644 cpmpy/tools/wcnf/parser.py diff --git a/cpmpy/tools/opb/__init__.py b/cpmpy/tools/opb/__init__.py index 963c021cb..ae751c7e7 100644 --- a/cpmpy/tools/opb/__init__.py +++ b/cpmpy/tools/opb/__init__.py @@ -8,172 +8,14 @@ Currently only the restricted OPB PB24 format is supported (without WBO). - -================= -List of functions -================= +================== +List of submodules +================== .. autosummary:: :nosignatures: - read_opb + parser """ - -import os -import re -import lzma -import cpmpy as cp -from io import StringIO -from typing import Union -from functools import reduce -from operator import mul - -# Regular expressions -HEADER_RE = re.compile(r'(.*)\s*#variable=\s*(\d+)\s*#constraint=\s*(\d+).*') -TERM_RE = re.compile(r"([+-]?\d+)((?:\s+~?x\d+)+)") -OBJ_TERM_RE = re.compile(r'^min:') -IND_TERM_RE = re.compile(r'([>=|<=|=]+)\s+([+-]?\d+)') -IND_TERM_RE = re.compile(r'(>=|<=|=)\s*([+-]?\d+)') - - -def _parse_term(line, vars): - """ - Parse a line containing OPB terms into a CPMpy expression. 
- - Supports: - - Linear terms (e.g., +2 x1) - - Non-linear terms (e.g., -1 x1 x14) - - Negated variables using '~' (e.g., ~x5) - - Arguments: - line (str): A string containing one or more terms. - vars (list[cp.boolvar]): List or array of CPMpy Boolean variables. - - Returns: - cp.Expression: A CPMpy expression representing the sum of all parsed terms. - - Example: - >>> _parse_term("2 x2 x3 +3 x4 ~x5", vars) - sum([2, 3] * [(IV2*IV3), (IV4*~IV5)]) - """ - - terms = [] - for w, vars_str in TERM_RE.findall(line): - factors = [] - - for v in vars_str.split(): - if v.startswith("~x"): - idx = int(v[2:]) # remove "~x" - factors.append(~vars[idx]) - else: - idx = int(v[1:]) # remove "x" - factors.append(vars[idx]) - - term = int(w) * reduce(mul, factors, 1) # create weighted term - terms.append(term) - - return cp.sum(terms) - -def _parse_constraint(line, vars): - """ - Parse a single OPB constraint line into a CPMpy comparison expression. - - Arguments: - line (str): A string representing a single OPB constraint. - vars (list[cp.boolvar]): List or array of CPMpy Boolean variables. Will be index to get the variables for the constraint. - - Returns: - cp.expressions.core.Comparison: A CPMpy comparison expression representing - the constraint. - - Example: - >>> _parse_constraint("-1 x1 x14 -1 x1 ~x17 >= -1", vars) - sum([-1, -1] * [(IV1*IV14), (IV1*~IV17)]) >= -1 - """ - - op, ind_term = IND_TERM_RE.search(line).groups() - lhs = _parse_term(line, vars) - - rhs = int(ind_term) if ind_term.lstrip("+-").isdigit() else vars[int(ind_term)] - - return cp.expressions.core.Comparison( - name="==" if op == "=" else ">=", - left=lhs, - right=rhs - ) - -def read_opb(opb: Union[str, os.PathLike]) -> cp.Model: - """ - Parser for OPB (Pseudo-Boolean) format. Reads in an instance and returns its matching CPMpy model. 
- - Based on PyPBLib's example parser: https://hardlog.udl.cat/static/doc/pypblib/html/library/index.html#example-from-opb-to-cnf-file - - Supports: - - Linear and non-linear terms (e.g., -1 x1 x14 +2 x2) - - Negated variables using '~' (e.g., ~x5) - - Minimisation objective - - Comparison operators in constraints: '=', '>=' - - Arguments: - opb (str or os.PathLike): - - A file path to an OPB file (optionally LZMA-compressed with `.xz`) - - OR a string containing the OPB content directly - - Returns: - cp.Model: The CPMpy model of the OPB instance. - - Example: - >>> opb_text = ''' - ... * #variable= 5 #constraint= 2 #equal= 1 intsize= 64 #product= 5 sizeproduct= 13 - ... min: 2 x2 x3 +3 x4 ~x5 +2 ~x1 x2 +3 ~x1 x2 x3 ~x4 ~x5 ; - ... 2 x2 x3 -1 x1 ~x3 = 5 ; - ... ''' - >>> model = read_opb(opb_text) - >>> print(model) - Model(...) - - Notes: - - Comment lines starting with '*' are ignored. - - Only "min:" objectives are supported; "max:" is not recognized. - """ - - - # If opb is a path to a file -> open file - if isinstance(opb, (str, os.PathLike)) and os.path.exists(opb): - f_open = lzma.open if str(opb).endswith(".xz") else open - f = f_open(opb, 'rt') - # If opb is a string containing a model -> create a memory-mapped file - else: - f = StringIO(opb) - - # Look for header on first line - line = f.readline() - header = HEADER_RE.match(line) - if not header: # If not found on first line, look on second (happens when passing multi line string) - _line = f.readline() - header = HEADER_RE.match(_line) - if not header: - raise ValueError(f"Missing or incorrect header: \n0: {line}1: {_line}2: ...") - nr_vars = int(header.group(2)) + 1 - - # Generator without comment lines - reader = (l for l in map(str.strip, f) if l and l[0] != '*') - - # CPMpy objects - vars = cp.boolvar(shape=nr_vars, name="x") - model = cp.Model() - - # Special case for first line -> might contain objective function - first_line = next(reader) - if OBJ_TERM_RE.match(first_line): - obj_expr = 
_parse_term(first_line, vars) - model.minimize(obj_expr) - else: # no objective found, parse as a constraint instead - model.add(_parse_constraint(first_line, vars)) - - # Start parsing line by line - for line in reader: - model.add(_parse_constraint(line, vars)) - - return model \ No newline at end of file +from .parser import read_opb diff --git a/cpmpy/tools/opb/parser.py b/cpmpy/tools/opb/parser.py new file mode 100644 index 000000000..846c0874b --- /dev/null +++ b/cpmpy/tools/opb/parser.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +## +## __init__.py +## +""" +OPB parser. + +Currently only the restricted OPB PB24 format is supported (without WBO). + + +================= +List of functions +================= + +.. autosummary:: + :nosignatures: + + read_opb +""" + + +import os +import re +import sys +import lzma +import argparse +import cpmpy as cp +from io import StringIO +from typing import Union +from functools import reduce +from operator import mul + +# Regular expressions +HEADER_RE = re.compile(r'(.*)\s*#variable=\s*(\d+)\s*#constraint=\s*(\d+).*') +TERM_RE = re.compile(r"([+-]?\d+)((?:\s+~?x\d+)+)") +OBJ_TERM_RE = re.compile(r'^min:') +IND_TERM_RE = re.compile(r'([>=|<=|=]+)\s+([+-]?\d+)') +IND_TERM_RE = re.compile(r'(>=|<=|=)\s*([+-]?\d+)') + + +def _parse_term(line, vars): + """ + Parse a line containing OPB terms into a CPMpy expression. + + Supports: + - Linear terms (e.g., +2 x1) + - Non-linear terms (e.g., -1 x1 x14) + - Negated variables using '~' (e.g., ~x5) + + Arguments: + line (str): A string containing one or more terms. + vars (list[cp.boolvar]): List or array of CPMpy Boolean variables. + + Returns: + cp.Expression: A CPMpy expression representing the sum of all parsed terms. 
+ + Example: + >>> _parse_term("2 x2 x3 +3 x4 ~x5", vars) + sum([2, 3] * [(IV2*IV3), (IV4*~IV5)]) + """ + + terms = [] + for w, vars_str in TERM_RE.findall(line): + factors = [] + + for v in vars_str.split(): + if v.startswith("~x"): + idx = int(v[2:]) # remove "~x" + factors.append(~vars[idx]) + else: + idx = int(v[1:]) # remove "x" + factors.append(vars[idx]) + + term = int(w) * reduce(mul, factors, 1) # create weighted term + terms.append(term) + + return cp.sum(terms) + +def _parse_constraint(line, vars): + """ + Parse a single OPB constraint line into a CPMpy comparison expression. + + Arguments: + line (str): A string representing a single OPB constraint. + vars (list[cp.boolvar]): List or array of CPMpy Boolean variables. Will be index to get the variables for the constraint. + + Returns: + cp.expressions.core.Comparison: A CPMpy comparison expression representing + the constraint. + + Example: + >>> _parse_constraint("-1 x1 x14 -1 x1 ~x17 >= -1", vars) + sum([-1, -1] * [(IV1*IV14), (IV1*~IV17)]) >= -1 + """ + + op, ind_term = IND_TERM_RE.search(line).groups() + lhs = _parse_term(line, vars) + + rhs = int(ind_term) if ind_term.lstrip("+-").isdigit() else vars[int(ind_term)] + + return cp.expressions.core.Comparison( + name="==" if op == "=" else ">=", + left=lhs, + right=rhs + ) + +def read_opb(opb: Union[str, os.PathLike]) -> cp.Model: + """ + Parser for OPB (Pseudo-Boolean) format. Reads in an instance and returns its matching CPMpy model. 
+ + Based on PyPBLib's example parser: https://hardlog.udl.cat/static/doc/pypblib/html/library/index.html#example-from-opb-to-cnf-file + + Supports: + - Linear and non-linear terms (e.g., -1 x1 x14 +2 x2) + - Negated variables using '~' (e.g., ~x5) + - Minimisation objective + - Comparison operators in constraints: '=', '>=' + + Arguments: + opb (str or os.PathLike): + - A file path to an OPB file (optionally LZMA-compressed with `.xz`) + - OR a string containing the OPB content directly + + Returns: + cp.Model: The CPMpy model of the OPB instance. + + Example: + >>> opb_text = ''' + ... * #variable= 5 #constraint= 2 #equal= 1 intsize= 64 #product= 5 sizeproduct= 13 + ... min: 2 x2 x3 +3 x4 ~x5 +2 ~x1 x2 +3 ~x1 x2 x3 ~x4 ~x5 ; + ... 2 x2 x3 -1 x1 ~x3 = 5 ; + ... ''' + >>> model = read_opb(opb_text) + >>> print(model) + Model(...) + + Notes: + - Comment lines starting with '*' are ignored. + - Only "min:" objectives are supported; "max:" is not recognized. + """ + + + # If opb is a path to a file -> open file + if isinstance(opb, (str, os.PathLike)) and os.path.exists(opb): + f_open = lzma.open if str(opb).endswith(".xz") else open + f = f_open(opb, 'rt') + # If opb is a string containing a model -> create a memory-mapped file + else: + f = StringIO(opb) + + # Look for header on first line + line = f.readline() + header = HEADER_RE.match(line) + if not header: # If not found on first line, look on second (happens when passing multi line string) + _line = f.readline() + header = HEADER_RE.match(_line) + if not header: + raise ValueError(f"Missing or incorrect header: \n0: {line}1: {_line}2: ...") + nr_vars = int(header.group(2)) + 1 + + # Generator without comment lines + reader = (l for l in map(str.strip, f) if l and l[0] != '*') + + # CPMpy objects + vars = cp.boolvar(shape=nr_vars, name="x") + model = cp.Model() + + # Special case for first line -> might contain objective function + first_line = next(reader) + if OBJ_TERM_RE.match(first_line): + obj_expr = 
_parse_term(first_line, vars) + model.minimize(obj_expr) + else: # no objective found, parse as a constraint instead + model.add(_parse_constraint(first_line, vars)) + + # Start parsing line by line + for line in reader: + model.add(_parse_constraint(line, vars)) + + return model + + +def main(): + parser = argparse.ArgumentParser(description="Parse and solve an OPB model using CPMpy") + parser.add_argument("model", help="Path to an OPB file (or raw OPB string if --string is given)") + parser.add_argument("-s", "--solver", default=None, help="Solver name to use (default: CPMpy's default)") + parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw OPB string instead of a file path") + parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)") + args = parser.parse_args() + + # Build the CPMpy model + try: + if args.string: + model = read_opb(args.model) + else: + model = read_opb(os.path.expanduser(args.model)) + except Exception as e: + sys.stderr.write(f"Error reading model: {e}\n") + sys.exit(1) + + # Solve the model + try: + if args.solver: + result = model.solve(solver=args.solver, time_limit=args.time_limit) + else: + result = model.solve(time_limit=args.time_limit) + except Exception as e: + sys.stderr.write(f"Error solving model: {e}\n") + sys.exit(1) + + # Print results + print("Status:", model.status()) + if result is not None: + if model.has_objective(): + print("Objective:", model.objective_value()) + else: + print("No solution found.") + +if __name__ == "__main__": + main() diff --git a/cpmpy/tools/wcnf/__init__.py b/cpmpy/tools/wcnf/__init__.py index 5ce83a146..e2db10412 100644 --- a/cpmpy/tools/wcnf/__init__.py +++ b/cpmpy/tools/wcnf/__init__.py @@ -7,86 +7,14 @@ Set of utilities for working with WCNF-formatted CP models. 
-================= -List of functions -================= +================== +List of submodules +================== .. autosummary:: :nosignatures: - read_wcnf + parser """ - -import os -import lzma -import cpmpy as cp -from io import StringIO -from typing import Union - - -def _get_var(i, vars_dict): - """ - Returns CPMpy boolean decision variable matching to index `i` if exists, else creates a new decision variable. - - Arguments: - i: index - vars_dict (dict): dictionary to keep track of previously generated decision variables - """ - if i not in vars_dict: - vars_dict[i] = cp.boolvar(name=f"x{i}") # <- be carefull that name doesn't clash with generated variables during transformations / user variables - return vars_dict[i] - - -def read_wcnf(wcnf: Union[str, os.PathLike]) -> cp.Model: - """ - Parser for WCNF format. Reads in an instance and returns its matching CPMpy model. - - Arguments: - wcnf (str or os.PathLike): - - A file path to an WCNF file (optionally LZMA-compressed with `.xz`) - - OR a string containing the WCNF content directly - - Returns: - cp.Model: The CPMpy model of the WCNF instance. 
- """ - # If wcnf is a path to a file -> open file - if isinstance(wcnf, (str, os.PathLike)) and os.path.exists(wcnf): - f_open = lzma.open if str(wcnf).endswith(".xz") else open - f = f_open(wcnf, "rt") - # If wcnf is a string containing a model -> create a memory-mapped file - else: - f = StringIO(wcnf) - - model = cp.Model() - vars = {} - soft_terms = [] - - for raw in f: - line = raw.strip() - - # Empty line or a comment -> skip - if not line or line.startswith("c"): - continue - - # Hard clause - if line[0] == "h": - literals = map(int, line[1:].split()) - clause = [_get_var(i, vars) if i > 0 else ~_get_var(-i, vars) - for i in literals if i != 0] - model.add(cp.any(clause)) - - # Soft clause (weight first) - else: - parts = line.split() - weight = int(parts[0]) - literals = map(int, parts[1:]) - clause = [_get_var(i, vars) if i > 0 else ~_get_var(-i, vars) - for i in literals if i != 0] - soft_terms.append(weight * cp.any(clause)) - - # Objective = sum of soft clause terms - if soft_terms: - model.maximize(sum(soft_terms)) - - return model \ No newline at end of file +from .parser import read_wcnf diff --git a/cpmpy/tools/wcnf/parser.py b/cpmpy/tools/wcnf/parser.py new file mode 100644 index 000000000..72cec94c8 --- /dev/null +++ b/cpmpy/tools/wcnf/parser.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +## +## __init__.py +## +""" +Parser for the WCNF format. + + +================= +List of functions +================= + +.. autosummary:: + :nosignatures: + + read_wcnf +""" + + +import os +import sys +import lzma +import argparse +import cpmpy as cp +from io import StringIO +from typing import Union + + +def _get_var(i, vars_dict): + """ + Returns CPMpy boolean decision variable matching to index `i` if exists, else creates a new decision variable. 
+ + Arguments: + i: index + vars_dict (dict): dictionary to keep track of previously generated decision variables + """ + if i not in vars_dict: + vars_dict[i] = cp.boolvar(name=f"x{i}") # <- be carefull that name doesn't clash with generated variables during transformations / user variables + return vars_dict[i] + + +def read_wcnf(wcnf: Union[str, os.PathLike]) -> cp.Model: + """ + Parser for WCNF format. Reads in an instance and returns its matching CPMpy model. + + Arguments: + wcnf (str or os.PathLike): + - A file path to an WCNF file (optionally LZMA-compressed with `.xz`) + - OR a string containing the WCNF content directly + + Returns: + cp.Model: The CPMpy model of the WCNF instance. + """ + # If wcnf is a path to a file -> open file + if isinstance(wcnf, (str, os.PathLike)) and os.path.exists(wcnf): + f_open = lzma.open if str(wcnf).endswith(".xz") else open + f = f_open(wcnf, "rt") + # If wcnf is a string containing a model -> create a memory-mapped file + else: + f = StringIO(wcnf) + + model = cp.Model() + vars = {} + soft_terms = [] + + for raw in f: + line = raw.strip() + + # Empty line or a comment -> skip + if not line or line.startswith("c"): + continue + + # Hard clause + if line[0] == "h": + literals = map(int, line[1:].split()) + clause = [_get_var(i, vars) if i > 0 else ~_get_var(-i, vars) + for i in literals if i != 0] + model.add(cp.any(clause)) + + # Soft clause (weight first) + else: + parts = line.split() + weight = int(parts[0]) + literals = map(int, parts[1:]) + clause = [_get_var(i, vars) if i > 0 else ~_get_var(-i, vars) + for i in literals if i != 0] + soft_terms.append(weight * cp.any(clause)) + + # Objective = sum of soft clause terms + if soft_terms: + model.maximize(sum(soft_terms)) + + return model + +def main(): + parser = argparse.ArgumentParser(description="Parse and solve a WCNF model using CPMpy") + parser.add_argument("model", help="Path to a WCNF file (or raw WCNF string if --string is given)") + parser.add_argument("-s", 
"--solver", default=None, help="Solver name to use (default: CPMpy's default)") + parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw WCNF string instead of a file path") + parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)") + args = parser.parse_args() + + # Build the CPMpy model + try: + if args.string: + model = read_wcnf(args.model) + else: + model = read_wcnf(os.path.expanduser(args.model)) + except Exception as e: + sys.stderr.write(f"Error reading model: {e}\n") + sys.exit(1) + + # Solve the model + try: + if args.solver: + result = model.solve(solver=args.solver, time_limit=args.time_limit) + else: + result = model.solve(time_limit=args.time_limit) + except Exception as e: + sys.stderr.write(f"Error solving model: {e}\n") + sys.exit(1) + + # Print results + print("Status:", model.status()) + if result is not None: + if model.has_objective(): + print("Objective:", model.objective_value()) + else: + print("No solution found.") + +if __name__ == "__main__": + main() \ No newline at end of file From 450502570a36a8958610b36554dd2ced0f0814e7 Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Fri, 12 Sep 2025 11:53:31 +0200 Subject: [PATCH 05/46] Add MSE and OPB datasets --- cpmpy/tools/datasets/_base.py | 85 +++++++++++++++++++ cpmpy/tools/datasets/model/mse.py | 104 +++++++++++++++++++++++ cpmpy/tools/datasets/model/opb.py | 135 ++++++++++++++++++++++++++++++ 3 files changed, 324 insertions(+) create mode 100644 cpmpy/tools/datasets/_base.py create mode 100644 cpmpy/tools/datasets/model/mse.py create mode 100644 cpmpy/tools/datasets/model/opb.py diff --git a/cpmpy/tools/datasets/_base.py b/cpmpy/tools/datasets/_base.py new file mode 100644 index 000000000..3c5338489 --- /dev/null +++ b/cpmpy/tools/datasets/_base.py @@ -0,0 +1,85 @@ +""" +Dataset Base Class + +This module defines the abstract `_Dataset` class, which serves as the 
foundation +for loading and managing benchmark instance collections in CPMpy-based experiments. +It standardizes how datasets are stored, accessed, and optionally transformed. +""" + +from abc import ABC, abstractmethod +import pathlib +from typing import Any, Tuple + +class _Dataset(ABC): + """ + Abstract base class for PyTorch-style datasets of benchmarking instances. + + The `_Dataset` class provides a standardized interface for downloading and + accessing benchmark instances. This class should not be used on its own. + """ + + def __init__( + self, + dataset_dir: str = ".", + transform=None, target_transform=None, + download: bool = False, + extension:str=".txt", + **kwargs + ): + self.dataset_dir = pathlib.Path(dataset_dir) + self.transform = transform + self.target_transform = target_transform + self.extension = extension + + if not self.dataset_dir.exists(): + if not download: + raise ValueError(f"Dataset not found. Please set download=True to download the dataset.") + else: + self.download() + + @abstractmethod + def category(self): + pass + + @abstractmethod + def download(self, *args, **kwargs): + pass + + def metadata(self, file): + metadata = self.category() | { + 'name': pathlib.Path(file).stem.replace(self.extension, ''), + 'path': file, + } + return metadata + + def __len__(self) -> int: + """Return the total number of instances.""" + return len(list(self.dataset_dir.glob(f"*{self.extension}"))) + + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + + if index < 0 or index >= len(self): + raise IndexError("Index out of range") + + # Get all compressed XML files and sort for deterministic behavior + files = sorted(list(self.dataset_dir.glob(f"*{self.extension}"))) + file_path = files[index] + + filename = str(file_path) + if self.transform: + # does not need to remain a filename... 
+ filename = self.transform(filename) + + # Basic metadata about the instance + metadata = self.metadata(file=filename, ) + if self.target_transform: + metadata = self.target_transform(metadata) + + return filename, metadata + + + + + + diff --git a/cpmpy/tools/datasets/model/mse.py b/cpmpy/tools/datasets/model/mse.py new file mode 100644 index 000000000..a749d75d0 --- /dev/null +++ b/cpmpy/tools/datasets/model/mse.py @@ -0,0 +1,104 @@ +""" +MaxSAT Evaluation (MSE) Dataset + +https://maxsat-evaluations.github.io/ +""" + +import zipfile +import pathlib +from urllib.request import urlretrieve +from urllib.error import HTTPError, URLError + +from .._base import _Dataset + + +class MSEDataset(_Dataset): # torch.utils.data.Dataset compatible + """ + MaxSAT Evaluation (MSE) benchmark dataset. + + Provides access to benchmark instances from the MaxSAT Evaluation + competitions. Instances are grouped by `year` and `track` (e.g., + `"exact-unweighted"`, `"exact-weighted"`) and stored as `.wcnf.xz` files. + If the dataset is not available locally, it can be automatically + downloaded and extracted. + + More information on the competition can be found here: https://maxsat-evaluations.github.io/ + """ + + def __init__( + self, + root: str = ".", + year: int = 2024, track: str = "exact-unweighted", + transform=None, target_transform=None, + download: bool = False + ): + """ + Constructor for a dataset object of the MSE competition. + + Arguments: + root (str): Root directory where datasets are stored or will be downloaded to (default="."). + year (int): Competition year of the dataset to use (default=2024). + track (str): Track name specifying which subset of the competition instances to load (default="exact-unweighted"). + transform (callable, optional): Optional transform applied to the instance file path. + target_transform (callable, optional): Optional transform applied to the metadata dictionary. 
+ download (bool): If True, downloads the dataset if it does not exist locally (default=False). + + + Raises: + ValueError: If the dataset directory does not exist and `download=False`, + or if the requested year/track combination is not available. + """ + + self.root = pathlib.Path(root) + self.year = year + self.track = track + + dataset_dir = self.root / str(year) / track + + super().__init__( + dataset_dir=dataset_dir, + transform=transform, target_transform=target_transform, + download=download, extension=".wcnf.xz" + ) + + + def category(self): + return { + "year": self.year, + "track": self.track + } + + + def download(self): + print(f"Downloading MaxSAT Eval {self.year} {self.track} instances...") + + zip_name = f"mse{str(self.year)[2:]}-{self.track}.zip" + url = f"https://www.cs.helsinki.fi/group/coreo/MSE{self.year}-instances/" + + url_path = url + zip_name + zip_path = self.root / zip_name + + try: + urlretrieve(url_path, str(zip_path)) + except (HTTPError, URLError) as e: + raise ValueError(f"No dataset available for year {self.year} and track {self.track}. 
Error: {str(e)}") + + # Extract every entry of the zip archive into the track folder + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + # Create track folder in root directory, parents=True ensures recursive creation + self.dataset_dir.mkdir(parents=True, exist_ok=True) + + # Extract files + for file_info in zip_ref.infolist(): + # Extract file to dataset_dir, keeping only the base name (drops any folder prefix) + filename = pathlib.Path(file_info.filename).name + with zip_ref.open(file_info) as source, open(self.dataset_dir / filename, 'wb') as target: + target.write(source.read()) + # Clean up the zip file + zip_path.unlink() + + +if __name__ == "__main__": + dataset = MSEDataset(year=2024, track="exact-weighted", download=True) + print("Dataset size:", len(dataset)) + print("Instance 0:", dataset[0]) diff --git a/cpmpy/tools/datasets/model/opb.py b/cpmpy/tools/datasets/model/opb.py new file mode 100644 index 000000000..d3602954c --- /dev/null +++ b/cpmpy/tools/datasets/model/opb.py @@ -0,0 +1,135 @@ +""" +Pseudo Boolean Competition (PB) Dataset + +https://www.cril.univ-artois.fr/PB25/ +""" + +import os +import pathlib +from urllib.request import urlretrieve +from urllib.error import HTTPError, URLError +import tarfile + +from .._base import _Dataset + + +class OPBDataset(_Dataset): + """ + Pseudo Boolean Competition (PB) benchmark dataset. + + Provides access to benchmark instances from the Pseudo Boolean + competitions. Instances are grouped by `year` and `track` (e.g., + `"OPT-LIN"`, `"DEC-LIN"`) and stored as `.opb.xz` files. + If the dataset is not available locally, it can be automatically + downloaded and extracted. + + More information on the competition can be found here: https://www.cril.univ-artois.fr/PB25/ + """ + + def __init__( + self, + root: str = ".", + year: int = 2024, track: str = "OPT-LIN", + transform=None, target_transform=None, + download: bool = False + ): + """ + Constructor for a dataset object of the PB competition.
+ + Arguments: + root (str): Root directory where datasets are stored or will be downloaded to (default="."). + year (int): Competition year of the dataset to use (default=2024). + track (str): Track name specifying which subset of the competition instances to load (default="OPT-LIN"). + transform (callable, optional): Optional transform applied to the instance file path. + target_transform (callable, optional): Optional transform applied to the metadata dictionary. + download (bool): If True, downloads the dataset if it does not exist locally (default=False). + + + Raises: + ValueError: If the dataset directory does not exist and `download=False`, + or if the requested year/track combination is not available. + """ + + self.root = pathlib.Path(root) + self.year = year + self.track = track + + dataset_dir = self.root / str(year) / track + + super().__init__( + dataset_dir=dataset_dir, + transform=transform, target_transform=target_transform, + download=download, extension=".opb.xz" + ) + + def category(self): + return { + "year": self.year, + "track": self.track + } + + def metadata(self, file): + return super().metadata(file) | {'author': str(file).split(os.sep)[-1].split("_")[0],} + + + def download(self): + # TODO: add option to filter on competition instances + print(f"Downloading OPB {self.year} {self.track} instances...") + url = f"https://www.cril.univ-artois.fr/PB24/benchs/" + year_suffix = str(self.year)[2:] # Drop the starting '20' + url_path = url + f"normalized-PB{year_suffix}.tar" + tar_path = self.root / f"normalized-extraPB{year_suffix}.tar" + + try: + urlretrieve(url_path, str(tar_path)) + except (HTTPError, URLError) as e: + raise ValueError(f"No dataset available for year {self.year}. Error: {str(e)}") + + # Extract only the specific track folder from the tar + with tarfile.open(tar_path, "r:*") as tar_ref: # r:* handles .tar, .tar.gz, .tar.bz2, etc. 
+ # Get the main folder name + main_folder = None + for name in tar_ref.getnames(): + if "/" in name: + main_folder = name.split("/")[0] + break + + if main_folder is None: + raise ValueError(f"Could not find main folder in tar file") + + # Extract only files from the specified track + # Get all unique track names from tar + tracks = set() + for member in tar_ref.getmembers(): + parts = member.name.split("/") + if len(parts) > 2 and parts[0] == main_folder: + tracks.add(parts[1]) + + # Check if requested track exists + if self.track not in tracks: + raise ValueError(f"Track '{self.track}' not found in dataset. Available tracks: {sorted(tracks)}") + + # Create track folder in root directory + self.dataset_dir.mkdir(parents=True, exist_ok=True) + + # Extract files for the specified track + prefix = f"{main_folder}/{self.track}/" + for member in tar_ref.getmembers(): + if member.name.startswith(prefix) and member.isfile(): + # Path relative to main_folder/track + relative_path = member.name[len(prefix):] + + # Flatten: replace "/" with "_" to encode subfolders (some instances have clashing names) + flat_name = relative_path.replace("/", "_") + target_path = self.dataset_dir / flat_name + + with tar_ref.extractfile(member) as source, open(target_path, "wb") as target: + target.write(source.read()) + + # Clean up the tar file + tar_path.unlink() + +if __name__ == "__main__": + dataset = OPBDataset(year=2024, track="DEC-LIN", download=True) + print("Dataset size:", len(dataset)) + print("Instance 0:", dataset[0]) From 2b26034cc3e8ecadde39b1b16ba8884b59155673 Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Fri, 12 Sep 2025 11:55:12 +0200 Subject: [PATCH 06/46] Rename datasets to dataset --- cpmpy/tools/{datasets => dataset}/_base.py | 0 cpmpy/tools/{datasets => dataset}/model/mse.py | 0 cpmpy/tools/{datasets => dataset}/model/opb.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename cpmpy/tools/{datasets => dataset}/_base.py (100%) rename 
cpmpy/tools/{datasets => dataset}/model/mse.py (100%) rename cpmpy/tools/{datasets => dataset}/model/opb.py (100%) diff --git a/cpmpy/tools/datasets/_base.py b/cpmpy/tools/dataset/_base.py similarity index 100% rename from cpmpy/tools/datasets/_base.py rename to cpmpy/tools/dataset/_base.py diff --git a/cpmpy/tools/datasets/model/mse.py b/cpmpy/tools/dataset/model/mse.py similarity index 100% rename from cpmpy/tools/datasets/model/mse.py rename to cpmpy/tools/dataset/model/mse.py diff --git a/cpmpy/tools/datasets/model/opb.py b/cpmpy/tools/dataset/model/opb.py similarity index 100% rename from cpmpy/tools/datasets/model/opb.py rename to cpmpy/tools/dataset/model/opb.py From e238c2934c3f5127f3c0e6ab8766a25814941fce Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Fri, 12 Sep 2025 13:45:48 +0200 Subject: [PATCH 07/46] Dataset specific 'open' --- cpmpy/tools/dataset/_base.py | 4 ++++ cpmpy/tools/dataset/model/mse.py | 5 ++++- cpmpy/tools/dataset/model/opb.py | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/cpmpy/tools/dataset/_base.py b/cpmpy/tools/dataset/_base.py index 3c5338489..ce2206110 100644 --- a/cpmpy/tools/dataset/_base.py +++ b/cpmpy/tools/dataset/_base.py @@ -45,6 +45,10 @@ def category(self): def download(self, *args, **kwargs): pass + @abstractmethod + def open(self, instance): + pass + def metadata(self, file): metadata = self.category() | { 'name': pathlib.Path(file).stem.replace(self.extension, ''), diff --git a/cpmpy/tools/dataset/model/mse.py b/cpmpy/tools/dataset/model/mse.py index a749d75d0..84e8c5dfa 100644 --- a/cpmpy/tools/dataset/model/mse.py +++ b/cpmpy/tools/dataset/model/mse.py @@ -4,6 +4,8 @@ https://maxsat-evaluations.github.io/ """ +import lzma +import os import zipfile import pathlib from urllib.request import urlretrieve @@ -11,7 +13,6 @@ from .._base import _Dataset - class MSEDataset(_Dataset): # torch.utils.data.Dataset compatible """ MaxSAT Evaluation (MSE) benchmark dataset. 
@@ -97,6 +98,8 @@ def download(self): # Clean up the zip file zip_path.unlink() + def open(self, instance: os.PathLike) -> callable: + return lzma.open if str(instance).endswith(".xz") else open if __name__ == "__main__": dataset = MSEDataset(year=2024, track="exact-weighted", download=True) diff --git a/cpmpy/tools/dataset/model/opb.py b/cpmpy/tools/dataset/model/opb.py index d3602954c..bc051d784 100644 --- a/cpmpy/tools/dataset/model/opb.py +++ b/cpmpy/tools/dataset/model/opb.py @@ -4,6 +4,7 @@ https://www.cril.univ-artois.fr/PB25/ """ +import lzma import os import pathlib from urllib.request import urlretrieve @@ -129,6 +130,9 @@ def download(self): # Clean up the tar file tar_path.unlink() + def open(self, instance: os.PathLike) -> callable: + return lzma.open if str(instance).endswith(".xz") else open + if __name__ == "__main__": dataset = OPBDataset(year=2024, track="DEC-LIN", download=True) print("Dataset size:", len(dataset)) From 669875acbcfc5fa43a1ab139c92fcd9b4c5badf1 Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Fri, 12 Sep 2025 13:46:22 +0200 Subject: [PATCH 08/46] Dataset module init file --- cpmpy/tools/dataset/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 cpmpy/tools/dataset/__init__.py diff --git a/cpmpy/tools/dataset/__init__.py b/cpmpy/tools/dataset/__init__.py new file mode 100644 index 000000000..e69de29bb From c1bd2fef45bbb4a39413794fbfcfbd551fe54db2 Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Fri, 12 Sep 2025 13:48:36 +0200 Subject: [PATCH 09/46] Add benchmark runners --- cpmpy/tools/benchmark/__init__.py | 0 cpmpy/tools/benchmark/_base.py | 496 ++++++++++++++++++++++++++++++ cpmpy/tools/benchmark/mse.py | 205 ++++++++++++ cpmpy/tools/benchmark/opb.py | 197 ++++++++++++ cpmpy/tools/benchmark/runner.py | 287 +++++++++++++++++ 5 files changed, 1185 insertions(+) create mode 100644 cpmpy/tools/benchmark/__init__.py create mode 100644 cpmpy/tools/benchmark/_base.py create mode 100644 
cpmpy/tools/benchmark/mse.py create mode 100644 cpmpy/tools/benchmark/opb.py create mode 100644 cpmpy/tools/benchmark/runner.py diff --git a/cpmpy/tools/benchmark/__init__.py b/cpmpy/tools/benchmark/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py new file mode 100644 index 000000000..85119a822 --- /dev/null +++ b/cpmpy/tools/benchmark/_base.py @@ -0,0 +1,496 @@ +""" +Benchmark framework for CPMpy models. + +This module provides the `Benchmark` base class, designed to run constraint programming +benchmarks in a structured fashion. It allows reading instances, posting them to different +back-end solvers, and handling solver execution with limits on time and memory. +It also provides hooks for customizing logging, intermediate solution printing, and +error handling. Although this base class can be used on its own (example below), +users will most likely want to have a look at one of its subclasses for running a specific +benchmark dataset, e.g. xcsp3, opb, mse, ... + + +Usage Example +------------- +>>> from myparser import read_instance # your custom model parser (or one included in CPMpy) +>>> bm = Benchmark(reader=read_instance) +>>> bm.run( +... instance="example.extension", # your benchmark instance (e.g. coming from a CPMpy model dataset) +... solver="ortools", +... time_limit=30, +... mem_limit=1024, +... verbose=True +... ) +Status: OPTIMAL +Objective: 42 +Solution: ... + +""" + + +from abc import ABC + +import time +import random +import psutil +import warnings +from typing import Optional + +import cpmpy as cp +from cpmpy.tools.benchmark import _mib_as_bytes, _wall_time, set_memory_limit, set_time_limit, _bytes_as_mb, _bytes_as_gb + + +class Benchmark(ABC): + """ + Abstract base class for running CPMpy benchmarks. + + The `Benchmark` class provides a standardized framework for reading instances, + posting models to solvers, and managing solver runs with resource limits. 
+ It is designed to be extended or customized for specific benchmarking needs. + """ + + def __init__(self, reader:callable): + """ + Arguments: + reader (callable): A parser from a model format to a CPMPy model. + """ + self.reader = reader + + def read_instance(self, instance) -> cp.Model: + """ + Parse a model instance to a CPMpy model. + + Arguments: + instance (str or os.PathLike): The model instance to parse into a CPMpy model. + """ + return self.reader(instance) + + """ + Callback methods which can be overwritten to make a custom benchmark run. + """ + + def print_comment(self, comment:str): + print(comment) + + def print_intermediate(self, objective:int): + print("Intermediate solution:", objective) + + def print_result(self, s): + self.print_comment(s.status()) + + def handle_memory_error(self, mem_limit): + self.print_comment(f"MemoryError raised. Reached limit of {mem_limit} MiB") + + def handle_not_implemented(self, e): + self.print_comment(str(e)) + + def handle_exception(self, e): + self.print_comment(f"An {type(e)} got raised: {e}") + import traceback + self.print_comment("Stack trace:") + for line in traceback.format_exc().split('\n'): + if line.strip(): + self.print_comment(line) + + """ + Solver arguments (can also be tweaked for a specific benchmark). 
+ """ + + def ortools_arguments( + self, + model: cp.Model, + cores: Optional[int] = None, + seed: Optional[int] = None, + intermediate: bool = False, + **kwargs + ): + # https://github.com/google/or-tools/blob/stable/ortools/sat/sat_parameters.proto + res = dict() + + # https://github.com/google/or-tools/blob/1c5daab55dd84bca7149236e4b4fa009e5fd95ca/ortools/flatzinc/cp_model_fz_solver.cc#L1688 + res |= { + "interleave_search": True, + "use_rins_lns": False, + } + if not model.has_objective(): + res |= { "num_violation_ls": 1 } + + if cores is not None: + res |= { "num_search_workers": cores } + if seed is not None: + res |= { "random_seed": seed } + + if intermediate and model.has_objective(): + # Define custom ORT solution callback, then register it + _self = self + from ortools.sat.python import cp_model as ort + class OrtSolutionCallback(ort.CpSolverSolutionCallback): + """ + For intermediate objective printing. + """ + + def __init__(self): + super().__init__() + self.__start_time = time.time() + self.__solution_count = 1 + + def on_solution_callback(self): + """Called on each new solution.""" + + current_time = time.time() + obj = int(self.ObjectiveValue()) + _self.print_comment('Solution %i, time = %0.2fs' % + (self.__solution_count, current_time - self.__start_time)) + _self.print_intermediate(obj) + self.__solution_count += 1 + + + def solution_count(self): + """Returns the number of solutions found.""" + return self.__solution_count + + # Register the callback + res |= { "solution_callback": OrtSolutionCallback() } + + def internal_options(solver: "CPM_ortools"): + # https://github.com/google/or-tools/blob/1c5daab55dd84bca7149236e4b4fa009e5fd95ca/ortools/flatzinc/cp_model_fz_solver.cc#L1688 + solver.ort_solver.parameters.subsolvers.extend(["default_lp", "max_lp", "quick_restart"]) + if not model.has_objective(): + solver.ort_solver.parameters.subsolvers.append("core_or_no_lp") + if len(solver.ort_model.proto.search_strategy) != 0: + 
solver.ort_solver.parameters.subsolvers.append("fixed") + + return res, internal_options + + def exact_arguments( + self, + seed: Optional[int] = None, + **kwargs + ): + # Documentation: https://gitlab.com/JoD/exact/-/blob/main/src/Options.hpp?ref_type=heads + res = dict() + if seed is not None: + res |= { "seed": seed } + + return res, None + + def choco_arguments(): + # Documentation: https://github.com/chocoteam/pychoco/blob/master/pychoco/solver.py + return {}, None + + def z3_arguments( + self, + model: cp.Model, + cores: int = 1, + seed: Optional[int] = None, + mem_limit: Optional[int] = None, + **kwargs + ): + # Documentation: https://microsoft.github.io/z3guide/programming/Parameters/ + # -> is outdated, just let it crash and z3 will report the available options + + res = dict() + + if model.has_objective(): + # Opt does not seem to support setting random seed or max memory + pass + else: + # Sat parameters + if cores is not None: + res |= { "threads": cores } # TODO what with hyperthreadding, when more threads than cores + if seed is not None: + res |= { "random_seed": seed } + if mem_limit is not None: + res |= { "max_memory": _bytes_as_mb(mem_limit) } + + return res, None + + def minizinc_arguments( + self, + solver: str, + cores: Optional[int] = None, + seed: Optional[int] = None, + **kwargs + ): + # Documentation: https://minizinc-python.readthedocs.io/en/latest/api.html#minizinc.instance.Instance.solve + res = dict() + if cores is not None: + res |= { "processes": cores } + if seed is not None: + res |= { "random_seed": seed } + + #if solver.endswith("gecode"): + # Documentation: https://www.minizinc.org/doc-2.4.3/en/lib-gecode.html + #elif solver.endswith("chuffed"): + # Documentation: + # - https://www.minizinc.org/doc-2.5.5/en/lib-chuffed.html + # - https://github.com/chuffed/chuffed/blob/develop/chuffed/core/options.h + + return res, None + + def gurobi_arguments( + self, + model: cp.Model, + cores: Optional[int] = None, + seed: Optional[int] = 
None, + mem_limit: Optional[int] = None, + intermediate: bool = False, + **kwargs + ): + # Documentation: https://www.gurobi.com/documentation/9.5/refman/parameters.html#sec:Parameters + res = dict() + if cores is not None: + res |= { "Threads": cores } + if seed is not None: + res |= { "Seed": seed } + if mem_limit is not None: + res |= { "MemLimit": _bytes_as_gb(mem_limit) } + + if intermediate and model.has_objective(): + + _self = self + + class GurobiSolutionCallback: + def __init__(self, model:cp.Model): + self.__start_time = time.time() + self.__solution_count = 0 + self.model = model + + def callback(self, *args, **kwargs): + current_time = time.time() + model, state = args + + # Callback codes: https://www.gurobi.com/documentation/current/refman/cb_codes.html#sec:CallbackCodes + + from gurobipy import GRB + # if state == GRB.Callback.MESSAGE: # verbose logging + # print_comment("log message: " + str(model.cbGet(GRB.Callback.MSG_STRING))) + if state == GRB.Callback.MIP: # callback from the MIP solver + if model.cbGet(GRB.Callback.MIP_SOLCNT) > self.__solution_count: # do we have a new solution? 
+ + obj = int(model.cbGet(GRB.Callback.MIP_OBJBST)) + _self.print_comment('Solution %i, time = %0.2fs' % + (self.__solution_count, current_time - self.__start_time)) + _self.print_intermediate(obj) + self.__solution_count = model.cbGet(GRB.Callback.MIP_SOLCNT) + + res |= { "solution_callback": GurobiSolutionCallback(model).callback } + + return res, None + + def cpo_arguments( + self, + model: cp.Model, + cores: Optional[int] = None, + seed: Optional[int] = None, + intermediate: bool = False, + **kwargs + ): + # Documentation: https://ibmdecisionoptimization.github.io/docplex-doc/cp/docplex.cp.parameters.py.html#docplex.cp.parameters.CpoParameters + res = dict() + if cores is not None: + res |= { "Workers": cores } + if seed is not None: + res |= { "RandomSeed": seed } + + if intermediate and model.has_objective(): + from docplex.cp.solver.solver_listener import CpoSolverListener + _self = self + class CpoSolutionCallback(CpoSolverListener): + + def __init__(self): + super().__init__() + self.__start_time = time.time() + self.__solution_count = 1 + + def result_found(self, solver, sres): + current_time = time.time() + obj = sres.get_objective_value() + if obj is not None: + _self.print_comment('Solution %i, time = %0.2fs' % + (self.__solution_count, current_time - self.__start_time)) + _self.print_intermediate(obj) + self.__solution_count += 1 + + def solution_count(self): + """Returns the number of solutions found.""" + return self.__solution_count + + # Register the callback + res |= { "solution_callback": CpoSolutionCallback } + + return res, None + + + """ + Methods which can, but most likely shouldn't, be overwritten.
+ """ + + def set_memory_limit(self, mem_limit, verbose=False): + set_memory_limit(mem_limit, verbose=verbose) + + def set_time_limit(self, time_limit, verbose=False): + p = psutil.Process() + if time_limit is not None: + set_time_limit(int(time_limit - _wall_time(p) + time.process_time()), verbose=verbose) + else: + set_time_limit(None) + + def post_model(self, model, solver, solver_args): + """ + Post the model to the selected backend solver. + """ + if solver == "exact": # Exact2 takes its options at creation time + s = cp.SolverLookup.get(solver, model, **solver_args) + solver_args = dict() # no more solver args needed + else: + s = cp.SolverLookup.get(solver, model) + return s + + + """ + Internal workings + """ + + def solver_arguments( + self, + solver: str, + model: cp.Model, + seed: Optional[int] = None, + intermediate: bool = False, + cores: int = 1, + mem_limit: Optional[int] = None, + **kwargs + ): + opt = model.has_objective() + sat = not opt + + if solver == "ortools": + return self.ortools_arguments(model, cores=cores, seed=seed, intermediate=intermediate, **kwargs) + elif solver == "exact": + return self.exact_arguments(seed=seed, **kwargs) + elif solver == "choco": + return self.choco_arguments() + elif solver == "z3": + return self.z3_arguments(model, cores=cores, seed=seed, mem_limit=mem_limit, **kwargs) + elif solver.startswith("minizinc"): # also can have a subsolver + return self.minizinc_arguments(solver, cores=cores, seed=seed, **kwargs) + elif solver == "gurobi": + return self.gurobi_arguments(model, cores=cores, seed=seed, mem_limit=mem_limit, intermediate=intermediate, opt=opt, **kwargs) + elif solver == "cpo": + return self.cpo_arguments(model=model, cores=cores, seed=seed, intermediate=intermediate, **kwargs) + else: + self.print_comment(f"setting parameters of {solver} is not (yet) supported") + return dict() + + def run( + self, + instance:str, # path to the instance to run + open:Optional[callable] = None, # how to 'open' the 
instance file + seed: Optional[int] = None, # random seed + time_limit: Optional[int] = None, # time limit for this single instance + mem_limit: Optional[int] = None, # MiB: 1024 * 1024 bytes + cores: int = 1, + solver: str = None, # which backend solver to use + time_buffer: int = 0, + intermediate: bool = False, + verbose: bool = False, + **kwargs, + ): + + if not verbose: + warnings.filterwarnings("ignore") + + try: + + # --------------------------- Global Configuration --------------------------- # + + # Get the current process + p = psutil.Process() + + # pychoco currently does not support setting the mem_limit + if solver == "choco" and mem_limit is not None: + warnings.warn("'mem_limit' is currently not supported with choco, issues with GraalVM") + mem_limit = None + + # Set random seed (if provided) + if seed is not None: + random.seed(seed) + + # Set memory limit (if provided) + if mem_limit is not None: + self.set_memory_limit(mem_limit, verbose=verbose) + + # Set time limit (if provided) + if time_limit is not None: + self.set_time_limit(time_limit, verbose=verbose) # set remaining process time != wall time + + # ------------------------------ Parse instance ------------------------------ # + + time_parse = time.time() + model = self.read_instance(instance, open=open) + time_parse = time.time() - time_parse + if verbose: self.print_comment(f"took {time_parse:.4f} seconds to parse model [{instance}]") + + if time_limit and time_limit < _wall_time(p): + raise TimeoutError("Time's up after parse") + + # ------------------------ Post CPMpy model to solver ------------------------ # + + solver_args, internal_options = self.solver_arguments(solver, model=model, seed=seed, + intermediate=intermediate, + cores=cores, mem_limit=_mib_as_bytes(mem_limit) if mem_limit is not None else None, + **kwargs) + + # Post model to solver + time_post = time.time() + s = self.post_model(model, solver, solver_args) + time_post = time.time() - time_post + if verbose: 
self.print_comment(f"took {time_post:.4f} seconds to post model to {solver}") + + if time_limit and time_limit < _wall_time(p): + raise TimeoutError("Time's up after post") + + # ------------------------------- Solve model ------------------------------- # + + if time_limit: + # give solver only the remaining time + time_limit = time_limit - _wall_time(p) - time_buffer + # disable signal-based time limit and let the solver handle it (solvers don't play well with difference between cpu and wall time) + self.set_time_limit(None) + + if verbose: self.print_comment(f"{time_limit}s left to solve") + + time_solve = time.time() + try: + if internal_options is not None: + internal_options(s) # Set more internal solver options (need access to native solver object) + is_sat = s.solve(time_limit=time_limit, **solver_args) + except RuntimeError as e: + if "Program interrupted by user." in str(e): # Special handling for Exact + raise TimeoutError("Exact interrupted due to timeout") + else: + raise e + + time_solve = time.time() - time_solve + if verbose: self.print_comment(f"took {time_solve:.4f} seconds to solve") + + # ------------------------------- Print result ------------------------------- # + + self.print_result(s) + + # ------------------------------------- - ------------------------------------ # + + + except MemoryError as e: + self.handle_memory_error(mem_limit) + raise e + except NotImplementedError as e: + self.handle_not_implemented(e) + raise e + except Exception as e: + self.handle_exception(e) + raise e + + + \ No newline at end of file diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py new file mode 100644 index 000000000..08084b645 --- /dev/null +++ b/cpmpy/tools/benchmark/mse.py @@ -0,0 +1,205 @@ +""" +MSE competition as a CPMpy benchmark + +This module provides a benchmarking framework for running CPMpy on MaxSAT Evaluation (MSE) +competition instances encoded in WCNF (Weighted CNF) format. 
It extends the generic +`Benchmark` base class with MSE-specific logging and result reporting in DIMACS-like format. + +Command-line Interface +---------------------- +This script can be run directly to benchmark solvers on MSE datasets. + +Usage: + python mse.py --year 2024 --track exact-weighted --solver ortools + +Arguments: + --year Competition year (e.g., 2024). + --track Track type (e.g., exact-weighted, exact-unweighted). + --solver Solver name (e.g., ortools, exact, choco, ...). + --workers Number of parallel workers to use. + --time-limit Time limit in seconds per instance. + --mem-limit Memory limit in MB per instance. + --cores Number of cores to assign to a single instance. + --output-dir Output directory for CSV files. + --verbose Show solver output if set. + --intermediate Report intermediate solutions if supported. + +=============== +List of classes +=============== + +.. autosummary:: + :nosignatures: + + MSEBenchmark + +================= +List of functions +================= + +.. autosummary:: + :nosignatures: + + solution_mse +""" + +import argparse +from datetime import datetime +from enum import Enum +from pathlib import Path +import warnings + +from cpmpy.tools.benchmark.runner import benchmark_runner +from _base import Benchmark + +from cpmpy.tools.wcnf import read_wcnf +from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus + + +class ExitStatus(Enum): + unsupported:str = "UNSUPPORTED" # instance contains an unsupported feature (e.g. a unsupported global constraint) + sat:str = "SATISFIABLE" # CSP : found a solution | COP : found a solution but couldn't prove optimality + optimal:str = "OPTIMUM" + chr(32) + "FOUND" # optimal COP solution found + unsat:str = "UNSATISFIABLE" # instance is unsatisfiable + unknown:str = "UNKNOWN" # any other case + +def solution_mse(model): + """ + Convert a CPMpy model solution into the MSE solution string format. 
+ + Arguments: + model (cp.solvers.SolverInterface): The solver-specific model for which to print its solution in MSE format. + + Returns: + str: MSE-formatted solution string. + """ + variables = [var for var in model.user_vars if var.name[:2] == "BV"] # dirty workaround for all missed aux vars in user vars + variables = sorted(variables, key=lambda v: int("".join(filter(str.isdigit, v.name)))) + return " ".join([str(1 if var.value() else 0) for var in variables]) + +class MSEBenchmark(Benchmark): + + """ + Benchmark runner for MSE (MaxSAT Evaluation) competition instances. + + This class extends `Benchmark` to implement MSE-specific solution printing + in DIMACS-like output format (`c`, `s`, `v`, `o` lines). It uses CPMpy's `read_wcnf` + to parse WCNF (Weighted CNF) instances and runs them on a selected solver supported + by CPMpy. + """ + + def __init__(self): + super().__init__(reader=read_wcnf) + + def print_comment(self, comment:str): + print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True) + + def print_status(self, status: ExitStatus) -> None: + print('s' + chr(32) + status.value, end="\n", flush=True) + + def print_value(self, value: str) -> None: + value = value[:-2].replace("\n", "\nv" + chr(32)) + value[-2:] + print('v' + chr(32) + value, end="\n", flush=True) + + def print_objective(self, objective: int) -> None: + print('o' + chr(32) + str(objective), end="\n", flush=True) + + def print_result(self, s): + if s.status().exitstatus == CPMStatus.OPTIMAL: + self.print_value(solution_mse(s)) + self.print_status(ExitStatus.optimal) + elif s.status().exitstatus == CPMStatus.FEASIBLE: + self.print_value(solution_mse(s)) + self.print_status(ExitStatus.sat) + elif s.status().exitstatus == CPMStatus.UNSATISFIABLE: + self.print_status(ExitStatus.unsat) + else: + self.print_comment("Solver did not find any solution within the time/memory limit") + self.print_status(ExitStatus.unknown) + + def handle_memory_error(self, mem_limit): + 
super().handle_memory_error(mem_limit) + self.print_status(ExitStatus.unknown) + + def handle_not_implemented(self, e): + super().handle_not_implemented(e) + self.print_status(ExitStatus.unsupported) + + def handle_exception(self, e): + super().handle_exception(e) + self.print_status(ExitStatus.unknown) + + def parse_output_line(self, line, result): + if line.startswith('s '): + result['status'] = line[2:].strip() + elif line.startswith('v '): + # only record first line, contains 'type' and 'cost' + solution = line.split("\n")[0][2:].strip() + if solution not in result: + result['solution'] = solution + else: + result['solution'] = result['solution'] + ' ' + str(solution) + elif line.startswith('o '): + obj = int(line[2:].strip()) + if result['intermediate'] is None: + result['intermediate'] = [] + result['intermediate'] += [(sol_time, obj)] + result['objective_value'] = obj + obj = None + elif line.startswith('c Solution'): + parts = line.split(', time = ') + # Get solution time from comment for intermediate solution -> used for annotating 'o ...' 
lines + sol_time = float(parts[-1].replace('s', '').rstrip()) + elif line.startswith('c took '): + # Parse timing information + parts = line.split(' seconds to ') + if len(parts) == 2: + time_val = float(parts[0].replace('c took ', '')) + action = parts[1].strip() + if action.startswith('parse'): + result['time_parse'] = time_val + elif action.startswith('convert'): + result['time_model'] = time_val + elif action.startswith('post'): + result['time_post'] = time_val + elif action.startswith('solve'): + result['time_solve'] = time_val + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description='Benchmark solvers on MSE instances') + parser.add_argument('--year', type=int, required=True, help='Competition year (e.g., 2024)') + parser.add_argument('--track', type=str, required=True, help='Track type (e.g., exact-weighted, exact-unweighted)') + parser.add_argument('--solver', type=str, required=True, help='Solver name (e.g., ortools, exact, choco, ...)') + parser.add_argument('--workers', type=int, default=4, help='Number of parallel workers') + parser.add_argument('--time-limit', type=int, default=300, help='Time limit in seconds per instance') + parser.add_argument('--mem-limit', type=int, default=8192, help='Memory limit in MB per instance') + parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign tp a single instance') + parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files') + parser.add_argument('--verbose', action='store_true', help='Show solver output') + parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions') + # parser.add_argument('--checker-path', type=str, default=None, + # help='Path to the XCSP3 solution checker JAR file') + args = parser.parse_args() + + if not args.verbose: + warnings.filterwarnings("ignore") + + # Load benchmark instances (as a dataset) + from cpmpy.tools.dataset.model.mse import MSEDataset + 
dataset = MSEDataset(year=args.year, track=args.track, download=True) + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Get current timestamp in a filename-safe format + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + # Define output file path with timestamp + output_file = str(output_dir / "mse" / f"mse_{args.year}_{args.track}_{args.solver}_{timestamp}.csv") + + # Run the benchmark + instance_runner = MSEBenchmark() + output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args)) + print(f"Results added to {output_file}") diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py new file mode 100644 index 000000000..3fc5202cd --- /dev/null +++ b/cpmpy/tools/benchmark/opb.py @@ -0,0 +1,197 @@ +""" +PB competition as a CPMpy benchmark + +This module provides a benchmarking framework for running CPMpy on PB +competition instances. It extends the generic `Benchmark` base class with +PB Competition-specific logging and result reporting. + +Command-line Interface +---------------------- +This script can be run directly to benchmark solvers on MSE datasets. + +Usage: + python opb.py --year 2024 --track OPT-LIN --solver ortools + +Arguments: + --year Competition year (e.g., 2024). + --track Track type (e.g., OPT_LIN, DEC_LIN). + --solver Solver name (e.g., ortools, exact, choco, ...). + --workers Number of parallel workers to use. + --time-limit Time limit in seconds per instance. + --mem-limit Memory limit in MB per instance. + --cores Number of cores to assign to a single instance. + --output-dir Output directory for CSV files. + --verbose Show solver output if set. + --intermediate Report intermediate solutions if supported. + +=============== +List of classes +=============== + +.. autosummary:: + :nosignatures: + + OPBBenchmark + +================= +List of functions +================= + +.. 
class ExitStatus(Enum):
    # Values are the exact strings printed on the 's ' status line.
    unsupported:str = "UNSUPPORTED" # instance contains an unsupported feature (e.g. a unsupported global constraint)
    sat:str = "SATISFIABLE" # CSP : found a solution | COP : found a solution but couldn't prove optimality
    optimal:str = "OPTIMUM" + chr(32) + "FOUND" # optimal COP solution found
    unsat:str = "UNSATISFIABLE" # instance is unsatisfiable
    unknown:str = "UNKNOWN" # any other case


def solution_opb(model):
    """
    Formats a solution according to the PB24 specification.

    Every reported variable is printed by name (with '[' and ']' stripped);
    variables whose value is falsy are prefixed with '-'.

    Arguments:
        model: CPMpy model for which to format its solution (should be solved first)

    Returns:
        Formatted model solution according to PB24 specification.
    """
    # dirty workaround for all missed aux vars in user vars TODO
    variables = [var for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]]

    literals = []
    for var in variables:
        name = var.name.replace("[", "").replace("]", "")
        literals.append(name if var.value() else "-" + name)
    return " ".join(literals)


class OPBBenchmark(Benchmark):
    """
    The PB competition as a CPMpy benchmark.

    Instances are read with `read_opb`; output is printed as 'c' (comment),
    's' (status), 'v' (values) and 'o' (objective) lines.
    """

    def __init__(self):
        super().__init__(reader=read_opb)

    def print_comment(self, comment:str):
        """Print a comment line ('c ...')."""
        print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True)

    def print_status(self, status: ExitStatus) -> None:
        """Print the status line ('s ...')."""
        print('s' + chr(32) + status.value, end="\n", flush=True)

    def print_value(self, value: str) -> None:
        """Print the solution ('v ...'); line breaks before the last two characters start a new 'v ' line."""
        value = value[:-2].replace("\n", "\nv" + chr(32)) + value[-2:]
        print('v' + chr(32) + value, end="\n", flush=True)

    def print_objective(self, objective: int) -> None:
        """Print an objective line ('o ...')."""
        print('o' + chr(32) + str(objective), end="\n", flush=True)

    def print_result(self, s):
        """
        Report the final outcome of a solve call.

        Arguments:
            s: Solved object exposing `status()`, `objective_value()` and `user_vars`.
        """
        exitstatus = s.status().exitstatus
        if exitstatus == CPMStatus.OPTIMAL:
            # Was `self.print_result()`: a call without its argument, which
            # raised a TypeError instead of reporting the objective.
            self.print_objective(s.objective_value())
            self.print_value(solution_opb(s))
            self.print_status(ExitStatus.optimal)
        elif exitstatus == CPMStatus.FEASIBLE:
            self.print_value(solution_opb(s))
            self.print_status(ExitStatus.sat)
        elif exitstatus == CPMStatus.UNSATISFIABLE:
            self.print_status(ExitStatus.unsat)
        else:
            self.print_comment("Solver did not find any solution within the time/memory limit")
            self.print_status(ExitStatus.unknown)

    def handle_memory_error(self, mem_limit):
        super().handle_memory_error(mem_limit)
        self.print_status(ExitStatus.unknown)

    def handle_not_implemented(self, e):
        super().handle_not_implemented(e)
        self.print_status(ExitStatus.unsupported)

    def handle_exception(self, e):
        super().handle_exception(e)
        self.print_status(ExitStatus.unknown)

    def parse_output_line(self, line, result):
        """
        Fold one chunk of captured solver output into `result`.

        Handles 's ' (status), 'v ' (solution, accumulated), 'o ' (objective),
        'c Solution' (time of an intermediate solution) and 'c took ' (phase
        timings) chunks; anything else is ignored.

        This method is invoked once per chunk, so the time announced by a
        'c Solution' chunk cannot be kept in a local variable until the
        matching 'o ' chunk. It is parked in result['intermediate'] as
        (time, None) and completed when the objective arrives; objectives
        without a preceding solution comment are recorded as (None, objective).

        Arguments:
            line (str): Chunk of output to interpret.
            result (dict): Result record to update in place.
        """
        if line.startswith('s '):
            result['status'] = line[2:].strip()

        elif line.startswith('v '):
            # Only the first physical line of the chunk is recorded
            solution = line.split("\n")[0][2:].strip()
            if result.get('solution') is None:
                result['solution'] = solution
            else:
                result['solution'] = result['solution'] + ' ' + solution

        elif line.startswith('o '):
            obj = int(line[2:].strip())
            history = result.get('intermediate')
            if history is None:
                history = result['intermediate'] = []
            if history and history[-1][1] is None:
                # Complete the entry opened by the preceding 'c Solution' comment
                history[-1] = (history[-1][0], obj)
            else:
                history.append((None, obj))
            result['objective_value'] = obj

        elif line.startswith('c Solution'):
            parts = line.split(', time = ')
            if len(parts) > 1:
                sol_time = float(parts[-1].replace('s', '').rstrip())
                history = result.get('intermediate')
                if history is None:
                    history = result['intermediate'] = []
                # Objective is filled in by the next 'o ' chunk
                history.append((sol_time, None))

        elif line.startswith('c took '):
            # Timing information: 'c took <seconds> seconds to <action>'
            parts = line.split(' seconds to ')
            if len(parts) == 2:
                time_val = float(parts[0].replace('c took ', ''))
                action = parts[1].strip()
                if action.startswith('parse'):
                    result['time_parse'] = time_val
                elif action.startswith('convert'):
                    result['time_model'] = time_val
                elif action.startswith('post'):
                    result['time_post'] = time_val
                elif action.startswith('solve'):
                    result['time_solve'] = time_val


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Benchmark solvers on OPB instances')
    parser.add_argument('--year', type=int, required=True, help='Competition year (e.g., 2023)')
    parser.add_argument('--track', type=str, required=True, help='Track type (e.g., OPT-LIN, DEC-LIN)')
    parser.add_argument('--solver', type=str, required=True, help='Solver name (e.g., ortools, exact, choco, ...)')
    parser.add_argument('--workers', type=int, default=4, help='Number of parallel workers')
    parser.add_argument('--time-limit', type=int, default=300, help='Time limit in seconds per instance')
    parser.add_argument('--mem-limit', type=int, default=8192, help='Memory limit in MB per instance')
    parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign tp a single instance')
    parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files')
    parser.add_argument('--verbose', action='store_true', help='Show solver output')
    parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions')
    args = parser.parse_args()

    if not args.verbose:
        warnings.filterwarnings("ignore")

    # Load benchmark instances (as a dataset)
    from cpmpy.tools.dataset.model.opb import OPBDataset
    dataset = OPBDataset(year=args.year, track=args.track, download=True)

    # Create the directory the CSV is actually written to. Previously only the
    # parent was created, so opening the file inside "opb/" failed.
    output_dir = Path(args.output_dir) / "opb"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Get current timestamp in a filename-safe format
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Define output file path with timestamp
    output_file = str(output_dir / f"opb_{args.year}_{args.track}_{args.solver}_{timestamp}.csv")

    # Run the benchmark
    instance_runner = OPBBenchmark()
    output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args))
    print(f"Results added to {output_file}")
class Tee:
    """
    A stream-like object that duplicates writes to multiple underlying streams.
    """
    def __init__(self, *streams):
        """
        Arguments:
            *streams: Any number of file-like objects that implement a write() method,
                      such as sys.stdout, sys.stderr, or StringIO.
        """
        self.streams = streams

    def write(self, data):
        """
        Write data to all underlying streams.

        Args:
            data (str): The string to write.
        """
        for s in self.streams:
            s.write(data)

    def flush(self):
        """
        Flush all underlying streams to ensure all data is written out.
        """
        for s in self.streams:
            s.flush()


class PipeWriter:
    """
    Stdout wrapper for a multiprocessing pipe.
    """
    def __init__(self, conn):
        self.conn = conn

    def write(self, data):
        """Send `data` through the pipe; empty writes are dropped."""
        if data:  # avoid empty writes
            try:
                self.conn.send(data)
            except Exception:
                # Best effort: losing output must not crash the solver run.
                # Narrowed from a bare `except:` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                pass

    def flush(self):
        pass  # no buffering


def wrapper(instance_runner, conn, kwargs, verbose):
    """
    Wraps a call to a benchmark as to correctly
    forward stdout to the multiprocessing pipe (conn).
    Also sends a last status report though the pipe.

    Status report can be missing when process has been terminated by a SIGTERM.

    Arguments:
        instance_runner: Object whose `run(**kwargs)` executes one instance.
        conn: Pipe end used for forwarding output and the final status dict.
        kwargs (dict): Keyword arguments for `instance_runner.run`.
        verbose (bool): If True, output also goes to the console.
    """

    original_stdout = sys.stdout
    pipe_writer = PipeWriter(conn)

    if not verbose:
        warnings.filterwarnings("ignore")
        sys.stdout = pipe_writer  # only forward to pipe
    else:
        sys.stdout = Tee(original_stdout, pipe_writer)  # forward to pipe and console

    try:
        init_signal_handlers()  # configure OS signal handlers
        instance_runner.run(**kwargs)
        conn.send({"status": "ok"})
    except Exception as e:  # capture exceptions and report in state
        tb_str = traceback.format_exc()
        conn.send({"status": "error", "exception": e, "traceback": tb_str})
    finally:
        sys.stdout = original_stdout
        conn.close()


def execute_instance(args: Tuple[callable, str, dict, callable, str, int, int, int, str, bool, bool, str]) -> None:
    """
    Solve a single benchmark instance and write results to file immediately.

    Args is a tuple of, in order:
        instance_runner: Benchmark whose `run()` solves the instance and whose
                         `parse_output_line()` interprets the captured output
        filename: Path to the instance file
        metadata: Dictionary containing instance metadata (year, track, name)
        open: Dataset-specific callable used to open (and decompress) `filename`
        solver: Name of the solver to use
        time_limit: Time limit in seconds
        mem_limit: Memory limit in MB
        cores: Number of cores to assign to the instance
        output_file: Path to the output CSV file
        verbose: Whether to show solver output
        intermediate: Whether to report intermediate solutions
        checker_path: Path to a solution checker (currently unused)
    """

    # The dataset opener is bound to its own name: unpacking it as `open`
    # shadowed the builtin, so the CSV below was opened with the dataset
    # opener (failing for e.g. an lzma opener with a fixed read mode).
    (instance_runner, filename, metadata, open_instance, solver, time_limit, mem_limit,
     cores, output_file, verbose, intermediate, checker_path) = args

    # Fieldnames for the CSV file
    fieldnames = ['instance'] + list(metadata.keys()) + \
                 ['solver',
                  'time_total', 'time_parse', 'time_model', 'time_post', 'time_solve',
                  'status', 'objective_value', 'solution', 'intermediate', 'checker_result']
    result = dict.fromkeys(fieldnames)  # init all fields to None
    result.update(metadata)
    result['solver'] = solver
    # NOTE(review): the 'instance' column is never filled in here; confirm
    # whether it should receive the instance name or path.

    # Decompress before timers start
    with open_instance(filename) as f:  # <- dataset-specific opener
        instance = StringIO(f.read())  # keep the instance text in memory

    # Start total timing
    total_start = time.time()

    # Run the instance in a separate process
    ctx = multiprocessing.get_context("spawn")
    parent_conn, child_conn = multiprocessing.Pipe()  # communication pipe between processes
    process = ctx.Process(target=wrapper, args=(
        instance_runner,
        child_conn,
        {
            "instance": instance,
            "solver": solver,
            "time_limit": time_limit,
            "mem_limit": mem_limit,
            "intermediate": intermediate,
            "force_mem_limit": True,
            "time_buffer": 1,
            "cores": cores,
        },
        verbose))
    process.start()
    # NOTE(review): the pipe is only read after the join below; a child that
    # prints more than the pipe can buffer may block until the time limit.
    process.join(timeout=time_limit)

    # Replicate competition convention on how jobs get terminated
    if process.is_alive():
        # Send sigterm to let process know it reached its time limit
        os.kill(process.pid, signal.SIGTERM)
        # 1 second grace period
        process.join(timeout=1)
        # Kill if still alive
        if process.is_alive():
            os.kill(process.pid, signal.SIGKILL)
            process.join()

    result['time_total'] = time.time() - total_start

    # Default status if nothing returned by subprocess
    # -> process exited prematurely due to sigterm
    status = {"status": "error", "exception": "sigterm"}

    # Parse the output to get status, solution and timings
    while parent_conn.poll(timeout=1):
        line = parent_conn.recv()

        # Received a print statement from the subprocess
        if isinstance(line, str):
            instance_runner.parse_output_line(line, result)

        # Received a new status from the subprocess
        elif isinstance(line, dict):
            status = line

        else:
            # Was `raise()`, which only raised a TypeError by accident and
            # without a message; the exception type is kept.
            raise TypeError(f"Unexpected message of type {type(line).__name__} received from subprocess")

    # Parse the exit status
    if status["status"] == "error":
        # Ignore timeouts
        if "TimeoutError" in repr(status["exception"]):
            pass
        # All other exceptions, put in solution field
        elif result['solution'] is None:
            result['status'] = ExitStatus.unknown.value
            result["solution"] = status["exception"]

    # TODO: generalise 'checkers' for benchmarks (checker_path is not used yet,
    # so 'checker_result' stays empty)

    # Use a lock file to prevent concurrent writes
    lock_file = f"{output_file}.lock"
    lock = FileLock(lock_file)
    try:
        with lock:
            # Pre-check if file exists to determine if we need to write header
            write_header = not os.path.exists(output_file)

            with open(output_file, 'a', newline='') as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                if write_header:
                    writer.writeheader()
                writer.writerow(result)
    finally:
        # Optional: cleanup if the lock file somehow persists
        if os.path.exists(lock_file):
            try:
                os.remove(lock_file)
            except Exception:
                pass  # avoid crashing on cleanup


def benchmark_runner(
        dataset, instance_runner,
        output_file: str,
        solver: str, workers: int = 1,
        time_limit: int = 300, mem_limit: Optional[int] = 4096, cores: int=1,
        verbose: bool = False, intermediate: bool = False,
        checker_path: Optional[str] = None,
        **kwargs
    ) -> str:
    """
    Run a benchmark over all instances in a dataset using multiple threads.

    Arguments:
        dataset (_Dataset):             Dataset object containing instances to benchmark.
        instance_runner (Benchmark):    Benchmark runner that implements the run() method.
        output_file (str):              Path to the CSV file where results will be stored.
        solver (str):                   Name of the solver to use.
        workers (int):                  Number of worker threads; each runs its instance in a subprocess (default=1).
        time_limit (int):               Time limit in seconds for each instance (default=300).
        mem_limit (int, optional):      Memory limit in MB per instance (default=4096).
        cores (int):                    Number of CPU cores assigned per instance (default=1).
        verbose (bool):                 Whether to show solver output in stdout (default=False).
        intermediate (bool):            Whether to report intermediate solutions if supported (default=False).
        checker_path (str, optional):   Path to a solution checker for validating instance solutions.
        **kwargs:                       Additional keyword arguments; accepted and ignored.

    Returns:
        str: Path to the CSV file where benchmark results were written.
    """
    # Before Python 3.11, Future.result() raises concurrent.futures.TimeoutError,
    # which is not the builtin TimeoutError.
    from concurrent.futures import TimeoutError as FutureTimeoutError

    # Make sure the directory of the results file exists before any worker
    # tries to create its lock file or append a row.
    os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True)

    # Process instances in parallel
    with ThreadPoolExecutor(max_workers=workers) as executor:
        # NOTE(review): `dataset.open()` is called without an instance, while
        # XCSP3Dataset.open(instance) requires one; confirm the intended
        # dataset interface.
        # Submit all tasks and track their futures
        futures = [executor.submit(execute_instance,  # below: args
                                   (instance_runner, filename, metadata, dataset.open(), solver, time_limit, mem_limit, cores, output_file, verbose, intermediate, checker_path))
                   for filename, metadata in dataset]
        # Process results as they complete
        for i, future in enumerate(tqdm(futures, total=len(futures), desc=f"Running {solver}")):
            try:
                _ = future.result(timeout=time_limit+60)  # for cleanliness sake, result is empty
            except (TimeoutError, FutureTimeoutError):
                pass
            except Exception as e:
                print(f"Job {i}: {dataset[i][1]['name']}, ProcessPoolExecutor caught: {e}")

    return output_file
ExitStatus as CPMStatus @@ -80,7 +80,7 @@ def solution_mse(model): class MSEBenchmark(Benchmark): """ - Benchmark runner for MSE (MaxSAT Evaluation) competition instances. + MSE (MaxSAT Evaluation) competition as a CPMpy benchmark. This class extends `Benchmark` to implement MSE-specific solution printing in DIMACS-like output format (`c`, `s`, `v`, `o` lines). It uses CPMpy's `read_wcnf` diff --git a/cpmpy/tools/dataset/model/mse.py b/cpmpy/tools/dataset/model/mse.py index 84e8c5dfa..711a560bb 100644 --- a/cpmpy/tools/dataset/model/mse.py +++ b/cpmpy/tools/dataset/model/mse.py @@ -4,8 +4,9 @@ https://maxsat-evaluations.github.io/ """ -import lzma + import os +import lzma import zipfile import pathlib from urllib.request import urlretrieve From 7f2d363282588ac9c298e80df5c14bb8760daf80 Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Fri, 12 Sep 2025 14:06:44 +0200 Subject: [PATCH 11/46] XCSP3 as dataset and benchmark --- cpmpy/tools/benchmark/xcsp3.py | 248 +++++++++++++++++++++++++++++ cpmpy/tools/dataset/model/xcsp3.py | 132 +++++++++++++++ cpmpy/tools/xcsp3/__init__.py | 137 ++-------------- cpmpy/tools/xcsp3/parser.py | 146 +++++++++++++++++ 4 files changed, 543 insertions(+), 120 deletions(-) create mode 100644 cpmpy/tools/benchmark/xcsp3.py create mode 100644 cpmpy/tools/dataset/model/xcsp3.py create mode 100644 cpmpy/tools/xcsp3/parser.py diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py new file mode 100644 index 000000000..e52e41a4a --- /dev/null +++ b/cpmpy/tools/benchmark/xcsp3.py @@ -0,0 +1,248 @@ +""" +XCSP3 competition as a CPMpy benchmark + +This module provides a benchmarking framework for running CPMpy on XCSP3 +competition instances. It extends the generic `Benchmark` base class with +XCSP3-specific logging and result reporting. + +Command-line Interface +---------------------- +This script can be run directly to benchmark solvers on XCSP3 datasets. 
import warnings
import argparse
from enum import Enum
from pathlib import Path
from datetime import datetime
from xml.etree.ElementTree import ParseError
# `xml.etree.cElementTree` was removed in Python 3.9, the same version that
# introduced `ET.indent` (used in `solution_xcsp3`); only the plain module
# offers both.
import xml.etree.ElementTree as ET

# CPMpy
from cpmpy.tools.benchmark.runner import benchmark_runner
from cpmpy.tools.benchmark._base import Benchmark
from cpmpy.tools.xcsp3 import read_xcsp3
from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus


class ExitStatus(Enum):
    # Values are the exact strings printed on the 's ' status line.
    unsupported:str = "UNSUPPORTED" # instance contains an unsupported feature (e.g. a unsupported global constraint)
    sat:str = "SATISFIABLE" # CSP : found a solution | COP : found a solution but couldn't prove optimality
    optimal:str = "OPTIMUM" + chr(32) + "FOUND" # optimal COP solution found
    unsat:str = "UNSATISFIABLE" # instance is unsatisfiable
    unknown:str = "UNKNOWN" # any other case


def solution_xcsp3(model, useless_style="*", boolean_style="int"):
    """
    Formats a solution according to the XCSP3 specification.

    Arguments:
        model: CPMpy model for which to format its solution (should be solved first)
        useless_style: How to process unused decision variables (with value `None`).
                       If "*", variable is included in reporting with value "*".
                       If "drop", variable is excluded from reporting.
        boolean_style: Print style for boolean constants.
                       "int" results in 0/1, "bool" results in False/True.

    Returns:
        XML-formatted model solution according to XCSP3 specification.
    """

    # CSP
    if not model.has_objective():
        root = ET.Element("instantiation", type="solution")
    # COP
    else:
        root = ET.Element("instantiation", type="optimum", cost=str(int(model.objective_value())))

    # How useless variables should be handled
    # (variables which have value `None` in the solution)
    variables = {var.name: var for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]}  # dirty workaround for all missed aux vars in user vars
    if useless_style == "*":
        variables = {k: (v.value() if v.value() is not None else "*") for k, v in variables.items()}
    elif useless_style == "drop":
        variables = {k: v.value() for k, v in variables.items() if v.value() is not None}

    # Convert booleans
    if boolean_style == "bool":
        pass
    elif boolean_style == "int":
        variables = {k: (v if (not isinstance(v, bool)) else (1 if v else 0)) for k, v in variables.items()}

    # Build XCSP3 XML tree
    ET.SubElement(root, "list").text = " " + " ".join([str(v) for v in variables.keys()]) + " "
    ET.SubElement(root, "values").text = " " + " ".join([str(v) for v in variables.values()]) + " "
    tree = ET.ElementTree(root)
    ET.indent(tree, space="   ", level=0)
    res = ET.tostring(root).decode("utf-8")

    return str(res)


class XCSP3Benchmark(Benchmark):
    """
    The XCSP3 competition as a CPMpy benchmark.

    Instances are read with `read_xcsp3`; output is printed as 'c' (comment),
    's' (status), 'v' (values) and 'o' (objective) lines.
    """

    def __init__(self):
        super().__init__(reader=read_xcsp3)

    def print_comment(self, comment:str):
        """Print a comment line ('c ...')."""
        print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True)

    def print_status(self, status: ExitStatus) -> None:
        """Print the status line ('s ...')."""
        print('s' + chr(32) + status.value, end="\n", flush=True)

    def print_value(self, value: str) -> None:
        """Print the solution ('v ...'); line breaks before the last two characters start a new 'v ' line."""
        value = value[:-2].replace("\n", "\nv" + chr(32)) + value[-2:]
        print('v' + chr(32) + value, end="\n", flush=True)

    def print_objective(self, objective: int) -> None:
        """Print an objective line ('o ...')."""
        print('o' + chr(32) + str(objective), end="\n", flush=True)

    def print_result(self, s):
        """
        Report the final outcome of a solve call.

        Arguments:
            s: Solved object exposing `status()`, `has_objective()`,
               `objective_value()` and `user_vars`.
        """
        exitstatus = s.status().exitstatus
        if exitstatus == CPMStatus.OPTIMAL:
            # Was `self.print_result()`: a call without its argument, which
            # raised a TypeError instead of reporting the objective.
            self.print_objective(s.objective_value())
            self.print_value(solution_xcsp3(s))
            self.print_status(ExitStatus.optimal)
        elif exitstatus == CPMStatus.FEASIBLE:
            self.print_value(solution_xcsp3(s))
            self.print_status(ExitStatus.sat)
        elif exitstatus == CPMStatus.UNSATISFIABLE:
            self.print_status(ExitStatus.unsat)
        else:
            self.print_comment("Solver did not find any solution within the time/memory limit")
            self.print_status(ExitStatus.unknown)

    def handle_memory_error(self, mem_limit):
        super().handle_memory_error(mem_limit)
        self.print_status(ExitStatus.unknown)

    def handle_not_implemented(self, e):
        super().handle_not_implemented(e)
        self.print_status(ExitStatus.unsupported)

    def handle_exception(self, e):
        """Report parser errors as comments; defer everything else to the base class."""
        if isinstance(e, ParseError):
            if "out of memory" in e.msg:
                self.print_comment("MemoryError raised by parser.")
                self.print_status(ExitStatus.unknown)
            else:
                self.print_comment(f"An {type(e)} got raised by the parser: {e}")
                self.print_status(ExitStatus.unknown)
        else:
            super().handle_exception(e)
            self.print_status(ExitStatus.unknown)

    def parse_output_line(self, line, result):
        """
        Fold one chunk of captured solver output into `result`.

        Handles 's ' (status), 'v ' (solution; only the first one is kept),
        'o ' (objective), 'c Solution' (time of an intermediate solution) and
        'c took ' (phase timings) chunks; anything else is ignored.

        This method is invoked once per chunk, so the time announced by a
        'c Solution' chunk cannot be kept in a local variable until the
        matching 'o ' chunk. It is parked in result['intermediate'] as
        (time, None) and completed when the objective arrives; objectives
        without a preceding solution comment are recorded as (None, objective).

        Arguments:
            line (str): Chunk of output to interpret.
            result (dict): Result record to update in place.
        """
        if line.startswith('s '):
            result['status'] = line[2:].strip()

        elif line.startswith('v ') and result['solution'] is None:
            # only record first line, contains 'type' and 'cost'
            solution = line.split("\n")[0][2:].strip()
            result['solution'] = str(solution)
            # Take the text between 'cost="' and the next quote, rather than
            # assuming the attribute is followed by exactly two characters.
            _, found, tail = solution.partition('cost="')
            if found:
                result['objective_value'] = tail.split('"', 1)[0]

        elif line.startswith('o '):
            obj = int(line[2:].strip())
            history = result.get('intermediate')
            if history is None:
                history = result['intermediate'] = []
            if history and history[-1][1] is None:
                # Complete the entry opened by the preceding 'c Solution' comment
                history[-1] = (history[-1][0], obj)
            else:
                history.append((None, obj))
            result['objective_value'] = obj

        elif line.startswith('c Solution'):
            parts = line.split(', time = ')
            if len(parts) > 1:
                sol_time = float(parts[-1].replace('s', '').rstrip())
                history = result.get('intermediate')
                if history is None:
                    history = result['intermediate'] = []
                # Objective is filled in by the next 'o ' chunk
                history.append((sol_time, None))

        elif line.startswith('c took '):
            # Timing information: 'c took <seconds> seconds to <action>'
            parts = line.split(' seconds to ')
            if len(parts) == 2:
                time_val = float(parts[0].replace('c took ', ''))
                action = parts[1].strip()
                if action.startswith('parse'):
                    result['time_parse'] = time_val
                elif action.startswith('convert'):
                    result['time_model'] = time_val
                elif action.startswith('post'):
                    result['time_post'] = time_val
                elif action.startswith('solve'):
                    result['time_solve'] = time_val
class XCSP3Dataset(_Dataset):
    """
    XCSP3 benchmark dataset.

    Provides access to benchmark instances from the XCSP3
    competitions. Instances are grouped by `year` and `track` (e.g.,
    `"CSP"`, `"eCOP"`) and stored as `.xml.lzma` files.
    If the dataset is not available locally, it can be automatically
    downloaded and extracted.

    More information on the competition can be found here: https://xcsp.org/competitions/
    """

    def __init__(
            self,
            root: str = ".",
            year: int = 2023, track: str = "CSP",
            transform=None, target_transform=None,
            download: bool = False
        ):
        """
        Constructor for a dataset object of the XCSP3 competition.

        Arguments:
            root (str): Root directory where datasets are stored or will be downloaded to (default=".").
            year (int): Competition year of the dataset to use (default=2023).
            track (str): Track name specifying which subset of the competition instances to load (default="CSP").
            transform (callable, optional): Optional transform applied to the instance file path.
            target_transform (callable, optional): Optional transform applied to the metadata dictionary.
            download (bool): If True, downloads the dataset if it does not exist locally (default=False).


        Raises:
            ValueError: If the dataset directory does not exist and `download=False`,
                or if the requested year/track combination is not available.
        """

        self.root = pathlib.Path(root)
        self.year = year
        self.track = track

        dataset_dir = self.root / str(year) / track

        super().__init__(
            dataset_dir=dataset_dir,
            transform=transform, target_transform=target_transform,
            download=download, extension=".xml.lzma"
        )

    def category(self):
        """Return the year and track identifying this dataset."""
        return {
            "year": self.year,
            "track": self.track
        }

    def download(self):
        """
        Download the archive of `self.year` and extract the instances of
        `self.track` (flattened) into `self.dataset_dir`.

        Raises:
            ValueError: If the archive cannot be downloaded, has no top-level
                folder, or does not contain the requested track.
        """
        print(f"Downloading XCSP3 {self.year} instances...")

        url = "https://www.cril.univ-artois.fr/~lecoutre/compets/"
        year_suffix = str(self.year)[2:]  # Drop the starting '20'
        url_path = url + f"instancesXCSP{year_suffix}.zip"
        zip_path = self.root / f"instancesXCSP{year_suffix}.zip"

        # The archive is stored in the root directory, which may not exist yet
        self.root.mkdir(parents=True, exist_ok=True)

        try:
            urlretrieve(url_path, str(zip_path))
        except (HTTPError, URLError) as e:
            raise ValueError(f"No dataset available for year {self.year}. Error: {str(e)}") from e

        try:
            # Extract only the specific track folder from the zip
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                # Get the main folder name (e.g., "024_V3")
                main_folder = None
                for name in zip_ref.namelist():
                    if '/' in name:
                        main_folder = name.split('/')[0]
                        break

                if main_folder is None:
                    raise ValueError("Could not find main folder in zip file")

                # Get all unique track names from zip
                tracks = set()
                for file_info in zip_ref.infolist():
                    parts = file_info.filename.split('/')
                    if len(parts) > 2 and parts[0] == main_folder:
                        tracks.add(parts[1])

                # Check if requested track exists
                if self.track not in tracks:
                    raise ValueError(f"Track '{self.track}' not found in dataset. Available tracks: {sorted(tracks)}")

                # Create track folder in root directory, parents=True ensures recursive creation
                self.dataset_dir.mkdir(parents=True, exist_ok=True)

                # Extract files for the specified track
                prefix = f"{main_folder}/{self.track}/"
                for file_info in zip_ref.infolist():
                    # Directory entries also match the prefix; writing them
                    # out would create empty files named after the folder.
                    if file_info.is_dir() or not file_info.filename.startswith(prefix):
                        continue
                    # Extract file to track_dir, removing main_folder/track prefix
                    filename = pathlib.Path(file_info.filename).name
                    with zip_ref.open(file_info) as source, open(self.dataset_dir / filename, 'wb') as target:
                        target.write(source.read())
        finally:
            # Clean up the zip file, also when extraction failed
            zip_path.unlink()

    def open(self, instance: os.PathLike) -> callable:
        """
        Return the callable to open `instance` with: an lzma text-mode opener
        for paths ending in ".lzma", the builtin `open` otherwise.
        """
        return partial(lzma.open, mode='rt', encoding='utf-8') if str(instance).endswith(".lzma") else open


if __name__ == "__main__":
    dataset = XCSP3Dataset(year=2024, track="MiniCOP", download=True)
    print("Dataset size:", len(dataset))
    print("Instance 0:", dataset[0])
XCSP3-formatted CP models. - - - ================= - List of functions - ================= - - .. autosummary:: - :nosignatures: - - read_xcsp3 - - ======================== - List of helper functions - ======================== - - .. autosummary:: - :nosignatures: - - _parse_xcsp3 - _load_xcsp3 - - ================== - List of submodules - ================== - - .. autosummary:: - :nosignatures: - - parser_callbacks - analyze - benchmark - xcsp3_cpmpy - dataset - globals +Set of utilities for working with XCSP3-formatted CP models. + +================== +List of submodules +================== + +.. autosummary:: + :nosignatures: + + parser + parser_callbacks + analyze + benchmark + xcsp3_cpmpy + dataset + globals """ -from io import StringIO -import lzma -import os -import cpmpy as cp - -# Special case for optional cpmpy dependencies -from typing import TYPE_CHECKING -if TYPE_CHECKING: - from pycsp3.parser.xparser import CallbackerXCSP3, ParserXCSP3 from .dataset import XCSP3Dataset # for easier importing - -def _parse_xcsp3(path: os.PathLike) -> "ParserXCSP3": - """ - Parses an XCSP3 instance file (.xml) and returns a `ParserXCSP3` instance. - - Arguments: - path: location of the XCSP3 instance to read (expects a .xml file). - - Returns: - A parser object. - """ - try: - from pycsp3.parser.xparser import ParserXCSP3 - except ImportError as e: - raise ImportError("The 'pycsp3' package is required to parse XCSP3 files. " - "Please install it with `pip install pycsp3`.") from e - - parser = ParserXCSP3(path) - return parser - -def _load_xcsp3(parser: "ParserXCSP3") -> cp.Model: - """ - Takes in a `ParserXCSP3` instance and loads its captured model as a CPMpy model. - - Arguments: - parser (ParserXCSP3): A parser object to load from. - - Returns: - The XCSP3 instance loaded as a CPMpy model. 
- """ - from .parser_callbacks import CallbacksCPMPy - from pycsp3.parser.xparser import CallbackerXCSP3 - callbacks = CallbacksCPMPy() - callbacks.force_exit = True - callbacker = CallbackerXCSP3(parser, callbacks) - callbacker.load_instance() - model = callbacks.cpm_model - - return model - - -def read_xcsp3(path: os.PathLike) -> cp.Model: - """ - Reads in an XCSP3 instance (.xml or .xml.lzma) and returns its matching CPMpy model. - - Arguments: - path: location of the XCSP3 instance to read (expects a .xml or .xml.lzma file). - - Returns: - The XCSP3 instance loaded as a CPMpy model. - """ - # Decompress on the fly if still in .lzma format - if str(path).endswith(".lzma"): - path = decompress_lzma(path) - - # Parse and create CPMpy model - parser = _parse_xcsp3(path) - model = _load_xcsp3(parser) - return model - -def decompress_lzma(path: os.PathLike) -> StringIO: - """ - Decompresses a .lzma file. - - Arguments: - path: Location of .lzma file - - Returns: - Memory-mapped decompressed file - """ - # Decompress the XZ file - with lzma.open(path, 'rt', encoding='utf-8') as f: - return StringIO(f.read()) # read to memory-mapped file - - - \ No newline at end of file +from .parser import read_xcsp3 \ No newline at end of file diff --git a/cpmpy/tools/xcsp3/parser.py b/cpmpy/tools/xcsp3/parser.py new file mode 100644 index 000000000..761ef7caa --- /dev/null +++ b/cpmpy/tools/xcsp3/parser.py @@ -0,0 +1,146 @@ +""" +Parser for the XCSP3 format. + + +================= +List of functions +================= + +.. autosummary:: + :nosignatures: + + read_xcsp3 + +======================== +List of helper functions +======================== + +.. 
autosummary:: + :nosignatures: + + _parse_xcsp3 + _load_xcsp3 +""" + +import os +import sys +import argparse +from io import StringIO + +import cpmpy as cp + +# Special case for optional cpmpy dependencies +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pycsp3.parser.xparser import ParserXCSP3 + +def _parse_xcsp3(path: os.PathLike) -> "ParserXCSP3": + """ + Parses an XCSP3 instance file (.xml) and returns a `ParserXCSP3` instance. + + Arguments: + path: location of the XCSP3 instance to read (expects a .xml file). + + Returns: + A parser object. + """ + try: + from pycsp3.parser.xparser import ParserXCSP3 + except ImportError as e: + raise ImportError("The 'pycsp3' package is required to parse XCSP3 files. " + "Please install it with `pip install pycsp3`.") from e + + parser = ParserXCSP3(path) + return parser + +def _load_xcsp3(parser: "ParserXCSP3") -> cp.Model: + """ + Takes in a `ParserXCSP3` instance and loads its captured model as a CPMpy model. + + Arguments: + parser (ParserXCSP3): A parser object to load from. + + Returns: + The XCSP3 instance loaded as a CPMpy model. + """ + from .parser_callbacks import CallbacksCPMPy + from pycsp3.parser.xparser import CallbackerXCSP3 + callbacks = CallbacksCPMPy() + callbacks.force_exit = True + callbacker = CallbackerXCSP3(parser, callbacks) + callbacker.load_instance() + model = callbacks.cpm_model + + return model + +_std_open = open +def read_xcsp3(xcsp3: os.PathLike, open=open) -> cp.Model: + """ + Reads in an XCSP3 instance (.xml or .xml.lzma) and returns its matching CPMpy model. + + Arguments: + xcsp3 (str or os.PathLike): + - A file path to an XCSP3 file (optionally LZMA-compressed with `.lzma`) + - OR a string containing the XCSP3 content directly + open: (callable): + If xcsp3 is the path to a file, a callable to "open" that file (default=python standard library's 'open'). + + Returns: + The XCSP3 instance loaded as a CPMpy model. 
+ """ + # If wcnf is a path to a file -> open file + if isinstance(xcsp3, (str, os.PathLike)) and os.path.exists(xcsp3): + if open is not None: + f = open(xcsp3) + else: + f = _std_open(xcsp3, "rt") + # If wcnf is a string containing a model -> create a memory-mapped file + else: + f = StringIO(xcsp3) + + # Parse and create CPMpy model + parser = _parse_xcsp3(f) + model = _load_xcsp3(parser) + return model + + +def main(): + parser = argparse.ArgumentParser(description="Parse and solve a WCNF model using CPMpy") + parser.add_argument("model", help="Path to a WCNF file (or raw WCNF string if --string is given)") + parser.add_argument("-s", "--solver", default=None, help="Solver name to use (default: CPMpy's default)") + parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw WCNF string instead of a file path") + parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)") + args = parser.parse_args() + + # Build the CPMpy model + try: + if args.string: + model = read_xcsp3(args.model) + else: + model = read_xcsp3(os.path.expanduser(args.model)) + except Exception as e: + sys.stderr.write(f"Error reading model: {e}\n") + sys.exit(1) + + # Solve the model + try: + if args.solver: + result = model.solve(solver=args.solver, time_limit=args.time_limit) + else: + result = model.solve(time_limit=args.time_limit) + except Exception as e: + sys.stderr.write(f"Error solving model: {e}\n") + sys.exit(1) + + # Print results + print("Status:", model.status()) + if result is not None: + if model.has_objective(): + print("Objective:", model.objective_value()) + else: + print("No solution found.") + +if __name__ == "__main__": + main() + \ No newline at end of file From 9173c9faebbb4077368f6ba73c3990804c055fe0 Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Fri, 12 Sep 2025 14:07:22 +0200 Subject: [PATCH 12/46] Parsers with changeable 'open' --- 
cpmpy/tools/opb/parser.py | 11 ++++++++--- cpmpy/tools/wcnf/parser.py | 17 ++++++++--------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/cpmpy/tools/opb/parser.py b/cpmpy/tools/opb/parser.py index 846c0874b..e300a2752 100644 --- a/cpmpy/tools/opb/parser.py +++ b/cpmpy/tools/opb/parser.py @@ -105,7 +105,8 @@ def _parse_constraint(line, vars): right=rhs ) -def read_opb(opb: Union[str, os.PathLike]) -> cp.Model: +_std_open = open +def read_opb(opb: Union[str, os.PathLike], open=open) -> cp.Model: """ Parser for OPB (Pseudo-Boolean) format. Reads in an instance and returns its matching CPMpy model. @@ -121,6 +122,8 @@ def read_opb(opb: Union[str, os.PathLike]) -> cp.Model: opb (str or os.PathLike): - A file path to an OPB file (optionally LZMA-compressed with `.xz`) - OR a string containing the OPB content directly + open: (callable): + If opb is the path to a file, a callable to "open" that file (default=python standard library's 'open'). Returns: cp.Model: The CPMpy model of the OPB instance. @@ -143,8 +146,10 @@ def read_opb(opb: Union[str, os.PathLike]) -> cp.Model: # If opb is a path to a file -> open file if isinstance(opb, (str, os.PathLike)) and os.path.exists(opb): - f_open = lzma.open if str(opb).endswith(".xz") else open - f = f_open(opb, 'rt') + if open is not None: + f = open(opb) + else: + f = _std_open(opb, "rt") # If opb is a string containing a model -> create a memory-mapped file else: f = StringIO(opb) diff --git a/cpmpy/tools/wcnf/parser.py b/cpmpy/tools/wcnf/parser.py index 72cec94c8..84b484979 100644 --- a/cpmpy/tools/wcnf/parser.py +++ b/cpmpy/tools/wcnf/parser.py @@ -1,8 +1,3 @@ -#!/usr/bin/env python -#-*- coding:utf-8 -*- -## -## __init__.py -## """ Parser for the WCNF format. 
@@ -39,8 +34,8 @@ def _get_var(i, vars_dict): vars_dict[i] = cp.boolvar(name=f"x{i}") # <- be carefull that name doesn't clash with generated variables during transformations / user variables return vars_dict[i] - -def read_wcnf(wcnf: Union[str, os.PathLike]) -> cp.Model: +_std_open = open +def read_wcnf(wcnf: Union[str, os.PathLike], open=open) -> cp.Model: """ Parser for WCNF format. Reads in an instance and returns its matching CPMpy model. @@ -48,14 +43,18 @@ def read_wcnf(wcnf: Union[str, os.PathLike]) -> cp.Model: wcnf (str or os.PathLike): - A file path to an WCNF file (optionally LZMA-compressed with `.xz`) - OR a string containing the WCNF content directly + open: (callable): + If wcnf is the path to a file, a callable to "open" that file (default=python standard library's 'open'). Returns: cp.Model: The CPMpy model of the WCNF instance. """ # If wcnf is a path to a file -> open file if isinstance(wcnf, (str, os.PathLike)) and os.path.exists(wcnf): - f_open = lzma.open if str(wcnf).endswith(".xz") else open - f = f_open(wcnf, "rt") + if open is not None: + f = open(wcnf) + else: + f = _std_open(wcnf, "rt") # If wcnf is a string containing a model -> create a memory-mapped file else: f = StringIO(wcnf) From 52b95de5f6e556e0d853b4c5f85c08a13f93087b Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Fri, 12 Sep 2025 14:13:58 +0200 Subject: [PATCH 13/46] Type-hints and docstrings --- cpmpy/tools/dataset/_base.py | 20 +++++++++++++++++--- cpmpy/tools/dataset/model/mse.py | 2 +- cpmpy/tools/dataset/model/opb.py | 6 ++++-- cpmpy/tools/dataset/model/xcsp3.py | 2 +- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/cpmpy/tools/dataset/_base.py b/cpmpy/tools/dataset/_base.py index ce2206110..aa22ae930 100644 --- a/cpmpy/tools/dataset/_base.py +++ b/cpmpy/tools/dataset/_base.py @@ -38,18 +38,32 @@ def __init__( self.download() @abstractmethod - def category(self): + def category(self) -> dict: + """ + Labels to distinguish instances into categories 
matching to those of the dataset. + E.g. + - year + - track + """ pass @abstractmethod def download(self, *args, **kwargs): + """ + How the dataset should be downloaded. + """ pass @abstractmethod - def open(self, instance): + def open(self, instance) -> callable: + """ + How an instance file from the dataset should be opened. + Especially useful when files come compressed and won't work with + python standard library's 'open', e.g. '.xz', '.lzma'. + """ pass - def metadata(self, file): + def metadata(self, file) -> dict: metadata = self.category() | { 'name': pathlib.Path(file).stem.replace(self.extension, ''), 'path': file, diff --git a/cpmpy/tools/dataset/model/mse.py b/cpmpy/tools/dataset/model/mse.py index 711a560bb..8f395d677 100644 --- a/cpmpy/tools/dataset/model/mse.py +++ b/cpmpy/tools/dataset/model/mse.py @@ -64,7 +64,7 @@ def __init__( ) - def category(self): + def category(self) -> dict: return { "year": self.year, "track": self.track diff --git a/cpmpy/tools/dataset/model/opb.py b/cpmpy/tools/dataset/model/opb.py index bc051d784..40e6a282d 100644 --- a/cpmpy/tools/dataset/model/opb.py +++ b/cpmpy/tools/dataset/model/opb.py @@ -63,19 +63,21 @@ def __init__( download=download, extension=".opb.xz" ) - def category(self): + def category(self) -> dict: return { "year": self.year, "track": self.track } - def metadata(self, file): + def metadata(self, file) -> dict: + # Add the author to the metadata return super().metadata(file) | {'author': str(file).split(os.sep)[-1].split("_")[0],} def download(self): # TODO: add option to filter on competition instances print(f"Downloading OPB {self.year} {self.track} instances...") + url = f"https://www.cril.univ-artois.fr/PB24/benchs/" year_suffix = str(self.year)[2:] # Drop the starting '20' url_path = url + f"normalized-PB{year_suffix}.tar" diff --git a/cpmpy/tools/dataset/model/xcsp3.py b/cpmpy/tools/dataset/model/xcsp3.py index e71df1d04..597a2af55 100644 --- a/cpmpy/tools/dataset/model/xcsp3.py +++ 
b/cpmpy/tools/dataset/model/xcsp3.py @@ -65,7 +65,7 @@ def __init__( ) - def category(self): + def category(self) -> dict: return { "year": self.year, "track": self.track From bf5ecd2c65b537c10ad67a368da0b6631b7c2a58 Mon Sep 17 00:00:00 2001 From: Thomas Sergeys Date: Fri, 12 Sep 2025 14:18:10 +0200 Subject: [PATCH 14/46] Add TODOs --- cpmpy/tools/benchmark/mse.py | 2 +- cpmpy/tools/benchmark/opb.py | 2 +- cpmpy/tools/benchmark/xcsp3.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py index 3654c2bc8..a11b1f5cb 100644 --- a/cpmpy/tools/benchmark/mse.py +++ b/cpmpy/tools/benchmark/mse.py @@ -73,7 +73,7 @@ def solution_mse(model): Returns: str: MSE-formatted solution string. """ - variables = [var for var in model.user_vars if var.name[:2] == "BV"] # dirty workaround for all missed aux vars in user vars + variables = [var for var in model.user_vars if var.name[:2] == "BV"] # dirty workaround for all missed aux vars in user vars TODO fix with Ignace variables = sorted(variables, key=lambda v: int("".join(filter(str.isdigit, v.name)))) return " ".join([str(1 if var.value() else 0) for var in variables]) diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py index 3fc5202cd..9d669a075 100644 --- a/cpmpy/tools/benchmark/opb.py +++ b/cpmpy/tools/benchmark/opb.py @@ -73,7 +73,7 @@ def solution_opb(model): Returns: Formatted model solution according to PB24 specification. 
""" - variables = [var for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]] # dirty workaround for all missed aux vars in user vars TODO + variables = [var for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]] # dirty workaround for all missed aux vars in user vars TODO fix with Ignace return " ".join([var.name.replace("[","").replace("]","") if var.value() else "-"+var.name.replace("[","").replace("]","") for var in variables]) class OPBBenchmark(Benchmark): diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py index e52e41a4a..9601a4530 100644 --- a/cpmpy/tools/benchmark/xcsp3.py +++ b/cpmpy/tools/benchmark/xcsp3.py @@ -92,7 +92,7 @@ def solution_xcsp3(model, useless_style="*", boolean_style="int"): # How useless variables should be handled # (variables which have value `None` in the solution) - variables = {var.name: var for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]} # dirty workaround for all missed aux vars in user vars + variables = {var.name: var for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]} # dirty workaround for all missed aux vars in user vars TODO fix with Ignace if useless_style == "*": variables = {k:(v.value() if v.value() is not None else "*") for k,v in variables.items()} elif useless_style == "drop": From 5dc388647c8c7fde2a3ec270f01c5506eabf02f7 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 15:07:56 +0200 Subject: [PATCH 15/46] Mising helper functions --- cpmpy/tools/benchmark/__init__.py | 59 +++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/cpmpy/tools/benchmark/__init__.py b/cpmpy/tools/benchmark/__init__.py index e69de29bb..b5a26f62a 100644 --- a/cpmpy/tools/benchmark/__init__.py +++ b/cpmpy/tools/benchmark/__init__.py @@ -0,0 +1,59 @@ + +import resource +import sys +import time +import warnings +import psutil + + +TIME_BUFFER = 5 # seconds +# TODO : see if good value +MEMORY_BUFFER_SOFT = 2 # MiB 
+MEMORY_BUFFER_HARD = 0 # MiB +MEMORY_BUFFER_SOLVER = 20 # MB + + +def set_memory_limit(mem_limit): + """ + Set memory limit (Virtual Memory Size). + """ + if mem_limit is not None: + soft = max(_mib_as_bytes(mem_limit) - _mib_as_bytes(MEMORY_BUFFER_SOFT), _mib_as_bytes(MEMORY_BUFFER_SOFT)) + hard = max(_mib_as_bytes(mem_limit) - _mib_as_bytes(MEMORY_BUFFER_HARD), _mib_as_bytes(MEMORY_BUFFER_HARD)) + if sys.platform != "win32": + resource.setrlimit(resource.RLIMIT_AS, (soft, hard)) # limit memory in number of bytes + else: + warnings.warn("Memory limits using `resource` are not supported on Windows. Skipping hard limit.") + +def set_time_limit(time_limit, verbose:bool=False): + """ + Set time limit (CPU time in seconds). + """ + if time_limit is not None: + if sys.platform != "win32": + soft = time_limit + hard = resource.RLIM_INFINITY + resource.setrlimit(resource.RLIMIT_CPU, (soft, hard)) + else: + warnings.warn("CPU time limits using `resource` are not supported on Windows. Skipping hard limit.") + +def _wall_time(p: psutil.Process): + return time.time() - p.create_time() + +def _mib_as_bytes(mib: int) -> int: + return mib * 1024 * 1024 + +def _mb_as_bytes(mb: int) -> int: + return mb * 1000 * 1000 + +def _bytes_as_mb(bytes: int) -> int: + return bytes // (1000 * 1000) + +def _bytes_as_gb(bytes: int) -> int: + return bytes // (1000 * 1000 * 1000) + +def _bytes_as_mb_float(bytes: int) -> float: + return bytes / (1000 * 1000) + +def _bytes_as_gb_float(bytes: int) -> float: + return bytes / (1000 * 1000 * 1000) \ No newline at end of file From 7209c620d6e22660ea98d5074143ef40bb06e16d Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 15:10:58 +0200 Subject: [PATCH 16/46] Print stacktrace of process --- cpmpy/tools/benchmark/runner.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpmpy/tools/benchmark/runner.py b/cpmpy/tools/benchmark/runner.py index 325ac54cd..a83740459 100644 --- a/cpmpy/tools/benchmark/runner.py +++ 
b/cpmpy/tools/benchmark/runner.py @@ -273,15 +273,16 @@ def benchmark_runner( with ThreadPoolExecutor(max_workers=workers) as executor: # Submit all tasks and track their futures futures = [executor.submit(execute_instance, # below: args - (instance_runner, filename, metadata, dataset.open(), solver, time_limit, mem_limit, cores, output_file, verbose, intermediate, checker_path)) + (instance_runner, filename, metadata, dataset.open, solver, time_limit, mem_limit, cores, output_file, verbose, intermediate, checker_path)) for filename, metadata in dataset] # Process results as they complete - for i,future in enumerate(tqdm(futures, total=len(futures), desc=f"Running {solver}")): + for i, future in enumerate(tqdm(futures, total=len(futures), desc=f"Running {solver}")): try: - _ = future.result(timeout=time_limit+60) # for cleanliness sake, result is empty + _ = future.result(timeout=time_limit + 60) # for cleanliness sake, result is empty except TimeoutError: pass except Exception as e: print(f"Job {i}: {dataset[i][1]['name']}, ProcessPoolExecutor caught: {e}") + if verbose: traceback.print_exc() return output_file From f66c8c554555d178244dacd50ed487d9f3c22401 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 15:21:22 +0200 Subject: [PATCH 17/46] Fix arguments --- cpmpy/tools/benchmark/_base.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py index 85119a822..c6b4353d9 100644 --- a/cpmpy/tools/benchmark/_base.py +++ b/cpmpy/tools/benchmark/_base.py @@ -56,14 +56,14 @@ def __init__(self, reader:callable): """ self.reader = reader - def read_instance(self, instance) -> cp.Model: + def read_instance(self, instance, open) -> cp.Model: """ Parse a model instance to a CPMpy model. Arguments: instance (str or os.PathLike): The model instance to parse into a CPMpy model. 
""" - return self.reader(instance) + return self.reader(instance, open=open) """ Callback methods which can be overwritten to make a custom benchmark run. @@ -326,13 +326,13 @@ def solution_count(self): Methods which can, bit most likely shouldn't, be overwritten. """ - def set_memory_limit(self, mem_limit, verbose=False): - set_memory_limit(mem_limit, verbose=verbose) + def set_memory_limit(self, mem_limit): + set_memory_limit(mem_limit) - def set_time_limit(self, time_limit, verbose=False): + def set_time_limit(self, time_limit): p = psutil.Process() if time_limit is not None: - set_time_limit(int(time_limit - _wall_time(p) + time.process_time()), verbose=verbose) + set_time_limit(int(time_limit - _wall_time(p) + time.process_time())) else: set_time_limit(None) @@ -419,11 +419,11 @@ def run( # Set memory limit (if provided) if mem_limit is not None: - self.set_memory_limit(mem_limit, verbose=verbose) + self.set_memory_limit(mem_limit) # Set time limit (if provided) if time_limit is not None: - self.set_time_limit(time_limit, verbose=verbose) # set remaining process time != wall time + self.set_time_limit(time_limit) # set remaining process time != wall time # ------------------------------ Parse instance ------------------------------ # From 6ab8b32932da152140bd94e168757fa9e4027ad5 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 15:21:48 +0200 Subject: [PATCH 18/46] Fix overwritten open --- cpmpy/tools/benchmark/runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpmpy/tools/benchmark/runner.py b/cpmpy/tools/benchmark/runner.py index a83740459..9d3f4c5f6 100644 --- a/cpmpy/tools/benchmark/runner.py +++ b/cpmpy/tools/benchmark/runner.py @@ -107,6 +107,7 @@ def wrapper(instance_runner, conn, kwargs, verbose): conn.close() # exec_args = (instance_runner, filename, metadata, open, solver, time_limit, mem_limit, output_file, verbose) +_std_open = open def execute_instance(args: Tuple[callable, str, dict, callable, str, int, 
int, int, str, bool, bool, str]) -> None: """ Solve a single benchmark instance and write results to file immediately. @@ -224,7 +225,7 @@ def execute_instance(args: Tuple[callable, str, dict, callable, str, int, int, i # Pre-check if file exists to determine if we need to write header write_header = not os.path.exists(output_file) - with open(output_file, 'a', newline='') as f: + with _std_open(output_file, 'a', newline='') as f: writer = csv.DictWriter(f, fieldnames=fieldnames) if write_header: writer.writeheader() From 34c8a9e75828022003afdbc056068eee14f7078e Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 15:22:12 +0200 Subject: [PATCH 19/46] Read as string instead of StringIO --- cpmpy/tools/benchmark/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpmpy/tools/benchmark/runner.py b/cpmpy/tools/benchmark/runner.py index 9d3f4c5f6..b0edeb655 100644 --- a/cpmpy/tools/benchmark/runner.py +++ b/cpmpy/tools/benchmark/runner.py @@ -136,7 +136,7 @@ def execute_instance(args: Tuple[callable, str, dict, callable, str, int, int, i # Decompress before timers start with open(filename) as f: # <- dataset-specific 'open' callable - filename = StringIO(f.read()) # read to memory-mapped file + filename = f.read() # read to memory-mapped file # Start total timing total_start = time.time() From fd55b3a204e4fd8578d88f4316bf7ff49eb74702 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 15:22:32 +0200 Subject: [PATCH 20/46] Read as text instead of binary --- cpmpy/tools/dataset/model/mse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpmpy/tools/dataset/model/mse.py b/cpmpy/tools/dataset/model/mse.py index 8f395d677..ef31b0d64 100644 --- a/cpmpy/tools/dataset/model/mse.py +++ b/cpmpy/tools/dataset/model/mse.py @@ -100,7 +100,7 @@ def download(self): zip_path.unlink() def open(self, instance: os.PathLike) -> callable: - return lzma.open if str(instance).endswith(".xz") else open + return 
lzma.open(instance, "rt") if str(instance).endswith(".xz") else open(instance) if __name__ == "__main__": dataset = MSEDataset(year=2024, track="exact-weighted", download=True) From 2be9fa67820ece6aa1044822a5d88b618faa2f40 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 17:11:27 +0200 Subject: [PATCH 21/46] Sigterm callbacks --- cpmpy/tools/benchmark/_base.py | 47 ++++++++++++++++++++++++++++++++- cpmpy/tools/benchmark/mse.py | 39 ++++++++++++++++++++------- cpmpy/tools/benchmark/runner.py | 30 ++++++++++++--------- 3 files changed, 93 insertions(+), 23 deletions(-) diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py index c6b4353d9..3522af9e9 100644 --- a/cpmpy/tools/benchmark/_base.py +++ b/cpmpy/tools/benchmark/_base.py @@ -30,15 +30,25 @@ from abc import ABC +import os +import signal +import sys import time import random import psutil import warnings +from enum import Enum from typing import Optional import cpmpy as cp from cpmpy.tools.benchmark import _mib_as_bytes, _wall_time, set_memory_limit, set_time_limit, _bytes_as_mb, _bytes_as_gb +class ExitStatus(Enum): + unsupported:str = "unsupported" # instance contains an unsupported feature (e.g. a unsupported global constraint) + sat:str = "sat" # CSP : found a solution | COP : found a solution but couldn't prove optimality + optimal:str = "optimal" # optimal COP solution found + unsat:str = "unsat" # instance is unsatisfiable + unknown:str = "unknown" # any other case class Benchmark(ABC): """ @@ -49,12 +59,13 @@ class Benchmark(ABC): It is designed to be extended or customized for specific benchmarking needs. """ - def __init__(self, reader:callable): + def __init__(self, reader:callable, exit_status:Enum): """ Arguments: reader (callable): A parser from a model format to a CPMPy model. 
""" self.reader = reader + self.exit_status = exit_status def read_instance(self, instance, open) -> cp.Model: """ @@ -92,6 +103,12 @@ def handle_exception(self, e): if line.strip(): self.print_comment(line) + def handle_sigterm(self): + pass + + def handle_rlimit_cpu(self): + pass + """ Solver arguments (can also be tweaked for a specific benchmark). """ @@ -336,6 +353,29 @@ def set_time_limit(self, time_limit): else: set_time_limit(None) + def sigterm_handler(self, _signo, _stack_frame): + exit_code = self.handle_sigterm() + print(flush=True) + os._exit(exit_code) + + def rlimit_cpu_handler(self, _signo, _stack_frame): + exit_code = self.handle_rlimit_cpu() + print(flush=True) + os._exit(exit_code) + + def init_signal_handlers(self): + """ + Configure signal handlers + """ + signal.signal(signal.SIGINT, self.sigterm_handler) + signal.signal(signal.SIGTERM, self.sigterm_handler) + signal.signal(signal.SIGINT, self.sigterm_handler) + signal.signal(signal.SIGABRT, self.sigterm_handler) + if sys.platform != "win32": + signal.signal(signal.SIGXCPU, self.rlimit_cpu_handler) + else: + warnings.warn("Windows does not support setting SIGXCPU signal") + def post_model(self, model, solver, solver_args): """ Post the model to the selected backend solver. @@ -417,6 +457,8 @@ def run( if seed is not None: random.seed(seed) + self.init_signal_handlers() + # Set memory limit (if provided) if mem_limit is not None: self.set_memory_limit(mem_limit) @@ -488,6 +530,9 @@ def run( except NotImplementedError as e: self.handle_not_implemented(e) raise e + except TimeoutError as e: + self.handle_exception(e) # TODO add callback for timeout? 
+ raise e except Exception as e: self.handle_exception(e) raise e diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py index a11b1f5cb..3745e4503 100644 --- a/cpmpy/tools/benchmark/mse.py +++ b/cpmpy/tools/benchmark/mse.py @@ -56,7 +56,7 @@ from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus -class ExitStatus(Enum): +class MSEExitStatus(Enum): unsupported:str = "UNSUPPORTED" # instance contains an unsupported feature (e.g. a unsupported global constraint) sat:str = "SATISFIABLE" # CSP : found a solution | COP : found a solution but couldn't prove optimality optimal:str = "OPTIMUM" + chr(32) + "FOUND" # optimal COP solution found @@ -89,12 +89,12 @@ class MSEBenchmark(Benchmark): """ def __init__(self): - super().__init__(reader=read_wcnf) + super().__init__(reader=read_wcnf, exit_status=MSEExitStatus) def print_comment(self, comment:str): print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True) - def print_status(self, status: ExitStatus) -> None: + def print_status(self, status: MSEExitStatus) -> None: print('s' + chr(32) + status.value, end="\n", flush=True) def print_value(self, value: str) -> None: @@ -107,27 +107,46 @@ def print_objective(self, objective: int) -> None: def print_result(self, s): if s.status().exitstatus == CPMStatus.OPTIMAL: self.print_value(solution_mse(s)) - self.print_status(ExitStatus.optimal) + self.print_status(MSEExitStatus.optimal) elif s.status().exitstatus == CPMStatus.FEASIBLE: self.print_value(solution_mse(s)) - self.print_status(ExitStatus.sat) + self.print_status(MSEExitStatus.sat) elif s.status().exitstatus == CPMStatus.UNSATISFIABLE: - self.print_status(ExitStatus.unsat) + self.print_status(MSEExitStatus.unsat) else: self.print_comment("Solver did not find any solution within the time/memory limit") - self.print_status(ExitStatus.unknown) + self.print_status(MSEExitStatus.unknown) def handle_memory_error(self, mem_limit): super().handle_memory_error(mem_limit) - 
self.print_status(ExitStatus.unknown) + self.print_status(MSEExitStatus.unknown) def handle_not_implemented(self, e): super().handle_not_implemented(e) - self.print_status(ExitStatus.unsupported) + self.print_status(MSEExitStatus.unsupported) def handle_exception(self, e): super().handle_exception(e) - self.print_status(ExitStatus.unknown) + self.print_status(MSEExitStatus.unknown) + + + def handle_sigterm(self): + """ + Handles a SIGTERM. Gives us 1 second to finish the current job before we get killed. + """ + # Report that we haven't found a solution in time + self.print_status(MSEExitStatus.unknown) + self.print_comment("SIGTERM raised.") + return 0 + + def handle_rlimit_cpu(self): + """ + Handles a SIGXCPU. + """ + # Report that we haven't found a solution in time + self.print_status(MSEExitStatus.unknown) + self.print_comment("SIGXCPU raised.") + return 0 def parse_output_line(self, line, result): if line.startswith('s '): diff --git a/cpmpy/tools/benchmark/runner.py b/cpmpy/tools/benchmark/runner.py index b0edeb655..933dac132 100644 --- a/cpmpy/tools/benchmark/runner.py +++ b/cpmpy/tools/benchmark/runner.py @@ -30,8 +30,6 @@ from filelock import FileLock from concurrent.futures import ThreadPoolExecutor -from cpmpy.tools.xcsp3.xcsp3_cpmpy import xcsp3_cpmpy, init_signal_handlers, ExitStatus - class Tee: """ A stream-like object that duplicates writes to multiple underlying streams. 
@@ -96,14 +94,22 @@ def wrapper(instance_runner, conn, kwargs, verbose): sys.stdout = Tee(original_stdout, pipe_writer) # forward to pipe and console try: - init_signal_handlers() # configure OS signal handlers instance_runner.run(**kwargs) conn.send({"status": "ok"}) + except TimeoutError: + try: + conn.send({"status": "timeout"}) + except (BrokenPipeError, EOFError): + pass except Exception as e: # capture exceptions and report in state tb_str = traceback.format_exc() - conn.send({"status": "error", "exception": e, "traceback": tb_str}) + try: + conn.send({"status": "error", "exception": e, "traceback": tb_str}) + except (BrokenPipeError, EOFError): + pass + #conn.send({"status": "error", "exception": e, "traceback": tb_str}) finally: - sys.stdout = original_stdout + #sys.stdout = original_stdout conn.close() # exec_args = (instance_runner, filename, metadata, open, solver, time_limit, mem_limit, output_file, verbose) @@ -125,7 +131,7 @@ def execute_instance(args: Tuple[callable, str, dict, callable, str, int, int, i instance_runner, filename, metadata, open, solver, time_limit, mem_limit, cores, output_file, verbose, intermediate, checker_path = args # Fieldnames for the CSV file - fieldnames = ['instance'] + list(metadata.keys()) + \ + fieldnames = list(metadata.keys()) + \ ['solver', 'time_total', 'time_parse', 'time_model', 'time_post', 'time_solve', 'status', 'objective_value', 'solution', 'intermediate', 'checker_result'] @@ -194,13 +200,13 @@ def execute_instance(args: Tuple[callable, str, dict, callable, str, int, int, i raise() # Parse the exit status - if status["status"] == "error": + if status["status"] == "timeout": # Ignore timeouts - if "TimeoutError" in repr(status["exception"]): - pass - # All other exceptions, put in solution field - elif result['solution'] is None: - result['status'] = ExitStatus.unknown.value + pass + elif status["status"] == "error": + # All exceptions, put in solution field + if result['solution'] is None: + result['status'] 
= instance_runner.exit_status.unknown.value result["solution"] = status["exception"] # if checker_path is not None and complete_solution is not None: TODO: generalise 'checkers' for benchmarks From 2e646231405bd2f410fcfdb47192259b5ebad0c5 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 18:18:02 +0200 Subject: [PATCH 22/46] Attempt at fixing some nested memory exceptions --- cpmpy/tools/benchmark/__init__.py | 7 +++++++ cpmpy/tools/benchmark/_base.py | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cpmpy/tools/benchmark/__init__.py b/cpmpy/tools/benchmark/__init__.py index b5a26f62a..54aa8031f 100644 --- a/cpmpy/tools/benchmark/__init__.py +++ b/cpmpy/tools/benchmark/__init__.py @@ -13,6 +13,7 @@ MEMORY_BUFFER_SOLVER = 20 # MB + def set_memory_limit(mem_limit): """ Set memory limit (Virtual Memory Size). @@ -25,6 +26,12 @@ def set_memory_limit(mem_limit): else: warnings.warn("Memory limits using `resource` are not supported on Windows. Skipping hard limit.") +def disable_memory_limit(): + if sys.platform != "win32": + soft, hard = resource.getrlimit(resource.RLIMIT_AS) + # set a very high soft limit + resource.setrlimit(resource.RLIMIT_AS, (hard, hard)) + def set_time_limit(time_limit, verbose:bool=False): """ Set time limit (CPU time in seconds). diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py index 3522af9e9..2e81505e1 100644 --- a/cpmpy/tools/benchmark/_base.py +++ b/cpmpy/tools/benchmark/_base.py @@ -41,7 +41,7 @@ from typing import Optional import cpmpy as cp -from cpmpy.tools.benchmark import _mib_as_bytes, _wall_time, set_memory_limit, set_time_limit, _bytes_as_mb, _bytes_as_gb +from cpmpy.tools.benchmark import _mib_as_bytes, _wall_time, set_memory_limit, set_time_limit, _bytes_as_mb, _bytes_as_gb, disable_memory_limit class ExitStatus(Enum): unsupported:str = "unsupported" # instance contains an unsupported feature (e.g. 
a unsupported global constraint) @@ -525,6 +525,7 @@ def run( except MemoryError as e: + disable_memory_limit() self.handle_memory_error(mem_limit) raise e except NotImplementedError as e: From 5b926807300e0d196197bcded7685085bd73cf4c Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 18:18:52 +0200 Subject: [PATCH 23/46] Overwritable exit status --- cpmpy/tools/benchmark/mse.py | 1 + cpmpy/tools/benchmark/opb.py | 39 +++++++++++++++++++++++++--------- cpmpy/tools/benchmark/xcsp3.py | 25 +++++++++++----------- 3 files changed, 43 insertions(+), 22 deletions(-) diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py index 3745e4503..b7d645369 100644 --- a/cpmpy/tools/benchmark/mse.py +++ b/cpmpy/tools/benchmark/mse.py @@ -31,6 +31,7 @@ .. autosummary:: :nosignatures: + MSEExitStatus MSEBenchmark ================= diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py index 9d669a075..905d7ab0e 100644 --- a/cpmpy/tools/benchmark/opb.py +++ b/cpmpy/tools/benchmark/opb.py @@ -31,6 +31,7 @@ .. autosummary:: :nosignatures: + OPBExitStatus OPBBenchmark ================= @@ -56,7 +57,7 @@ from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus -class ExitStatus(Enum): +class OPBExitStatus(Enum): unsupported:str = "UNSUPPORTED" # instance contains an unsupported feature (e.g. 
a unsupported global constraint) sat:str = "SATISFIABLE" # CSP : found a solution | COP : found a solution but couldn't prove optimality optimal:str = "OPTIMUM" + chr(32) + "FOUND" # optimal COP solution found @@ -82,12 +83,12 @@ class OPBBenchmark(Benchmark): """ def __init__(self): - super().__init__(reader=read_opb) + super().__init__(reader=read_opb, exit_status=OPBExitStatus) def print_comment(self, comment:str): print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True) - def print_status(self, status: ExitStatus) -> None: + def print_status(self, status: OPBExitStatus) -> None: print('s' + chr(32) + status.value, end="\n", flush=True) def print_value(self, value: str) -> None: @@ -101,27 +102,45 @@ def print_result(self, s): if s.status().exitstatus == CPMStatus.OPTIMAL: self.print_result() self.print_value(solution_opb(s)) - self.print_status(ExitStatus.optimal) + self.print_status(OPBExitStatus.optimal) elif s.status().exitstatus == CPMStatus.FEASIBLE: self.print_value(solution_opb(s)) - self.print_status(ExitStatus.sat) + self.print_status(OPBExitStatus.sat) elif s.status().exitstatus == CPMStatus.UNSATISFIABLE: - self.print_status(ExitStatus.unsat) + self.print_status(OPBExitStatus.unsat) else: self.print_comment("Solver did not find any solution within the time/memory limit") - self.print_status(ExitStatus.unknown) + self.print_status(OPBExitStatus.unknown) def handle_memory_error(self, mem_limit): super().handle_memory_error(mem_limit) - self.print_status(ExitStatus.unknown) + self.print_status(OPBExitStatus.unknown) def handle_not_implemented(self, e): super().handle_not_implemented(e) - self.print_status(ExitStatus.unsupported) + self.print_status(OPBExitStatus.unsupported) def handle_exception(self, e): super().handle_exception(e) - self.print_status(ExitStatus.unknown) + self.print_status(OPBExitStatus.unknown) + + def handle_sigterm(self): + """ + Handles a SIGTERM. Gives us 1 second to finish the current job before we get killed. 
+ """ + # Report that we haven't found a solution in time + self.print_status(OPBExitStatus.unknown) + self.print_comment("SIGTERM raised.") + return 0 + + def handle_rlimit_cpu(self): + """ + Handles a SIGXCPU. + """ + # Report that we haven't found a solution in time + self.print_status(OPBExitStatus.unknown) + self.print_comment("SIGXCPU raised.") + return 0 def parse_output_line(self, line, result): if line.startswith('s '): diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py index 9601a4530..9dd9849fe 100644 --- a/cpmpy/tools/benchmark/xcsp3.py +++ b/cpmpy/tools/benchmark/xcsp3.py @@ -31,6 +31,7 @@ .. autosummary:: :nosignatures: + XCSP3ExitStatus XCSP3Benchmark ================= @@ -60,7 +61,7 @@ import xml.etree.cElementTree as ET -class ExitStatus(Enum): +class XCSP3ExitStatus(Enum): unsupported:str = "UNSUPPORTED" # instance contains an unsupported feature (e.g. a unsupported global constraint) sat:str = "SATISFIABLE" # CSP : found a solution | COP : found a solution but couldn't prove optimality optimal:str = "OPTIMUM" + chr(32) + "FOUND" # optimal COP solution found @@ -120,12 +121,12 @@ class XCSP3Benchmark(Benchmark): """ def __init__(self): - super().__init__(reader=read_xcsp3) + super().__init__(reader=read_xcsp3, exit_status=XCSP3ExitStatus) def print_comment(self, comment:str): print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True) - def print_status(self, status: ExitStatus) -> None: + def print_status(self, status: XCSP3ExitStatus) -> None: print('s' + chr(32) + status.value, end="\n", flush=True) def print_value(self, value: str) -> None: @@ -139,35 +140,35 @@ def print_result(self, s): if s.status().exitstatus == CPMStatus.OPTIMAL: self.print_result() self.print_value(solution_xcsp3(s)) - self.print_status(ExitStatus.optimal) + self.print_status(XCSP3ExitStatus.optimal) elif s.status().exitstatus == CPMStatus.FEASIBLE: self.print_value(solution_xcsp3(s)) - self.print_status(ExitStatus.sat) + 
self.print_status(XCSP3ExitStatus.sat) elif s.status().exitstatus == CPMStatus.UNSATISFIABLE: - self.print_status(ExitStatus.unsat) + self.print_status(XCSP3ExitStatus.unsat) else: self.print_comment("Solver did not find any solution within the time/memory limit") - self.print_status(ExitStatus.unknown) + self.print_status(XCSP3ExitStatus.unknown) def handle_memory_error(self, mem_limit): super().handle_memory_error(mem_limit) - self.print_status(ExitStatus.unknown) + self.print_status(XCSP3ExitStatus.unknown) def handle_not_implemented(self, e): super().handle_not_implemented(e) - self.print_status(ExitStatus.unsupported) + self.print_status(XCSP3ExitStatus.unsupported) def handle_exception(self, e): if isinstance(e, ParseError): if "out of memory" in e.msg: self.print_comment(f"MemoryError raised by parser.") - self.print_status(ExitStatus.unknown) + self.print_status(XCSP3ExitStatus.unknown) else: self.print_comment(f"An {type(e)} got raised by the parser: {e}") - self.print_status(ExitStatus.unknown) + self.print_status(XCSP3ExitStatus.unknown) else: super().handle_exception(e) - self.print_status(ExitStatus.unknown) + self.print_status(XCSP3ExitStatus.unknown) def parse_output_line(self, line, result): if line.startswith('s '): From 8fff25480e8bfdb9f0b7d787d26b4c143fb1fdbd Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 18:19:11 +0200 Subject: [PATCH 24/46] Validate dataset arguments --- cpmpy/tools/dataset/model/mse.py | 6 ++++++ cpmpy/tools/dataset/model/opb.py | 6 ++++++ cpmpy/tools/dataset/model/xcsp3.py | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/cpmpy/tools/dataset/model/mse.py b/cpmpy/tools/dataset/model/mse.py index ef31b0d64..3ddfebf35 100644 --- a/cpmpy/tools/dataset/model/mse.py +++ b/cpmpy/tools/dataset/model/mse.py @@ -55,6 +55,12 @@ def __init__( self.year = year self.track = track + # Check requested dataset + if not str(year).startswith('20'): + raise ValueError("Year must start with '20'") + if not track: + raise 
ValueError("Track must be specified, e.g. OPT-LIN, DEC-LIN, ...") + dataset_dir = self.root / str(year) / track super().__init__( diff --git a/cpmpy/tools/dataset/model/opb.py b/cpmpy/tools/dataset/model/opb.py index 40e6a282d..0915c6509 100644 --- a/cpmpy/tools/dataset/model/opb.py +++ b/cpmpy/tools/dataset/model/opb.py @@ -55,6 +55,12 @@ def __init__( self.year = year self.track = track + # Check requested dataset + if not str(year).startswith('20'): + raise ValueError("Year must start with '20'") + if not track: + raise ValueError("Track must be specified, e.g. exact-weighted, exact-unweighted, ...") + dataset_dir = self.root / str(year) / track super().__init__( diff --git a/cpmpy/tools/dataset/model/xcsp3.py b/cpmpy/tools/dataset/model/xcsp3.py index 597a2af55..21b38f35e 100644 --- a/cpmpy/tools/dataset/model/xcsp3.py +++ b/cpmpy/tools/dataset/model/xcsp3.py @@ -56,6 +56,12 @@ def __init__( self.year = year self.track = track + # Check requested dataset + if not str(year).startswith('20'): + raise ValueError("Year must start with '20'") + if not track: + raise ValueError("Track must be specified, e.g. COP, CSP, ...") + dataset_dir = self.root / str(year) / track super().__init__( From 2b4a8f02daa648e48d2e806b2c6ce98832323237 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 18:19:25 +0200 Subject: [PATCH 25/46] Check non-empty dataset --- cpmpy/tools/dataset/_base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpmpy/tools/dataset/_base.py b/cpmpy/tools/dataset/_base.py index aa22ae930..a8954aa9f 100644 --- a/cpmpy/tools/dataset/_base.py +++ b/cpmpy/tools/dataset/_base.py @@ -36,6 +36,10 @@ def __init__( raise ValueError(f"Dataset not found. Please set download=True to download the dataset.") else: self.download() + + files = sorted(list(self.dataset_dir.glob(f"*{self.extension}"))) + if len(files) == 0: + raise ValueError("Cannot find any instances inside dataset. Is it a valid dataset? 
If so, please report on GitHub.") @abstractmethod def category(self) -> dict: From b68144d160c28f0da421e5f9b986b7492aed9716 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 18:24:13 +0200 Subject: [PATCH 26/46] Add feedback finished downloading --- cpmpy/tools/dataset/_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpmpy/tools/dataset/_base.py b/cpmpy/tools/dataset/_base.py index a8954aa9f..496780b2d 100644 --- a/cpmpy/tools/dataset/_base.py +++ b/cpmpy/tools/dataset/_base.py @@ -36,6 +36,8 @@ def __init__( raise ValueError(f"Dataset not found. Please set download=True to download the dataset.") else: self.download() + files = sorted(list(self.dataset_dir.glob(f"*{self.extension}"))) + print(f"Finished downloading {len(files)} instances") files = sorted(list(self.dataset_dir.glob(f"*{self.extension}"))) if len(files) == 0: From b08df43dba1e034e7fb88d98ad624161faf534ee Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 12 Sep 2025 18:47:43 +0200 Subject: [PATCH 27/46] Small fixes --- cpmpy/tools/benchmark/opb.py | 1 - cpmpy/tools/benchmark/xcsp3.py | 3 +-- cpmpy/tools/dataset/model/opb.py | 2 +- cpmpy/tools/dataset/model/xcsp3.py | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py index 905d7ab0e..5c1e0f606 100644 --- a/cpmpy/tools/benchmark/opb.py +++ b/cpmpy/tools/benchmark/opb.py @@ -100,7 +100,6 @@ def print_objective(self, objective: int) -> None: def print_result(self, s): if s.status().exitstatus == CPMStatus.OPTIMAL: - self.print_result() self.print_value(solution_opb(s)) self.print_status(OPBExitStatus.optimal) elif s.status().exitstatus == CPMStatus.FEASIBLE: diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py index 9dd9849fe..bb2f02410 100644 --- a/cpmpy/tools/benchmark/xcsp3.py +++ b/cpmpy/tools/benchmark/xcsp3.py @@ -138,7 +138,6 @@ def print_objective(self, objective: int) -> None: def print_result(self, s): if 
s.status().exitstatus == CPMStatus.OPTIMAL: - self.print_result() self.print_value(solution_xcsp3(s)) self.print_status(XCSP3ExitStatus.optimal) elif s.status().exitstatus == CPMStatus.FEASIBLE: @@ -176,7 +175,7 @@ def parse_output_line(self, line, result): elif line.startswith('v ') and result['solution'] is None: # only record first line, contains 'type' and 'cost' solution = line.split("\n")[0][2:].strip() - result['solution'] = str(solution) + result['solution'] = solution complete_solution = line if "cost" in solution: result['objective_value'] = solution.split('cost="')[-1][:-2] diff --git a/cpmpy/tools/dataset/model/opb.py b/cpmpy/tools/dataset/model/opb.py index 0915c6509..201075749 100644 --- a/cpmpy/tools/dataset/model/opb.py +++ b/cpmpy/tools/dataset/model/opb.py @@ -139,7 +139,7 @@ def download(self): tar_path.unlink() def open(self, instance: os.PathLike) -> callable: - return lzma.open if str(instance).endswith(".xz") else open + return lzma.open(instance, 'rt') if str(instance).endswith(".xz") else open(instance) if __name__ == "__main__": dataset = OPBDataset(year=2024, track="DEC-LIN", download=True) diff --git a/cpmpy/tools/dataset/model/xcsp3.py b/cpmpy/tools/dataset/model/xcsp3.py index 21b38f35e..f17a4d193 100644 --- a/cpmpy/tools/dataset/model/xcsp3.py +++ b/cpmpy/tools/dataset/model/xcsp3.py @@ -129,7 +129,7 @@ def download(self): zip_path.unlink() def open(self, instance: os.PathLike) -> callable: - return partial(lzma.open, mode='rt', encoding='utf-8') if str(instance).endswith(".lzma") else open + return lzma.open(instance, mode='rt', encoding='utf-8') if str(instance).endswith(".lzma") else open(instance) if __name__ == "__main__": From 431b065609b3772dfa0bff4aa665f1d49d903548 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 10 Oct 2025 13:39:34 +0200 Subject: [PATCH 28/46] Fix intermediate solutions and time tracking --- cpmpy/tools/benchmark/_base.py | 2 +- cpmpy/tools/benchmark/mse.py | 14 ++++++++------ cpmpy/tools/benchmark/opb.py 
| 3 +++ cpmpy/tools/benchmark/runner.py | 1 + cpmpy/tools/benchmark/xcsp3.py | 3 +++ 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py index 2e81505e1..8055d43a6 100644 --- a/cpmpy/tools/benchmark/_base.py +++ b/cpmpy/tools/benchmark/_base.py @@ -84,7 +84,7 @@ def print_comment(self, comment:str): print(comment) def print_intermediate(self, objective:int): - print("Intermediate solution:", objective) + self.print_comment("Intermediate solution:", objective) def print_result(self, s): self.print_comment(s.status()) diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py index b7d645369..656467bf9 100644 --- a/cpmpy/tools/benchmark/mse.py +++ b/cpmpy/tools/benchmark/mse.py @@ -99,11 +99,13 @@ def print_status(self, status: MSEExitStatus) -> None: print('s' + chr(32) + status.value, end="\n", flush=True) def print_value(self, value: str) -> None: - value = value[:-2].replace("\n", "\nv" + chr(32)) + value[-2:] print('v' + chr(32) + value, end="\n", flush=True) def print_objective(self, objective: int) -> None: print('o' + chr(32) + str(objective), end="\n", flush=True) + + def print_intermediate(self, objective:int): + self.print_objective(objective) def print_result(self, s): if s.status().exitstatus == CPMStatus.OPTIMAL: @@ -159,17 +161,17 @@ def parse_output_line(self, line, result): result['solution'] = solution else: result['solution'] = result['solution'] + ' ' + str(solution) + elif line.startswith('c Solution'): + parts = line.split(', time = ') + # Get solution time from comment for intermediate solution -> used for annotating 'o ...' 
lines + self._sol_time = float(parts[-1].replace('s', '').rstrip()) elif line.startswith('o '): obj = int(line[2:].strip()) if result['intermediate'] is None: result['intermediate'] = [] - result['intermediate'] += [(sol_time, obj)] + result['intermediate'] += [(self._sol_time, obj)] result['objective_value'] = obj obj = None - elif line.startswith('c Solution'): - parts = line.split(', time = ') - # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines - sol_time = float(parts[-1].replace('s', '').rstrip()) elif line.startswith('c took '): # Parse timing information parts = line.split(' seconds to ') diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py index 5c1e0f606..b92fcb257 100644 --- a/cpmpy/tools/benchmark/opb.py +++ b/cpmpy/tools/benchmark/opb.py @@ -98,6 +98,9 @@ def print_value(self, value: str) -> None: def print_objective(self, objective: int) -> None: print('o' + chr(32) + str(objective), end="\n", flush=True) + def print_intermediate(self, objective:int): + self.print_objective(objective) + def print_result(self, s): if s.status().exitstatus == CPMStatus.OPTIMAL: self.print_value(solution_opb(s)) diff --git a/cpmpy/tools/benchmark/runner.py b/cpmpy/tools/benchmark/runner.py index 933dac132..6bc85e6ae 100644 --- a/cpmpy/tools/benchmark/runner.py +++ b/cpmpy/tools/benchmark/runner.py @@ -94,6 +94,7 @@ def wrapper(instance_runner, conn, kwargs, verbose): sys.stdout = Tee(original_stdout, pipe_writer) # forward to pipe and console try: + kwargs["verbose"] = verbose instance_runner.run(**kwargs) conn.send({"status": "ok"}) except TimeoutError: diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py index bb2f02410..47d0289e4 100644 --- a/cpmpy/tools/benchmark/xcsp3.py +++ b/cpmpy/tools/benchmark/xcsp3.py @@ -136,6 +136,9 @@ def print_value(self, value: str) -> None: def print_objective(self, objective: int) -> None: print('o' + chr(32) + str(objective), end="\n", flush=True) 
+ def print_intermediate(self, objective:int): + self.print_objective(objective) + def print_result(self, s): if s.status().exitstatus == CPMStatus.OPTIMAL: self.print_value(solution_xcsp3(s)) From 7d98c354f8668ca61b1ce9950564d22fa0cf66fe Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 10 Oct 2025 13:39:54 +0200 Subject: [PATCH 29/46] Increase intermediate solution time resolution --- cpmpy/tools/benchmark/_base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py index 8055d43a6..b7171c6f6 100644 --- a/cpmpy/tools/benchmark/_base.py +++ b/cpmpy/tools/benchmark/_base.py @@ -156,7 +156,7 @@ def on_solution_callback(self): current_time = time.time() obj = int(self.ObjectiveValue()) - _self.print_comment('Solution %i, time = %0.2fs' % + _self.print_comment('Solution %i, time = %0.4fs' % (self.__solution_count, current_time - self.__start_time)) _self.print_intermediate(obj) self.__solution_count += 1 @@ -286,7 +286,7 @@ def callback(self, *args, **kwargs): if model.cbGet(GRB.Callback.MIP_SOLCNT) > self.__solution_count: # do we have a new solution? 
obj = int(model.cbGet(GRB.Callback.MIP_OBJBST)) - _self.print_comment('Solution %i, time = %0.2fs' % + _self.print_comment('Solution %i, time = %0.4fs' % (self.__solution_count, current_time - self.__start_time)) _self.print_intermediate(obj) self.__solution_count = model.cbGet(GRB.Callback.MIP_SOLCNT) @@ -324,7 +324,7 @@ def result_found(self, solver, sres): current_time = time.time() obj = sres.get_objective_value() if obj is not None: - _self.print_comment('Solution %i, time = %0.2fs' % + _self.print_comment('Solution %i, time = %0.4fs' % (self.__solution_count, current_time - self.__start_time)) _self.print_intermediate(obj) self.__solution_count += 1 @@ -472,7 +472,7 @@ def run( time_parse = time.time() model = self.read_instance(instance, open=open) time_parse = time.time() - time_parse - if verbose: self.print_comment(f"took {time_parse:.4f} seconds to parse model [{instance}]") + if verbose: self.print_comment(f"took {time_parse:.4f} seconds to parse model") if time_limit and time_limit < _wall_time(p): raise TimeoutError("Time's up after parse") From 4664051472c3a9e59bb2c7769592737d614329ff Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 10 Oct 2025 15:26:44 +0200 Subject: [PATCH 30/46] Missing default return argument --- cpmpy/tools/benchmark/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py index b7171c6f6..11d17ed42 100644 --- a/cpmpy/tools/benchmark/_base.py +++ b/cpmpy/tools/benchmark/_base.py @@ -421,7 +421,7 @@ def solver_arguments( return self.cpo_arguments(model=model, cores=cores, seed=seed, intermediate=intermediate, **kwargs) else: self.print_comment(f"setting parameters of {solver} is not (yet) supported") - return dict() + return dict(), None def run( self, From 582fc963e2a5eb6e5189c32ed23f6584fa08d670 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 17 Oct 2025 09:10:31 +0200 Subject: [PATCH 31/46] Only import "resource" when supported --- 
cpmpy/tools/benchmark/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpmpy/tools/benchmark/__init__.py b/cpmpy/tools/benchmark/__init__.py index 54aa8031f..ce383c1de 100644 --- a/cpmpy/tools/benchmark/__init__.py +++ b/cpmpy/tools/benchmark/__init__.py @@ -1,5 +1,3 @@ - -import resource import sys import time import warnings @@ -22,12 +20,14 @@ def set_memory_limit(mem_limit): soft = max(_mib_as_bytes(mem_limit) - _mib_as_bytes(MEMORY_BUFFER_SOFT), _mib_as_bytes(MEMORY_BUFFER_SOFT)) hard = max(_mib_as_bytes(mem_limit) - _mib_as_bytes(MEMORY_BUFFER_HARD), _mib_as_bytes(MEMORY_BUFFER_HARD)) if sys.platform != "win32": + import resource resource.setrlimit(resource.RLIMIT_AS, (soft, hard)) # limit memory in number of bytes else: warnings.warn("Memory limits using `resource` are not supported on Windows. Skipping hard limit.") def disable_memory_limit(): if sys.platform != "win32": + import resource soft, hard = resource.getrlimit(resource.RLIMIT_AS) # set a very high soft limit resource.setrlimit(resource.RLIMIT_AS, (hard, hard)) @@ -38,6 +38,7 @@ def set_time_limit(time_limit, verbose:bool=False): """ if time_limit is not None: if sys.platform != "win32": + import resource soft = time_limit hard = resource.RLIM_INFINITY resource.setrlimit(resource.RLIMIT_CPU, (soft, hard)) From 2eea41c23d6bd54db0dc1d5aa399a3da8920354d Mon Sep 17 00:00:00 2001 From: OrestisLomis Date: Thu, 23 Oct 2025 17:49:01 +0200 Subject: [PATCH 32/46] remove var x0 which is not used in opb --- cpmpy/tools/opb/parser.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpmpy/tools/opb/parser.py b/cpmpy/tools/opb/parser.py index e300a2752..f63db7c7d 100644 --- a/cpmpy/tools/opb/parser.py +++ b/cpmpy/tools/opb/parser.py @@ -66,10 +66,10 @@ def _parse_term(line, vars): for v in vars_str.split(): if v.startswith("~x"): - idx = int(v[2:]) # remove "~x" + idx = int(v[2:]) - 1 # remove "~x" and opb is 1-based indexing factors.append(~vars[idx]) 
else: - idx = int(v[1:]) # remove "x" + idx = int(v[1:]) - 1 # remove "x" and opb is 1-based indexing factors.append(vars[idx]) term = int(w) * reduce(mul, factors, 1) # create weighted term @@ -162,13 +162,15 @@ def read_opb(opb: Union[str, os.PathLike], open=open) -> cp.Model: header = HEADER_RE.match(_line) if not header: raise ValueError(f"Missing or incorrect header: \n0: {line}1: {_line}2: ...") - nr_vars = int(header.group(2)) + 1 + nr_vars = int(header.group(2)) # Generator without comment lines reader = (l for l in map(str.strip, f) if l and l[0] != '*') # CPMpy objects vars = cp.boolvar(shape=nr_vars, name="x") + if nr_vars == 1: + vars = cp.cpm_array([vars]) # ensure vars is indexable even for single variable case model = cp.Model() # Special case for first line -> might contain objective function From 6111fc43707d6455ce56f0821458638241c3724b Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 24 Oct 2025 15:25:26 +0200 Subject: [PATCH 33/46] rcpsp dataset and benchmark --- cpmpy/tools/benchmark/_base.py | 2 +- cpmpy/tools/benchmark/psplib.py | 213 ++++++++++++++++++++++++++ cpmpy/tools/dataset/problem/psplib.py | 119 ++++++++++++++ cpmpy/tools/rcpsp/__init__.py | 20 +++ cpmpy/tools/rcpsp/parser.py | 171 +++++++++++++++++++++ 5 files changed, 524 insertions(+), 1 deletion(-) create mode 100644 cpmpy/tools/benchmark/psplib.py create mode 100644 cpmpy/tools/dataset/problem/psplib.py create mode 100644 cpmpy/tools/rcpsp/__init__.py create mode 100644 cpmpy/tools/rcpsp/parser.py diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py index 11d17ed42..4e718bc45 100644 --- a/cpmpy/tools/benchmark/_base.py +++ b/cpmpy/tools/benchmark/_base.py @@ -59,7 +59,7 @@ class Benchmark(ABC): It is designed to be extended or customized for specific benchmarking needs. 
""" - def __init__(self, reader:callable, exit_status:Enum): + def __init__(self, reader:callable, exit_status:Enum=ExitStatus): """ Arguments: reader (callable): A parser from a model format to a CPMPy model. diff --git a/cpmpy/tools/benchmark/psplib.py b/cpmpy/tools/benchmark/psplib.py new file mode 100644 index 000000000..4fab0c99c --- /dev/null +++ b/cpmpy/tools/benchmark/psplib.py @@ -0,0 +1,213 @@ +""" +PSPLIB as a CPMpy benchmark + +This module provides a benchmarking framework for running CPMpy on PSPLIB +instances. + +Command-line Interface +---------------------- +This script can be run directly to benchmark solvers on PSPLIB datasets. + +Usage: + python psplib.py --variant rcpsp --family j30 --solver ortools + +Arguments: + --variant Problem variant (e.g., rcpsp). + --family Problem family (e.g., j30, j120, ...) + --solver Solver name (e.g., ortools, exact, choco, ...). + --workers Number of parallel workers to use. + --time-limit Time limit in seconds per instance. + --mem-limit Memory limit in MB per instance. + --cores Number of cores to assign to a single instance. + --output-dir Output directory for CSV files. + --verbose Show solver output if set. + --intermediate Report intermediate solutions if supported. + +=============== +List of classes +=============== + +.. autosummary:: + :nosignatures: + + ExitStatus + PSPLIBBenchmark + +================= +List of functions +================= + +.. autosummary:: + :nosignatures: + + solution_psplib +""" + +import warnings +import argparse +from enum import Enum +from pathlib import Path +from datetime import datetime + +# CPMpy +from cpmpy.tools.benchmark.runner import benchmark_runner +from cpmpy.tools.benchmark._base import Benchmark, ExitStatus +from cpmpy.tools.rcpsp import read_rcpsp +from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus + + +def solution_psplib(model): + """ + Convert a CPMpy model solution into the solution string format. 
+ + Arguments: + model (cp.solvers.SolverInterface): The solver-specific model for which to print its solution + + Returns: + str: formatted solution string. + """ + variables = {var.name: var.value() for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]} # dirty workaround for all missed aux vars in user vars TODO fix with Ignace + return str(variables) + +class PSPLIBBenchmark(Benchmark): + + """ + PSPLIB as a CPMpy benchmark. + """ + + def __init__(self): + super().__init__(reader=read_rcpsp) # TODO: reader should depend on problem variant + + def print_comment(self, comment:str): + print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True) + + def print_status(self, status: ExitStatus) -> None: + print('s' + chr(32) + status.value, end="\n", flush=True) + + def print_value(self, value: str) -> None: + print('v' + chr(32) + value, end="\n", flush=True) + + def print_objective(self, objective: int) -> None: + print('o' + chr(32) + str(objective), end="\n", flush=True) + + def print_intermediate(self, objective:int): + self.print_objective(objective) + + def print_result(self, s): + if s.status().exitstatus == CPMStatus.OPTIMAL: + self.print_value(solution_psplib(s)) + self.print_status(ExitStatus.optimal) + elif s.status().exitstatus == CPMStatus.FEASIBLE: + self.print_value(solution_psplib(s)) + self.print_status(ExitStatus.sat) + elif s.status().exitstatus == CPMStatus.UNSATISFIABLE: + self.print_status(ExitStatus.unsat) + else: + self.print_comment("Solver did not find any solution within the time/memory limit") + self.print_status(ExitStatus.unknown) + + def handle_memory_error(self, mem_limit): + super().handle_memory_error(mem_limit) + self.print_status(ExitStatus.unknown) + + def handle_not_implemented(self, e): + super().handle_not_implemented(e) + self.print_status(ExitStatus.unsupported) + + def handle_exception(self, e): + super().handle_exception(e) + self.print_status(ExitStatus.unknown) + + + def handle_sigterm(self): + """ + 
Handles a SIGTERM. Gives us 1 second to finish the current job before we get killed. + """ + # Report that we haven't found a solution in time + self.print_status(ExitStatus.unknown) + self.print_comment("SIGTERM raised.") + return 0 + + def handle_rlimit_cpu(self): + """ + Handles a SIGXCPU. + """ + # Report that we haven't found a solution in time + self.print_status(ExitStatus.unknown) + self.print_comment("SIGXCPU raised.") + return 0 + + def parse_output_line(self, line, result): + if line.startswith('s '): + result['status'] = line[2:].strip() + elif line.startswith('v '): + # only record first line, contains 'type' and 'cost' + solution = line.split("\n")[0][2:].strip() + if solution not in result: + result['solution'] = solution + else: + result['solution'] = result['solution'] + ' ' + str(solution) + elif line.startswith('c Solution'): + parts = line.split(', time = ') + # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines + self.sol_time = float(parts[-1].replace('s', '').rstrip()) + elif line.startswith('o '): + obj = int(line[2:].strip()) + if result['intermediate'] is None: + result['intermediate'] = [] + result['intermediate'] += [(self.sol_time, obj)] + result['objective_value'] = obj + obj = None + elif line.startswith('c took '): + # Parse timing information + parts = line.split(' seconds to ') + if len(parts) == 2: + time_val = float(parts[0].replace('c took ', '')) + action = parts[1].strip() + if action.startswith('parse'): + result['time_parse'] = time_val + elif action.startswith('convert'): + result['time_model'] = time_val + elif action.startswith('post'): + result['time_post'] = time_val + elif action.startswith('solve'): + result['time_solve'] = time_val + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description='Benchmark solvers on PSPLIB instances') + parser.add_argument('--variant', type=str, required=True, help='Problem variant (e.g., rcpsp)') + 
parser.add_argument('--family', type=str, required=True, help='Problem family (e.g., j30, j120, ...)') + parser.add_argument('--solver', type=str, required=True, help='Solver name (e.g., ortools, exact, choco, ...)') + parser.add_argument('--workers', type=int, default=4, help='Number of parallel workers') + parser.add_argument('--time-limit', type=int, default=300, help='Time limit in seconds per instance') + parser.add_argument('--mem-limit', type=int, default=8192, help='Memory limit in MB per instance') + parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign tp a single instance') + parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files') + parser.add_argument('--verbose', action='store_true', help='Show solver output') + parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions') + # parser.add_argument('--checker-path', type=str, default=None, + # help='Path to the XCSP3 solution checker JAR file') + args = parser.parse_args() + + if not args.verbose: + warnings.filterwarnings("ignore") + + # Load benchmark instances (as a dataset) + from cpmpy.tools.dataset.problem.psplib import PSPLibDataset + dataset = PSPLibDataset(variant=args.variant, family=args.family, download=True) + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Get current timestamp in a filename-safe format + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + # Define output file path with timestamp + output_file = str(output_dir / "psplib" / f"psplib_{args.variant}_{args.family}_{args.solver}_{timestamp}.csv") + + # Run the benchmark + instance_runner = PSPLIBBenchmark() + output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args)) + print(f"Results added to {output_file}") diff --git a/cpmpy/tools/dataset/problem/psplib.py 
b/cpmpy/tools/dataset/problem/psplib.py new file mode 100644 index 000000000..b1cbf70f6 --- /dev/null +++ b/cpmpy/tools/dataset/problem/psplib.py @@ -0,0 +1,119 @@ +import os +import pathlib +from typing import Tuple, Any +from urllib.request import urlretrieve +from urllib.error import HTTPError, URLError +import zipfile + +class PSPLibDataset(object): # torch.utils.data.Dataset compatible + + """ + PSPlib Dataset in a PyTorch compatible format. + + Arguments: + root (str): Root directory containing the psplib instances (if 'download', instances will be downloaded to this location) + variant (str): scheduling variant (only 'rcpsp' is supported for now) + family (str): family name (e.g. j30, j60, etc...) + transform (callable, optional): Optional transform to be applied on the instance data + target_transform (callable, optional): Optional transform to be applied on the file path + download (bool): If True, downloads the dataset from the internet and puts it in `root` directory + """ + + def __init__(self, root: str = ".", variant: str = "rcpsp", family: str = "j30", transform=None, target_transform=None, download: bool = False): + """ + Initialize the PSPLib Dataset. + """ + + self.root = pathlib.Path(root) + self.variant = variant + self.family = family + self.transform = transform + self.target_transform = target_transform + self.family_dir = pathlib.Path(os.path.join(self.root, variant, family)) + + self.families = dict( + rcpsp = ["j30", "j60", "j90", "j120"] + ) + self.family_codes = dict(rcpsp="sm", mrcpsp="mm") + + if variant != "rcpsp": + raise ValueError("Only 'rcpsp' variant is supported for now") + if family not in self.families[variant]: + raise ValueError(f"Unknown problem family. 
Must be any of {','.join(self.families[variant])}") + # Create root directory if it doesn't exist + self.root.mkdir(parents=True, exist_ok=True) + + if not self.family_dir.exists(): + if not download: + raise ValueError(f"Dataset for variant {variant} and family {family} not found. Please set download=True to download the dataset.") + else: + print(f"Downloading PSPLib {variant} {family} instances...") + + zip_name = f"{family}.{self.family_codes[variant]}.zip" + url = f"https://www.om-db.wi.tum.de/psplib/files/" + + url_path = url + zip_name + zip_path = self.root / zip_name + + try: + urlretrieve(url_path, str(zip_path)) + except (HTTPError, URLError) as e: + raise ValueError(f"No dataset available for variant {variant} and family {family}. Error: {str(e)}") + + # make directory and extract files + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + # Create track folder in root directory, parents=True ensures recursive creation + self.family_dir.mkdir(parents=True, exist_ok=True) + + # Extract files + for file_info in zip_ref.infolist(): + # Extract file to family_dir, removing main_folder/track prefix + filename = pathlib.Path(file_info.filename).name + with zip_ref.open(file_info) as source, open(self.family_dir / filename, 'wb') as target: + target.write(source.read()) + # Clean up the zip file + zip_path.unlink() + + def open(self, instance: os.PathLike) -> callable: + return open(instance, "r") + + + def __len__(self) -> int: + """Return the total number of instances.""" + return len(list(self.family_dir.glob(f"*.{self.family_codes[self.variant]}"))) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + """ + Get a single RCPSP instance filename and metadata. + + Args: + index (int): Index of the instance to retrieve + + Returns: + Tuple[Any, Any]: A tuple containing: + - The filename of the instance + - Metadata dictionary with file name, track, year etc. 
+ """ + if index < 0 or index >= len(self): + raise IndexError("Index out of range") + + # Get all instance files and sort for deterministic behavior # TODO: use natsort instead? + files = sorted(list(self.family_dir.glob(f"*.{self.family_codes[self.variant]}"))) + file_path = files[index] + + filename = str(file_path) + if self.transform: + # does not need to remain a filename... + filename = self.transform(filename) + + # Basic metadata about the instance + metadata = dict( + variant = self.variant, + family = self.family, + name = file_path.stem + ) + + if self.target_transform: + metadata = self.target_transform(metadata) + + return filename, metadata \ No newline at end of file diff --git a/cpmpy/tools/rcpsp/__init__.py b/cpmpy/tools/rcpsp/__init__.py new file mode 100644 index 000000000..b24d99980 --- /dev/null +++ b/cpmpy/tools/rcpsp/__init__.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +## +## __init__.py +## +""" +Set of utilities for working with psplib-formatted rcpsp CP models. + + +================== +List of submodules +================== + +.. autosummary:: + :nosignatures: + + parser +""" + +from .parser import read_rcpsp diff --git a/cpmpy/tools/rcpsp/parser.py b/cpmpy/tools/rcpsp/parser.py new file mode 100644 index 000000000..cadc32482 --- /dev/null +++ b/cpmpy/tools/rcpsp/parser.py @@ -0,0 +1,171 @@ +""" +Parser for the PSPLIB RCPSP format. + + +================= +List of functions +================= + +.. autosummary:: + :nosignatures: + + read_rcpsp +""" + + +import os +import sys +import lzma +import argparse +import cpmpy as cp +from io import StringIO +from typing import Union + + +_std_open = open +def read_rcpsp(rcpsp: Union[str, os.PathLike], open=open) -> cp.Model: + """ + Parser for PSPLIB RCPSP format. Reads in an instance and returns its matching CPMpy model. 
+ + Arguments: + rcpsp (str or os.PathLike): + - A file path to a PSPLIB RCPSP file + - OR a string containing the RCPSP content directly + open: (callable): + If rcpsp is the path to a file, a callable to "open" that file (default=python standard library's 'open'). + + Returns: + cp.Model: The CPMpy model of the PSPLIB RCPSP instance. + """ + # If rcpsp is a path to a file -> open file + if isinstance(rcpsp, (str, os.PathLike)) and os.path.exists(rcpsp): + if open is not None: + f = open(rcpsp) + else: + f = _std_open(rcpsp, "rt") + # If rcpsp is a string containing a model -> create a memory-mapped file + else: + f = StringIO(rcpsp) + + + table, capacities = _parse_rcpsp(f) + model, (start, end, makespan) = _model_rcpsp(job_data=table, capacities=capacities) + return model + +def _parse_rcpsp(f): + + data = dict() + + line = f.readline() + while not line.startswith("PRECEDENCE RELATIONS:"): + line = f.readline() + + f.readline() # skip keyword line + line = f.readline() # first line of table, skip + while not line.startswith("*****"): + jobnr, n_modes, n_succ, *succ = [int(x) for x in line.split(" ") if len(x.strip())] + assert len(succ) == n_succ, "Expected %d successors for job %d, got %d" % (n_succ, jobnr, len(succ)) + data[jobnr] = dict(num_modes=n_modes, successors=succ) + line = f.readline() + + # skip to job info + while not line.startswith("REQUESTS/DURATIONS:"): + line = f.readline() + + line = f.readline() + _j, _m, _d, *_r = [x.strip() for x in line.split(" ") if len(x.strip())] # first line of table + resource_names = [f"{_r[i]}{_r[i+1]}" for i in range(0,len(_r),2)] + line = f.readline() # first line of table + if line.startswith("----") or line.startswith("*****"): # intermediate line in table... 
+ line = f.readline() # skip + + while not line.startswith("*****"): + jobnr, mode, duration, *resources = [int(x) for x in line.split(" ") if len(x.strip())] + assert len(resources) == len(resource_names), "Expected %d resources for job %d, got %d" % (len(resource_names), jobnr, len(resources)) + data[jobnr].update(dict(mode=mode, duration=duration)) + data[jobnr].update({name : req for name, req in zip(resource_names, resources)}) + line = f.readline() + + # read resource availabilities + while not line.startswith("RESOURCEAVAILABILITIES:"): + line = f.readline() + + f.readline() # skip header + capacities = [int(x) for x in f.readline().split(" ") if len(x)] + + import pandas as pd + df =pd.DataFrame([dict(jobnr=k ,**info) for k, info in data.items()], + columns=["jobnr", "mode", "duration", "successors", *resource_names]) + df.set_index("jobnr", inplace=True) + + return df, dict(zip(resource_names, capacities)) + +def _model_rcpsp(job_data, capacities): + + model = cp.Model() + + horizon = job_data.duration.sum() # worst case, all jobs sequential on a machine + makespan = cp.intvar(0, horizon, name="makespan") + + start = cp.intvar(0, horizon, name="start", shape=len(job_data)) + end = cp.intvar(0, horizon, name="end", shape=len(job_data)) + + # ensure capacity is not exceeded + for rescource, capa in capacities.items(): + model += cp.Cumulative( + start = start, + duration = job_data['duration'].tolist(), + end = end, + demand = job_data[rescource].tolist(), + capacity = capa + ) + + # enforce precedences + for idx, (jobnr, info) in enumerate(job_data.iterrows()): + for succ in info['successors']: + model += end[idx] <= start[succ-1] # job ids start at idx 1 + + model += end <= makespan + model.minimize(makespan) + + return model, (start, end, makespan) + + +def main(): + parser = argparse.ArgumentParser(description="Parse and solve a PSPLIB RCPSP model using CPMpy") + parser.add_argument("model", help="Path to a PSPLIB RCPSP file (or raw RCPSP string if 
--string is given)") + parser.add_argument("-s", "--solver", default=None, help="Solver name to use (default: CPMpy's default)") + parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw RCPSP string instead of a file path") + parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)") + args = parser.parse_args() + + # Build the CPMpy model + try: + if args.string: + model = read_rcpsp(args.model) + else: + model = read_rcpsp(os.path.expanduser(args.model)) + except Exception as e: + sys.stderr.write(f"Error reading model: {e}\n") + sys.exit(1) + + # Solve the model + try: + if args.solver: + result = model.solve(solver=args.solver, time_limit=args.time_limit) + else: + result = model.solve(time_limit=args.time_limit) + except Exception as e: + sys.stderr.write(f"Error solving model: {e}\n") + sys.exit(1) + + # Print results + print("Status:", model.status()) + if result is not None: + if model.has_objective(): + print("Objective:", model.objective_value()) + else: + print("No solution found.") + +if __name__ == "__main__": + main() \ No newline at end of file From af36c877c9d61eae1b595aa1650a4d42caebf9a9 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 24 Oct 2025 15:26:11 +0200 Subject: [PATCH 34/46] opb fix intermediate solutions --- cpmpy/tools/benchmark/opb.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py index b92fcb257..5c0c222cd 100644 --- a/cpmpy/tools/benchmark/opb.py +++ b/cpmpy/tools/benchmark/opb.py @@ -154,17 +154,17 @@ def parse_output_line(self, line, result): result['solution'] = solution else: result['solution'] = result['solution'] + ' ' + str(solution) + elif line.startswith('c Solution'): + parts = line.split(', time = ') + # Get solution time from comment for intermediate solution -> used for annotating 'o ...' 
lines + self.sol_time = float(parts[-1].replace('s', '').rstrip()) elif line.startswith('o '): obj = int(line[2:].strip()) if result['intermediate'] is None: result['intermediate'] = [] - result['intermediate'] += [(sol_time, obj)] + result['intermediate'] += [(self.sol_time, obj)] result['objective_value'] = obj obj = None - elif line.startswith('c Solution'): - parts = line.split(', time = ') - # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines - sol_time = float(parts[-1].replace('s', '').rstrip()) elif line.startswith('c took '): # Parse timing information parts = line.split(' seconds to ') From a834387f7900d7d0289267d57800e2f63bb3824c Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 24 Oct 2025 15:33:43 +0200 Subject: [PATCH 35/46] update docstrings --- cpmpy/tools/dataset/problem/psplib.py | 35 ++++++++++++++++++++------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/cpmpy/tools/dataset/problem/psplib.py b/cpmpy/tools/dataset/problem/psplib.py index b1cbf70f6..89f0e93c7 100644 --- a/cpmpy/tools/dataset/problem/psplib.py +++ b/cpmpy/tools/dataset/problem/psplib.py @@ -1,3 +1,8 @@ +""" +PSPlib Dataset + +https://www.om-db.wi.tum.de/psplib/getdata_sm.html +""" import os import pathlib from typing import Tuple, Any @@ -10,18 +15,25 @@ class PSPLibDataset(object): # torch.utils.data.Dataset compatible """ PSPlib Dataset in a PyTorch compatible format. - Arguments: - root (str): Root directory containing the psplib instances (if 'download', instances will be downloaded to this location) - variant (str): scheduling variant (only 'rcpsp' is supported for now) - family (str): family name (e.g. j30, j60, etc...) 
- transform (callable, optional): Optional transform to be applied on the instance data - target_transform (callable, optional): Optional transform to be applied on the file path - download (bool): If True, downloads the dataset from the internet and puts it in `root` directory + More information on PSPlib can be found here: https://www.om-db.wi.tum.de/psplib/main.html """ def __init__(self, root: str = ".", variant: str = "rcpsp", family: str = "j30", transform=None, target_transform=None, download: bool = False): """ - Initialize the PSPLib Dataset. + Constructor for a dataset object for PSPlib. + + Arguments: + root (str): Root directory containing the psplib instances (if 'download', instances will be downloaded to this location) + variant (str): scheduling variant (only 'rcpsp' is supported for now) + family (str): family name (e.g. j30, j60, etc...) + transform (callable, optional): Optional transform to be applied on the instance data + target_transform (callable, optional): Optional transform to be applied on the file path + download (bool): If True, downloads the dataset from the internet and puts it in `root` directory + + + Raises: + ValueError: If the dataset directory does not exist and `download=False`, + or if the requested variant/family combination is not available. 
""" self.root = pathlib.Path(root) @@ -116,4 +128,9 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]: if self.target_transform: metadata = self.target_transform(metadata) - return filename, metadata \ No newline at end of file + return filename, metadata + +if __name__ == "__main__": + dataset = PSPLibDataset(variant="rcpsp", family="j30", download=True) + print("Dataset size:", len(dataset)) + print("Instance 0:", dataset[0]) \ No newline at end of file From 8805cad7fab38bc74d0d7b05698ab406469e8706 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 24 Oct 2025 15:49:29 +0200 Subject: [PATCH 36/46] Fix more docstring --- cpmpy/tools/benchmark/psplib.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpmpy/tools/benchmark/psplib.py b/cpmpy/tools/benchmark/psplib.py index 4fab0c99c..26046cf84 100644 --- a/cpmpy/tools/benchmark/psplib.py +++ b/cpmpy/tools/benchmark/psplib.py @@ -9,7 +9,7 @@ This script can be run directly to benchmark solvers on PSPLIB datasets. Usage: - python psplib.py --year 2024 --track exact-weighted --solver ortools + python psplib.py --year 2024 --variant rcpsp --family j30 Arguments: --variant Problem variant (e.g., rcpsp). @@ -30,8 +30,7 @@ .. autosummary:: :nosignatures: - MSEExitStatus - MSEBenchmark + PSPLIBBenchmark ================= List of functions @@ -40,7 +39,7 @@ .. 
autosummary:: :nosignatures: - solution_mse + solution_psplib """ import warnings From ce6b6bcc51604e24014c3ec4a9e482e46606463e Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 24 Oct 2025 15:50:01 +0200 Subject: [PATCH 37/46] Add JSPLib dataset and benchmark --- cpmpy/tools/benchmark/jsplib.py | 209 ++++++++++++++++++++++++ cpmpy/tools/dataset/problem/jsplib.py | 218 ++++++++++++++++++++++++++ cpmpy/tools/jsplib/__init__.py | 20 +++ cpmpy/tools/jsplib/parser.py | 148 +++++++++++++++++ 4 files changed, 595 insertions(+) create mode 100644 cpmpy/tools/benchmark/jsplib.py create mode 100644 cpmpy/tools/dataset/problem/jsplib.py create mode 100644 cpmpy/tools/jsplib/__init__.py create mode 100644 cpmpy/tools/jsplib/parser.py diff --git a/cpmpy/tools/benchmark/jsplib.py b/cpmpy/tools/benchmark/jsplib.py new file mode 100644 index 000000000..30c99da79 --- /dev/null +++ b/cpmpy/tools/benchmark/jsplib.py @@ -0,0 +1,209 @@ +""" +JSPLib as a CPMpy benchmark + +This module provides a benchmarking framework for running CPMpy on JSPLib +instances. + +Command-line Interface +---------------------- +This script can be run directly to benchmark solvers on JSPLib datasets. + +Usage: + python jsplib.py --solver ortools + +Arguments: + --solver Solver name (e.g., ortools, exact, choco, ...). + --workers Number of parallel workers to use. + --time-limit Time limit in seconds per instance. + --mem-limit Memory limit in MB per instance. + --cores Number of cores to assign to a single instance. + --output-dir Output directory for CSV files. + --verbose Show solver output if set. + --intermediate Report intermediate solutions if supported. + +=============== +List of classes +=============== + +.. autosummary:: + :nosignatures: + + MSEExitStatus + MSEBenchmark + +================= +List of functions +================= + +.. 
autosummary:: + :nosignatures: + + solution_mse +""" + +import warnings +import argparse +from enum import Enum +from pathlib import Path +from datetime import datetime + +# CPMpy +from cpmpy.tools.benchmark.runner import benchmark_runner +from cpmpy.tools.benchmark._base import Benchmark, ExitStatus +from cpmpy.tools.jsplib import read_jsplib +from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus + + +def solution_psplib(model): + """ + Convert a CPMpy model solution into the solution string format. + + Arguments: + model (cp.solvers.SolverInterface): The solver-specific model for which to print its solution + + Returns: + str: formatted solution string. + """ + variables = {var.name: var.value() for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]} # dirty workaround for all missed aux vars in user vars TODO fix with Ignace + return str(variables) + +class JSPLibBenchmark(Benchmark): + + """ + PSPLIB as a CPMpy benchmark. + """ + + def __init__(self): + super().__init__(reader=read_jsplib) + + def print_comment(self, comment:str): + print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True) + + def print_status(self, status: ExitStatus) -> None: + print('s' + chr(32) + status.value, end="\n", flush=True) + + def print_value(self, value: str) -> None: + print('v' + chr(32) + value, end="\n", flush=True) + + def print_objective(self, objective: int) -> None: + print('o' + chr(32) + str(objective), end="\n", flush=True) + + def print_intermediate(self, objective:int): + self.print_objective(objective) + + def print_result(self, s): + if s.status().exitstatus == CPMStatus.OPTIMAL: + self.print_value(solution_psplib(s)) + self.print_status(ExitStatus.optimal) + elif s.status().exitstatus == CPMStatus.FEASIBLE: + self.print_value(solution_psplib(s)) + self.print_status(ExitStatus.sat) + elif s.status().exitstatus == CPMStatus.UNSATISFIABLE: + self.print_status(ExitStatus.unsat) + else: + self.print_comment("Solver did not find any 
solution within the time/memory limit") + self.print_status(ExitStatus.unknown) + + def handle_memory_error(self, mem_limit): + super().handle_memory_error(mem_limit) + self.print_status(ExitStatus.unknown) + + def handle_not_implemented(self, e): + super().handle_not_implemented(e) + self.print_status(ExitStatus.unsupported) + + def handle_exception(self, e): + super().handle_exception(e) + self.print_status(ExitStatus.unknown) + + + def handle_sigterm(self): + """ + Handles a SIGTERM. Gives us 1 second to finish the current job before we get killed. + """ + # Report that we haven't found a solution in time + self.print_status(ExitStatus.unknown) + self.print_comment("SIGTERM raised.") + return 0 + + def handle_rlimit_cpu(self): + """ + Handles a SIGXCPU. + """ + # Report that we haven't found a solution in time + self.print_status(ExitStatus.unknown) + self.print_comment("SIGXCPU raised.") + return 0 + + def parse_output_line(self, line, result): + if line.startswith('s '): + result['status'] = line[2:].strip() + elif line.startswith('v '): + # only record first line, contains 'type' and 'cost' + solution = line.split("\n")[0][2:].strip() + if solution not in result: + result['solution'] = solution + else: + result['solution'] = result['solution'] + ' ' + str(solution) + elif line.startswith('c Solution'): + parts = line.split(', time = ') + # Get solution time from comment for intermediate solution -> used for annotating 'o ...' 
lines + self.sol_time = float(parts[-1].replace('s', '').rstrip()) + elif line.startswith('o '): + obj = int(line[2:].strip()) + if result['intermediate'] is None: + result['intermediate'] = [] + result['intermediate'] += [(self.sol_time, obj)] + result['objective_value'] = obj + obj = None + elif line.startswith('c took '): + # Parse timing information + parts = line.split(' seconds to ') + if len(parts) == 2: + time_val = float(parts[0].replace('c took ', '')) + action = parts[1].strip() + if action.startswith('parse'): + result['time_parse'] = time_val + elif action.startswith('convert'): + result['time_model'] = time_val + elif action.startswith('post'): + result['time_post'] = time_val + elif action.startswith('solve'): + result['time_solve'] = time_val + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description='Benchmark solvers on JSPLib instances') + parser.add_argument('--solver', type=str, required=True, help='Solver name (e.g., ortools, exact, choco, ...)') + parser.add_argument('--workers', type=int, default=4, help='Number of parallel workers') + parser.add_argument('--time-limit', type=int, default=300, help='Time limit in seconds per instance') + parser.add_argument('--mem-limit', type=int, default=8192, help='Memory limit in MB per instance') + parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign tp a single instance') + parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files') + parser.add_argument('--verbose', action='store_true', help='Show solver output') + parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions') + # parser.add_argument('--checker-path', type=str, default=None, + # help='Path to the XCSP3 solution checker JAR file') + args = parser.parse_args() + + if not args.verbose: + warnings.filterwarnings("ignore") + + # Load benchmark instances (as a dataset) + from cpmpy.tools.dataset.problem.jsplib 
import JSPLibDataset + dataset = JSPLibDataset(download=True) + + # Create output directory + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Get current timestamp in a filename-safe format + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + # Define output file path with timestamp + output_file = str(output_dir / "jsplib" / f"psplib_{args.solver}_{timestamp}.csv") + + # Run the benchmark + instance_runner = JSPLibBenchmark() + output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args)) + print(f"Results added to {output_file}") diff --git a/cpmpy/tools/dataset/problem/jsplib.py b/cpmpy/tools/dataset/problem/jsplib.py new file mode 100644 index 000000000..54cba2890 --- /dev/null +++ b/cpmpy/tools/dataset/problem/jsplib.py @@ -0,0 +1,218 @@ +""" +PyTorch-style Dataset for Jobshop instances from JSPLib + +Simply create a dataset instance and start iterating over its contents: +The `metadata` contains usefull information about the current problem instance. + +https://github.com/tamy0612/JSPLIB +""" +import os +import json +import pathlib +from os.path import join +from typing import Tuple, Any +from urllib.request import urlretrieve +from urllib.error import HTTPError, URLError +import zipfile +import numpy as np + +from matplotlib import pyplot as plt + +import cpmpy as cp + +class JSPLibDataset(object): # torch.utils.data.Dataset compatible + + """ + JSP Dataset in a PyTorch compatible format. + + More information on JSPLib can be found here: https://github.com/tamy0612/JSPLIB + """ + + def __init__(self, root: str = ".", transform=None, target_transform=None, download: bool = False): + """ + Initialize the PSPLib Dataset. 
+ + Arguments: + root (str): Root directory containing the jsp instances (if 'download', instances will be downloaded to this location) + transform (callable, optional): Optional transform to be applied on the instance data + target_transform (callable, optional): Optional transform to be applied on the file path + download (bool): If True, downloads the dataset from the internet and puts it in `root` directory + """ + + self.root = pathlib.Path(root) + self.instance_dir = pathlib.Path(join(self.root, "jsplib")) + self.metadata_file = "instances.json" + self.transform = transform + self.target_transform = target_transform + + # Create root directory if it doesn't exist + self.root.mkdir(parents=True, exist_ok=True) + + print(self.instance_dir, self.instance_dir.exists(), self.instance_dir.is_dir()) + if not self.instance_dir.exists(): + if not download: + raise ValueError(f"Dataset not found in local file system. Please set download=True to download the dataset.") + else: + url = f"https://github.com/tamy0612/JSPLIB/archive/refs/heads/master.zip" # download full repo... + url_path = url + zip_path = pathlib.Path(join(root,"jsplib-master.zip")) + + print(f"Downloading JSPLib instances..") + + try: + urlretrieve(url_path, str(zip_path)) + except (HTTPError, URLError) as e: + raise ValueError(f"No dataset available on {url}. 
Error: {str(e)}") + + # make directory and extract files + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + self.instance_dir.mkdir(parents=True, exist_ok=True) + + # Extract files + for file_info in zip_ref.infolist(): + if file_info.filename.startswith("JSPLIB-master/instances/") and file_info.file_size > 0: + filename = pathlib.Path(file_info.filename).name + with zip_ref.open(file_info) as source, open(self.instance_dir / filename, 'wb') as target: + target.write(source.read()) + # extract metadata file + with zip_ref.open("JSPLIB-master/instances.json") as source, open(self.instance_dir / self.metadata_file, 'wb') as target: + target.write(source.read()) + # Clean up the zip file + zip_path.unlink() + + + def __len__(self) -> int: + """Return the total number of instances.""" + return len(list(self.instance_dir.glob("*"))) + + def __getitem__(self, index: int|str) -> Tuple[Any, Any]: + """ + Get a single JSPLib instance filename and metadata. + + Args: + index (int or str): Index or name of the instance to retrieve + + Returns: + Tuple[Any, Any]: A tuple containing: + - The filename of the instance + - Metadata dictionary with file name, track, year etc. + """ + if isinstance(index, int) and (index < 0 or index >= len(self)): + raise IndexError("Index out of range") + + # Get all instance files and sort for deterministic behavior # TODO: use natsort instead? + files = sorted(list(self.instance_dir.glob("*[!.json]"))) # exclude metadata file + if isinstance(index, int): + file_path = files[index] + elif isinstance(index, str): + for file_path in files: + if file_path.stem == index: + break + else: + raise IndexError(f"Instance {index} not found in dataset") + + filename = str(file_path) + if self.transform: + # does not need to remain a filename... 
+ filename = self.transform(filename) + + with open(self.instance_dir / self.metadata_file, "r") as f: + for entry in json.load(f): + if entry["name"] == file_path.stem: + metadata = entry + metadata['path'] = str(file_path) + break + else: + metadata = dict() + + if self.target_transform: + metadata = self.target_transform(metadata) + + return filename, metadata + + def open(self, instance: os.PathLike) -> callable: + return open(instance, "r") + + +def parse_jsp(filename: str): + """ + Parse a JSPLib instance file + Returns two matrices: + - task to machines indicating on which machine to run which task + - task durations: indicating the duration of each task + """ + + with open(filename, "r") as f: + line = f.readline() + while line.startswith("#"): + line = f.readline() + n_jobs, n_tasks = map(int, line.strip().split(" ")) + matrix = np.fromstring(f.read(), sep=" ", dtype=int).reshape((n_jobs, n_tasks*2)) + + task_to_machines = np.empty(dtype=int, shape=(n_jobs, n_tasks)) + task_durations = np.empty(dtype=int, shape=(n_jobs, n_tasks)) + + for t in range(n_tasks): + task_to_machines[:, t] = matrix[:, t*2] + task_durations[:, t] = matrix[:, t*2+1] + + return task_to_machines, task_durations + +def jobshop_model(task_to_machines, task_durations): + + + task_to_machines = np.array(task_to_machines) + dur = np.array(task_durations) + + assert task_to_machines.shape == task_durations.shape + + n_jobs, n_tasks = task_to_machines.shape + + start = cp.intvar(0, task_durations.sum(), name="start", shape=(n_jobs,n_tasks)) # extremely bad upperbound... TODO + end = cp.intvar(0, task_durations.sum(), name="end", shape=(n_jobs,n_tasks)) # extremely bad upperbound... TODO + makespan = cp.intvar(0, task_durations.sum(), name="makespan") # extremely bad upperbound... 
TODO + + model = cp.Model() + model += start + dur == end + model += end[:,:-1] <= start[:,1:] # precedences + + for machine in set(task_to_machines.flat): + model += cp.NoOverlap(start[task_to_machines == machine], + dur[task_to_machines == machine], + end[task_to_machines == machine]) + + model += end <= makespan + model.minimize(makespan) + + return model, (start, makespan) + + +if __name__ == "__main__": + + dataset = JSPLibDataset(root=".", download=True, transform=parse_jsp) + print("Dataset size:", len(dataset)) + print("Instance 0:") + (machines, dur), metadata = dataset[0] + print("Machines:", machines) + print("Durations:", dur) + print("Metadata:", metadata) + + print("Solving", metadata['name']) + model, (start, makespan) = jobshop_model(task_to_machines=machines, task_durations=dur) + assert model.solve(time_limit=10) + + import pandas as pd + import plotly.express as px + import plotly.io as pio + pio.renderers.default = "browser" # ensure plotly opens figure in browser + + df = pd.DataFrame({"Start": start.value().flat, "Duration": dur.flat, "Machine": machines.flat}) + df["Job"] = [j for j in range(metadata['jobs']) for _ in range(metadata['machines']) ] + df["Task"] = [j for _ in range(metadata['machines']) for j in range(metadata['jobs'])] + df["Name"] = "T" + df["Job"].astype(str) + "-" + df["Task"].astype(str) + print(df) + ghant_fig = px.bar(df, orientation='h', + base="Start", x="Duration", y="Machine", color="Job", text="Name", + title=f"Jobshop instance {metadata['name']}, makespan: {makespan.value()}, status: {model.status()}" + ) + ghant_fig.show() \ No newline at end of file diff --git a/cpmpy/tools/jsplib/__init__.py b/cpmpy/tools/jsplib/__init__.py new file mode 100644 index 000000000..6ebdec377 --- /dev/null +++ b/cpmpy/tools/jsplib/__init__.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +#-*- coding:utf-8 -*- +## +## __init__.py +## +""" +Set of utilities for working with JSPLib-formatted CP models. 
+ + +================== +List of submodules +================== + +.. autosummary:: + :nosignatures: + + parser +""" + +from .parser import read_jsplib diff --git a/cpmpy/tools/jsplib/parser.py b/cpmpy/tools/jsplib/parser.py new file mode 100644 index 000000000..11c820faa --- /dev/null +++ b/cpmpy/tools/jsplib/parser.py @@ -0,0 +1,148 @@ +""" +Parser for the JSPLib format. + + +================= +List of functions +================= + +.. autosummary:: + :nosignatures: + + read_jsplib +""" + + +import os +import sys +import lzma +import argparse +import cpmpy as cp +import numpy as np +from io import StringIO +from typing import Union + + +_std_open = open +def read_jsplib(jsp: Union[str, os.PathLike], open=open) -> cp.Model: + """ + Parser for JSPLib format. Reads in an instance and returns its matching CPMpy model. + + Arguments: + jsp (str or os.PathLike): + - A file path to a JSPlib file + - OR a string containing the JSPLib content directly + open: (callable): + If jsp is the path to a file, a callable to "open" that file (default=python standard library's 'open'). + + Returns: + cp.Model: The CPMpy model of the JSPLib instance. 
+ """ + # If rcpsp is a path to a file -> open file + if isinstance(jsp, (str, os.PathLike)) and os.path.exists(jsp): + if open is not None: + f = open(jsp) + else: + f = _std_open(jsp, "rt") + # If rcpsp is a string containing a model -> create a memory-mapped file + else: + f = StringIO(jsp) + + + task_to_machines, task_durations = _parse_jsplib(f) + model, (start, makespan) = _model_jsplib(task_to_machines=task_to_machines, task_durations=task_durations) + return model + + +def _parse_jsplib(f): + """ + Parse a JSPLib instance file + Returns two matrices: + - task to machines indicating on which machine to run which task + - task durations: indicating the duration of each task + """ + + line = f.readline() + while line.startswith("#"): + line = f.readline() + n_jobs, n_tasks = map(int, line.strip().split(" ")) + matrix = np.fromstring(f.read(), sep=" ", dtype=int).reshape((n_jobs, n_tasks*2)) + + task_to_machines = np.empty(dtype=int, shape=(n_jobs, n_tasks)) + task_durations = np.empty(dtype=int, shape=(n_jobs, n_tasks)) + + for t in range(n_tasks): + task_to_machines[:, t] = matrix[:, t*2] + task_durations[:, t] = matrix[:, t*2+1] + + return task_to_machines, task_durations + + + +def _model_jsplib(task_to_machines, task_durations): + + task_to_machines = np.array(task_to_machines) + dur = np.array(task_durations) + + assert task_to_machines.shape == task_durations.shape + + n_jobs, n_tasks = task_to_machines.shape + + start = cp.intvar(0, task_durations.sum(), name="start", shape=(n_jobs,n_tasks)) # extremely bad upperbound... TODO + end = cp.intvar(0, task_durations.sum(), name="end", shape=(n_jobs,n_tasks)) # extremely bad upperbound... TODO + makespan = cp.intvar(0, task_durations.sum(), name="makespan") # extremely bad upperbound... 
TODO + + model = cp.Model() + model += start + dur == end + model += end[:,:-1] <= start[:,1:] # precedences + + for machine in set(task_to_machines.flat): + model += cp.NoOverlap(start[task_to_machines == machine], + dur[task_to_machines == machine], + end[task_to_machines == machine]) + + model += end <= makespan + model.minimize(makespan) + + return model, (start, makespan) + + + +def main(): + parser = argparse.ArgumentParser(description="Parse and solve a JSPLib model using CPMpy") + parser.add_argument("model", help="Path to a JSPLib file (or raw RCPSP string if --string is given)") + parser.add_argument("-s", "--solver", default=None, help="Solver name to use (default: CPMpy's default)") + parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw JSPLib string instead of a file path") + parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)") + args = parser.parse_args() + + # Build the CPMpy model + try: + if args.string: + model = read_jsplib(args.model) + else: + model = read_jsplib(os.path.expanduser(args.model)) + except Exception as e: + sys.stderr.write(f"Error reading model: {e}\n") + sys.exit(1) + + # Solve the model + try: + if args.solver: + result = model.solve(solver=args.solver, time_limit=args.time_limit) + else: + result = model.solve(time_limit=args.time_limit) + except Exception as e: + sys.stderr.write(f"Error solving model: {e}\n") + sys.exit(1) + + # Print results + print("Status:", model.status()) + if result is not None: + if model.has_objective(): + print("Objective:", model.objective_value()) + else: + print("No solution found.") + +if __name__ == "__main__": + main() \ No newline at end of file From 9098299f5ecc2f986e622abb1fb3b3135595b571 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Fri, 24 Oct 2025 16:04:42 +0200 Subject: [PATCH 38/46] Add bounds for all jsplib instances --- 
cpmpy/tools/dataset/problem/jsplib.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpmpy/tools/dataset/problem/jsplib.py b/cpmpy/tools/dataset/problem/jsplib.py index 54cba2890..d110c48db 100644 --- a/cpmpy/tools/dataset/problem/jsplib.py +++ b/cpmpy/tools/dataset/problem/jsplib.py @@ -120,6 +120,9 @@ def __getitem__(self, index: int|str) -> Tuple[Any, Any]: for entry in json.load(f): if entry["name"] == file_path.stem: metadata = entry + if "bounds" not in metadata: + metadata["bounds"] = {"upper": metadata["optimum"], "lower": metadata["optimum"]} + del metadata['path'] metadata['path'] = str(file_path) break else: From 658967d3951e2dc0e7219c755a6b7fc994271399 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Sat, 25 Oct 2025 10:48:47 +0200 Subject: [PATCH 39/46] Fix choco args --- cpmpy/tools/benchmark/_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py index 11d17ed42..3f9257d49 100644 --- a/cpmpy/tools/benchmark/_base.py +++ b/cpmpy/tools/benchmark/_base.py @@ -34,6 +34,7 @@ import signal import sys import time +import math import random import psutil import warnings @@ -191,7 +192,7 @@ def exact_arguments( return res, None - def choco_arguments(): + def choco_arguments(self): # Documentation: https://github.com/chocoteam/pychoco/blob/master/pychoco/solver.py return {}, None From 38db2906c84fe70ee0c979b0defedfcae94b4c0e Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Sat, 25 Oct 2025 18:16:40 +0200 Subject: [PATCH 40/46] Fixes --- cpmpy/tools/benchmark/jsplib.py | 6 +++++- cpmpy/tools/benchmark/psplib.py | 6 +++++- cpmpy/tools/benchmark/xcsp3.py | 10 +++++----- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/cpmpy/tools/benchmark/jsplib.py b/cpmpy/tools/benchmark/jsplib.py index 30c99da79..e9dacb7ce 100644 --- a/cpmpy/tools/benchmark/jsplib.py +++ b/cpmpy/tools/benchmark/jsplib.py @@ -74,6 +74,7 @@ class JSPLibBenchmark(Benchmark): """ def __init__(self): 
+ self.sol_time = None super().__init__(reader=read_jsplib) def print_comment(self, comment:str): @@ -93,9 +94,11 @@ def print_intermediate(self, objective:int): def print_result(self, s): if s.status().exitstatus == CPMStatus.OPTIMAL: + self.print_objective(s.objective_value()) self.print_value(solution_psplib(s)) self.print_status(ExitStatus.optimal) elif s.status().exitstatus == CPMStatus.FEASIBLE: + self.print_objective(s.objective_value()) self.print_value(solution_psplib(s)) self.print_status(ExitStatus.sat) elif s.status().exitstatus == CPMStatus.UNSATISFIABLE: @@ -153,7 +156,8 @@ def parse_output_line(self, line, result): obj = int(line[2:].strip()) if result['intermediate'] is None: result['intermediate'] = [] - result['intermediate'] += [(self.sol_time, obj)] + if self.sol_time is not None: + result['intermediate'] += [(self.sol_time, obj)] result['objective_value'] = obj obj = None elif line.startswith('c took '): diff --git a/cpmpy/tools/benchmark/psplib.py b/cpmpy/tools/benchmark/psplib.py index 26046cf84..0f1a1639f 100644 --- a/cpmpy/tools/benchmark/psplib.py +++ b/cpmpy/tools/benchmark/psplib.py @@ -75,6 +75,7 @@ class PSPLIBBenchmark(Benchmark): """ def __init__(self): + self.sol_time = None super().__init__(reader=read_rcpsp) # TODO: reader should depend on problem variant def print_comment(self, comment:str): @@ -94,9 +95,11 @@ def print_intermediate(self, objective:int): def print_result(self, s): if s.status().exitstatus == CPMStatus.OPTIMAL: + self.print_objective(s.objective_value()) self.print_value(solution_psplib(s)) self.print_status(ExitStatus.optimal) elif s.status().exitstatus == CPMStatus.FEASIBLE: + self.print_objective(s.objective_value()) self.print_value(solution_psplib(s)) self.print_status(ExitStatus.sat) elif s.status().exitstatus == CPMStatus.UNSATISFIABLE: @@ -154,7 +157,8 @@ def parse_output_line(self, line, result): obj = int(line[2:].strip()) if result['intermediate'] is None: result['intermediate'] = [] - 
result['intermediate'] += [(self.sol_time, obj)] + if self.sol_time is not None: + result['intermediate'] += [(self.sol_time, obj)] result['objective_value'] = obj obj = None elif line.startswith('c took '): diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py index 47d0289e4..176d42d18 100644 --- a/cpmpy/tools/benchmark/xcsp3.py +++ b/cpmpy/tools/benchmark/xcsp3.py @@ -182,17 +182,17 @@ def parse_output_line(self, line, result): complete_solution = line if "cost" in solution: result['objective_value'] = solution.split('cost="')[-1][:-2] + elif line.startswith('c Solution'): + parts = line.split(', time = ') + # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines + self._sol_time = float(parts[-1].replace('s', '').rstrip()) elif line.startswith('o '): obj = int(line[2:].strip()) if result['intermediate'] is None: result['intermediate'] = [] - result['intermediate'] += [(sol_time, obj)] + result['intermediate'] += [(self._sol_time, obj)] result['objective_value'] = obj obj = None - elif line.startswith('c Solution'): - parts = line.split(', time = ') - # Get solution time from comment for intermediate solution -> used for annotating 'o ...' 
lines - sol_time = float(parts[-1].replace('s', '').rstrip()) elif line.startswith('c took '): # Parse timing information parts = line.split(' seconds to ') From 62b605d99109fa9bafd4b16ca92a0bb812963121 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Mon, 3 Nov 2025 17:08:37 +0100 Subject: [PATCH 41/46] correct jsplib output file name --- cpmpy/tools/benchmark/jsplib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpmpy/tools/benchmark/jsplib.py b/cpmpy/tools/benchmark/jsplib.py index e9dacb7ce..343c2dfdd 100644 --- a/cpmpy/tools/benchmark/jsplib.py +++ b/cpmpy/tools/benchmark/jsplib.py @@ -205,7 +205,7 @@ def parse_output_line(self, line, result): timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Define output file path with timestamp - output_file = str(output_dir / "jsplib" / f"psplib_{args.solver}_{timestamp}.csv") + output_file = str(output_dir / "jsplib" / f"jsplib_{args.solver}_{timestamp}.csv") # Run the benchmark instance_runner = JSPLibBenchmark() From ddf69389644bd69c9efa2cc06090a0d399dbef0c Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Mon, 3 Nov 2025 17:09:02 +0100 Subject: [PATCH 42/46] remove matplotlib import --- cpmpy/tools/dataset/problem/jsplib.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpmpy/tools/dataset/problem/jsplib.py b/cpmpy/tools/dataset/problem/jsplib.py index d110c48db..17453fe32 100644 --- a/cpmpy/tools/dataset/problem/jsplib.py +++ b/cpmpy/tools/dataset/problem/jsplib.py @@ -16,8 +16,6 @@ import zipfile import numpy as np -from matplotlib import pyplot as plt - import cpmpy as cp class JSPLibDataset(object): # torch.utils.data.Dataset compatible From 344aaafd94fef5765b6d7baa22e111ad204cf7c4 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Mon, 3 Nov 2025 17:09:23 +0100 Subject: [PATCH 43/46] xcsp3 track intermediate sol time --- cpmpy/tools/benchmark/xcsp3.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py index 
176d42d18..1bc70ad9b 100644 --- a/cpmpy/tools/benchmark/xcsp3.py +++ b/cpmpy/tools/benchmark/xcsp3.py @@ -121,6 +121,7 @@ class XCSP3Benchmark(Benchmark): """ def __init__(self): + self._sol_time = None super().__init__(reader=read_xcsp3, exit_status=XCSP3ExitStatus) def print_comment(self, comment:str): @@ -190,7 +191,8 @@ def parse_output_line(self, line, result): obj = int(line[2:].strip()) if result['intermediate'] is None: result['intermediate'] = [] - result['intermediate'] += [(self._sol_time, obj)] + if self._sol_time is not None: + result['intermediate'] += [(self._sol_time, obj)] result['objective_value'] = obj obj = None elif line.startswith('c took '): From 7cd1bb1630345303c42d49a2dc328572a7e6ad6a Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Mon, 3 Nov 2025 17:09:41 +0100 Subject: [PATCH 44/46] opb print intermediate solutions --- cpmpy/tools/benchmark/opb.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py index 5c0c222cd..0c571a058 100644 --- a/cpmpy/tools/benchmark/opb.py +++ b/cpmpy/tools/benchmark/opb.py @@ -83,6 +83,7 @@ class OPBBenchmark(Benchmark): """ def __init__(self): + self.sol_time = None super().__init__(reader=read_opb, exit_status=OPBExitStatus) def print_comment(self, comment:str): @@ -103,9 +104,11 @@ def print_intermediate(self, objective:int): def print_result(self, s): if s.status().exitstatus == CPMStatus.OPTIMAL: + self.print_objective(s.objective_value()) self.print_value(solution_opb(s)) self.print_status(OPBExitStatus.optimal) elif s.status().exitstatus == CPMStatus.FEASIBLE: + self.print_objective(s.objective_value()) self.print_value(solution_opb(s)) self.print_status(OPBExitStatus.sat) elif s.status().exitstatus == CPMStatus.UNSATISFIABLE: @@ -162,7 +165,8 @@ def parse_output_line(self, line, result): obj = int(line[2:].strip()) if result['intermediate'] is None: result['intermediate'] = [] - result['intermediate'] += [(self.sol_time, obj)] 
+ if self.sol_time is not None: + result['intermediate'] += [(self.sol_time, obj)] result['objective_value'] = obj obj = None elif line.startswith('c took '): From a21a0404c22463fae25e945e1cd4f5c688cfad80 Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Mon, 3 Nov 2025 17:09:54 +0100 Subject: [PATCH 45/46] mse print intermediate solutions --- cpmpy/tools/benchmark/mse.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py index 656467bf9..a1936346e 100644 --- a/cpmpy/tools/benchmark/mse.py +++ b/cpmpy/tools/benchmark/mse.py @@ -90,6 +90,7 @@ class MSEBenchmark(Benchmark): """ def __init__(self): + self._sol_time = None super().__init__(reader=read_wcnf, exit_status=MSEExitStatus) def print_comment(self, comment:str): @@ -109,9 +110,11 @@ def print_intermediate(self, objective:int): def print_result(self, s): if s.status().exitstatus == CPMStatus.OPTIMAL: + self.print_objective(s.objective_value()) self.print_value(solution_mse(s)) self.print_status(MSEExitStatus.optimal) elif s.status().exitstatus == CPMStatus.FEASIBLE: + self.print_objective(s.objective_value()) self.print_value(solution_mse(s)) self.print_status(MSEExitStatus.sat) elif s.status().exitstatus == CPMStatus.UNSATISFIABLE: @@ -169,7 +172,8 @@ def parse_output_line(self, line, result): obj = int(line[2:].strip()) if result['intermediate'] is None: result['intermediate'] = [] - result['intermediate'] += [(self._sol_time, obj)] + if self._sol_time is not None: + result['intermediate'] += [(self._sol_time, obj)] result['objective_value'] = obj obj = None elif line.startswith('c took '): From eda839c8b9df7df12c238a9e16a4bde6494fa8aa Mon Sep 17 00:00:00 2001 From: ThomSerg Date: Mon, 3 Nov 2025 17:13:07 +0100 Subject: [PATCH 46/46] cplex and hexaly solver arguments --- cpmpy/tools/benchmark/_base.py | 93 ++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/cpmpy/tools/benchmark/_base.py 
b/cpmpy/tools/benchmark/_base.py index 4de8ce816..ce103dcab 100644 --- a/cpmpy/tools/benchmark/_base.py +++ b/cpmpy/tools/benchmark/_base.py @@ -338,7 +338,96 @@ def solution_count(self): res |= { "solution_callback": CpoSolutionCallback } return res, None + + def cplex_arguments( + self, + cores: Optional[int] = None, + seed: Optional[int] = None, + **kwargs + ): + res = dict() + if cores is not None: + res |= {"threads": cores} + if seed is not None: + res |= {"randomseed": seed} + + return res, None + + def hexaly_arguments( + self, + model: cp.Model, + cores: Optional[int] = None, + seed: Optional[int] = None, + intermediate: bool = False, + **kwargs + ): + res = dict() + #res |= {"nb_threads": cores} + #res |= {"seed": seed} + + + if intermediate and model.has_objective(): + # Define custom Hexaly solution callback, then register it + + _self = self + class HexSolutionCallback: + def __init__(self): + self.__start_time = time.time() + self.__solution_count = 0 + + + def on_solution_callback(self, optimizer, cb_type): + """Called on each new solution.""" + # check if solution with different objective (or if verbose) + current_time = time.time() + obj = optimizer.model.objectives[0] + _self.print_comment('Solution %i, time = %0.4fs' % + (self.__solution_count, current_time - self.__start_time)) + _self.print_intermediate(obj) + self.__solution_count += 1 + + def solution_count(self): + return self.__solution_count + + # Register the callback + res |= { "solution_callback": HexSolutionCallback().on_solution_callback } + + + # def internal_options(solver: "CPM_hexaly"): + # # https://github.com/google/or-tools/blob/1c5daab55dd84bca7149236e4b4fa009e5fd95ca/ortools/flatzinc/cp_model_fz_solver.cc#L1688 + # #solver.native_model.get_param().set_seed(seed) + # #solver.native_model.get_param().set_nr_threads(cores) + + # _self = self + # class CallbackExample: + # def __init__(self): + # self.last_best_value = 0 + # self.last_best_running_time = 0 + # 
self.__solution_count = 0 + # self.__start_time = time.time() + + # def my_callback(self, optimizer, cb_type): + # stats = optimizer.statistics + # obj = optimizer.model.objectives[0] + # current_time = time.time() + # #obj = int(self.ObjectiveValue()) + # #obj = optimizer.get_objective_bound(0).value + # if obj.value > self.last_best_value: + # self.last_best_running_time = stats.running_time + # self.last_best_value = obj.value + # self.__solution_count += 1 + + # _self.print_comment('Solution %i, time = %0.4fs' % + # (self.__solution_count, current_time - self.__start_time)) + # _self.print_intermediate(obj.value) + + # optimizer = solver.native_model + # cb = CallbackExample() + # from hexaly.optimizer import HxCallbackType + # optimizer.add_callback(HxCallbackType.TIME_TICKED, cb.my_callback) + + return res, None """ Methods which can, bit most likely shouldn't, be overwritten. @@ -420,6 +509,10 @@ def solver_arguments( return self.gurobi_arguments(model, cores=cores, seed=seed, mem_limit=mem_limit, intermediate=intermediate, opt=opt, **kwargs) elif solver == "cpo": return self.cpo_arguments(model=model, cores=cores, seed=seed, intermediate=intermediate, **kwargs) + elif solver == "hexaly": + return self.hexaly_arguments(model, cores=cores, seed=seed, intermediate=intermediate, **kwargs) + elif solver == "cplex": + return self.cplex_arguments(cores=cores, **kwargs) else: self.print_comment(f"setting parameters of {solver} is not (yet) supported") return dict(), None