From feead09d8259f9e05bf6276fee51c913d4be95b9 Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Thu, 11 Sep 2025 17:49:43 +0200
Subject: [PATCH 01/46] WCNF parser

---
 cpmpy/tools/wcnf/__init__.py | 90 ++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 cpmpy/tools/wcnf/__init__.py

diff --git a/cpmpy/tools/wcnf/__init__.py b/cpmpy/tools/wcnf/__init__.py
new file mode 100644
index 000000000..3446f0906
--- /dev/null
+++ b/cpmpy/tools/wcnf/__init__.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+##
+## __init__.py
+##
+"""
+Set of utilities for working with WCNF-formatted CP models.
+
+
+=================
+List of functions
+=================
+
+.. autosummary::
+    :nosignatures:
+
+    read_wcnf
+"""
+
+
+import os
+import lzma
+import cpmpy as cp
+from io import StringIO
+from typing import Union
+
+
+def _get_var(i, vars_dict):
+    """
+    Returns CPMpy boolean decision variable matching to index `i` if exists, else creates a new decision variable.
+
+    Arguments:
+        i: index
+        vars_dict (dict): dictionary to keep track of previously generated decision variables
+    """
+    if i not in vars_dict:
+        vars_dict[i] = cp.boolvar(name=f"x{i}") # <- be carefull that name doesn't clash with generated variables during transformations / user variables
+    return vars_dict[i]
+
+
+def read_wcnf(wcnf: Union[str, os.PathLike]) -> cp.Model:
+    """
+    Parser for WCNF format. Reads in an instance and returns its matching CPMpy model.
+
+    Arguments: 
+        wcnf (str or os.PathLike): A string containing a WCNF-formatted model, or a path to a file containing containing the same.
+
+    Returns:
+        cp.Model: The CPMpy model of the WCNF instance.
+    """
+    # If wcnf is a path to a file -> open file
+    if isinstance(wcnf, (str, os.PathLike)) and os.path.exists(wcnf):
+        f_open = lzma.open if str(wcnf).endswith(".xz") else open
+        f = f_open(wcnf, "rt")
+    # If wcnf is a string containing a model -> create a memory-mapped file
+    else:
+        f = StringIO(wcnf)
+
+    model = cp.Model()
+    vars = {}
+    soft_terms = []
+
+    for raw in f:
+        line = raw.strip()
+
+        # Empty line or a comment -> skip
+        if not line or line.startswith("c"):
+            continue
+
+        # Hard clause
+        if line[0] == "h":
+            literals = map(int, line[1:].split())
+            clause = [_get_var(i, vars) if i > 0 else ~_get_var(-i, vars)
+                      for i in literals if i != 0]
+            model.add(cp.any(clause))
+
+        # Soft clause (weight first)
+        else:
+            parts = line.split()
+            weight = int(parts[0])
+            literals = map(int, parts[1:])
+            clause = [_get_var(i, vars) if i > 0 else ~_get_var(-i, vars)
+                    for i in literals if i != 0]
+            soft_terms.append(weight * cp.any(clause))
+
+    # Objective = sum of soft clause terms
+    if soft_terms:
+        model.maximize(sum(soft_terms))
+
+    return model
\ No newline at end of file

From 5ade48ec7a661123688a79979d262e591c44e21c Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Thu, 11 Sep 2025 18:02:32 +0200
Subject: [PATCH 02/46] Small docstring change

---
 cpmpy/tools/wcnf/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cpmpy/tools/wcnf/__init__.py b/cpmpy/tools/wcnf/__init__.py
index 3446f0906..5ce83a146 100644
--- a/cpmpy/tools/wcnf/__init__.py
+++ b/cpmpy/tools/wcnf/__init__.py
@@ -43,7 +43,9 @@ def read_wcnf(wcnf: Union[str, os.PathLike]) -> cp.Model:
     Parser for WCNF format. Reads in an instance and returns its matching CPMpy model.
 
     Arguments: 
-        wcnf (str or os.PathLike): A string containing a WCNF-formatted model, or a path to a file containing containing the same.
+        wcnf (str or os.PathLike):
+            - A file path to an WCNF file (optionally LZMA-compressed with `.xz`)
+            - OR a string containing the WCNF content directly
 
     Returns:
         cp.Model: The CPMpy model of the WCNF instance.

From 7f52f5fc7694d6736c877889835ca4266db95109 Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Thu, 11 Sep 2025 18:02:54 +0200
Subject: [PATCH 03/46] OPB parser

---
 cpmpy/tools/opb/__init__.py | 179 ++++++++++++++++++++++++++++++++++++
 1 file changed, 179 insertions(+)
 create mode 100644 cpmpy/tools/opb/__init__.py

diff --git a/cpmpy/tools/opb/__init__.py b/cpmpy/tools/opb/__init__.py
new file mode 100644
index 000000000..963c021cb
--- /dev/null
+++ b/cpmpy/tools/opb/__init__.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+##
+## __init__.py
+##
+"""
+Set of utilities for working with OPB-formatted CP models.
+
+Currently only the restricted OPB PB24 format is supported (without WBO).
+
+
+=================
+List of functions
+=================
+
+.. autosummary::
+    :nosignatures:
+
+    read_opb
+"""
+
+
+import os
+import re
+import lzma
+import cpmpy as cp
+from io import StringIO
+from typing import Union
+from functools import reduce
+from operator import mul
+
+# Regular expressions
+HEADER_RE = re.compile(r'(.*)\s*#variable=\s*(\d+)\s*#constraint=\s*(\d+).*')
+TERM_RE = re.compile(r"([+-]?\d+)((?:\s+~?x\d+)+)")
+OBJ_TERM_RE = re.compile(r'^min:')
+IND_TERM_RE = re.compile(r'([>=|<=|=]+)\s+([+-]?\d+)')
+IND_TERM_RE = re.compile(r'(>=|<=|=)\s*([+-]?\d+)')
+
+
+def _parse_term(line, vars):
+    """
+    Parse a line containing OPB terms into a CPMpy expression.
+
+    Supports:
+        - Linear terms (e.g., +2 x1)
+        - Non-linear terms (e.g., -1 x1 x14)
+        - Negated variables using '~' (e.g., ~x5)
+
+    Arguments:
+        line (str):                 A string containing one or more terms.
+        vars (list[cp.boolvar]):    List or array of CPMpy Boolean variables.
+
+    Returns:
+        cp.Expression: A CPMpy expression representing the sum of all parsed terms.
+
+    Example:
+        >>> _parse_term("2 x2 x3 +3 x4 ~x5", vars)
+        sum([2, 3] * [(IV2*IV3), (IV4*~IV5)])
+    """
+
+    terms = []
+    for w, vars_str in TERM_RE.findall(line):
+        factors = []
+
+        for v in vars_str.split():
+            if v.startswith("~x"):
+                idx = int(v[2:]) # remove "~x"
+                factors.append(~vars[idx])
+            else:
+                idx = int(v[1:]) # remove "x"
+                factors.append(vars[idx])
+        
+        term = int(w) * reduce(mul, factors, 1) # create weighted term
+        terms.append(term)
+
+    return cp.sum(terms)
+
+def _parse_constraint(line, vars):
+    """
+    Parse a single OPB constraint line into a CPMpy comparison expression.
+
+    Arguments:
+        line (str):                 A string representing a single OPB constraint.
+        vars (list[cp.boolvar]):    List or array of CPMpy Boolean variables. Will be index to get the variables for the constraint.
+
+    Returns:
+        cp.expressions.core.Comparison: A CPMpy comparison expression representing
+                                        the constraint.
+
+    Example:
+        >>> _parse_constraint("-1 x1 x14 -1 x1 ~x17 >= -1", vars)
+        sum([-1, -1] * [(IV1*IV14), (IV1*~IV17)]) >= -1
+    """
+
+    op, ind_term = IND_TERM_RE.search(line).groups()
+    lhs = _parse_term(line, vars)
+
+    rhs = int(ind_term) if ind_term.lstrip("+-").isdigit() else vars[int(ind_term)]
+
+    return cp.expressions.core.Comparison(
+        name="==" if op == "=" else ">=",
+        left=lhs,
+        right=rhs
+    )
+
+def read_opb(opb: Union[str, os.PathLike]) -> cp.Model:
+    """
+    Parser for OPB (Pseudo-Boolean) format. Reads in an instance and returns its matching CPMpy model.
+
+    Based on PyPBLib's example parser: https://hardlog.udl.cat/static/doc/pypblib/html/library/index.html#example-from-opb-to-cnf-file
+
+    Supports:
+        - Linear and non-linear terms (e.g., -1 x1 x14 +2 x2)
+        - Negated variables using '~' (e.g., ~x5)
+        - Minimisation objective
+        - Comparison operators in constraints: '=', '>='
+
+    Arguments:
+        opb (str or os.PathLike): 
+            - A file path to an OPB file (optionally LZMA-compressed with `.xz`)
+            - OR a string containing the OPB content directly
+
+    Returns:
+        cp.Model: The CPMpy model of the OPB instance.
+
+    Example:
+        >>> opb_text = '''
+        ... * #variable= 5 #constraint= 2 #equal= 1 intsize= 64 #product= 5 sizeproduct= 13
+        ... min: 2 x2 x3 +3 x4 ~x5 +2 ~x1 x2 +3 ~x1 x2 x3 ~x4 ~x5 ;
+        ... 2 x2 x3 -1 x1 ~x3 = 5 ;
+        ... '''
+        >>> model = read_opb(opb_text)
+        >>> print(model)
+        Model(...)
+    
+    Notes:
+        - Comment lines starting with '*' are ignored.
+        - Only "min:" objectives are supported; "max:" is not recognized.
+    """
+
+    
+    # If opb is a path to a file -> open file
+    if isinstance(opb, (str, os.PathLike)) and os.path.exists(opb):
+        f_open = lzma.open if str(opb).endswith(".xz") else open
+        f = f_open(opb, 'rt')
+    # If opb is a string containing a model -> create a memory-mapped file
+    else:
+        f = StringIO(opb)
+
+    # Look for header on first line
+    line = f.readline()
+    header = HEADER_RE.match(line)
+    if not header: # If not found on first line, look on second (happens when passing multi line string)
+        _line = f.readline()
+        header = HEADER_RE.match(_line)
+        if not header:
+            raise ValueError(f"Missing or incorrect header: \n0: {line}1: {_line}2: ...")
+    nr_vars = int(header.group(2)) + 1
+
+    # Generator without comment lines
+    reader = (l for l in map(str.strip, f) if l and l[0] != '*')
+
+    # CPMpy objects
+    vars = cp.boolvar(shape=nr_vars, name="x")
+    model = cp.Model()
+    
+    # Special case for first line -> might contain objective function
+    first_line = next(reader)
+    if OBJ_TERM_RE.match(first_line):
+        obj_expr = _parse_term(first_line, vars)
+        model.minimize(obj_expr)
+    else: # no objective found, parse as a constraint instead
+        model.add(_parse_constraint(first_line, vars))
+
+    # Start parsing line by line
+    for line in reader:
+        model.add(_parse_constraint(line, vars))
+
+    return model
\ No newline at end of file

From 548de8e13dd30137fd84031ccafc66bdb9f85bf1 Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 09:35:49 +0200
Subject: [PATCH 04/46] Move parser out of init and add cli

---
 cpmpy/tools/opb/__init__.py  | 168 +-------------------------
 cpmpy/tools/opb/parser.py    | 221 +++++++++++++++++++++++++++++++++++
 cpmpy/tools/wcnf/__init__.py |  82 +------------
 cpmpy/tools/wcnf/parser.py   | 133 +++++++++++++++++++++
 4 files changed, 364 insertions(+), 240 deletions(-)
 create mode 100644 cpmpy/tools/opb/parser.py
 create mode 100644 cpmpy/tools/wcnf/parser.py

diff --git a/cpmpy/tools/opb/__init__.py b/cpmpy/tools/opb/__init__.py
index 963c021cb..ae751c7e7 100644
--- a/cpmpy/tools/opb/__init__.py
+++ b/cpmpy/tools/opb/__init__.py
@@ -8,172 +8,14 @@
 
 Currently only the restricted OPB PB24 format is supported (without WBO).
 
-
-=================
-List of functions
-=================
+==================
+List of submodules
+==================
 
 .. autosummary::
     :nosignatures:
 
-    read_opb
+    parser
 """
 
-
-import os
-import re
-import lzma
-import cpmpy as cp
-from io import StringIO
-from typing import Union
-from functools import reduce
-from operator import mul
-
-# Regular expressions
-HEADER_RE = re.compile(r'(.*)\s*#variable=\s*(\d+)\s*#constraint=\s*(\d+).*')
-TERM_RE = re.compile(r"([+-]?\d+)((?:\s+~?x\d+)+)")
-OBJ_TERM_RE = re.compile(r'^min:')
-IND_TERM_RE = re.compile(r'([>=|<=|=]+)\s+([+-]?\d+)')
-IND_TERM_RE = re.compile(r'(>=|<=|=)\s*([+-]?\d+)')
-
-
-def _parse_term(line, vars):
-    """
-    Parse a line containing OPB terms into a CPMpy expression.
-
-    Supports:
-        - Linear terms (e.g., +2 x1)
-        - Non-linear terms (e.g., -1 x1 x14)
-        - Negated variables using '~' (e.g., ~x5)
-
-    Arguments:
-        line (str):                 A string containing one or more terms.
-        vars (list[cp.boolvar]):    List or array of CPMpy Boolean variables.
-
-    Returns:
-        cp.Expression: A CPMpy expression representing the sum of all parsed terms.
-
-    Example:
-        >>> _parse_term("2 x2 x3 +3 x4 ~x5", vars)
-        sum([2, 3] * [(IV2*IV3), (IV4*~IV5)])
-    """
-
-    terms = []
-    for w, vars_str in TERM_RE.findall(line):
-        factors = []
-
-        for v in vars_str.split():
-            if v.startswith("~x"):
-                idx = int(v[2:]) # remove "~x"
-                factors.append(~vars[idx])
-            else:
-                idx = int(v[1:]) # remove "x"
-                factors.append(vars[idx])
-        
-        term = int(w) * reduce(mul, factors, 1) # create weighted term
-        terms.append(term)
-
-    return cp.sum(terms)
-
-def _parse_constraint(line, vars):
-    """
-    Parse a single OPB constraint line into a CPMpy comparison expression.
-
-    Arguments:
-        line (str):                 A string representing a single OPB constraint.
-        vars (list[cp.boolvar]):    List or array of CPMpy Boolean variables. Will be index to get the variables for the constraint.
-
-    Returns:
-        cp.expressions.core.Comparison: A CPMpy comparison expression representing
-                                        the constraint.
-
-    Example:
-        >>> _parse_constraint("-1 x1 x14 -1 x1 ~x17 >= -1", vars)
-        sum([-1, -1] * [(IV1*IV14), (IV1*~IV17)]) >= -1
-    """
-
-    op, ind_term = IND_TERM_RE.search(line).groups()
-    lhs = _parse_term(line, vars)
-
-    rhs = int(ind_term) if ind_term.lstrip("+-").isdigit() else vars[int(ind_term)]
-
-    return cp.expressions.core.Comparison(
-        name="==" if op == "=" else ">=",
-        left=lhs,
-        right=rhs
-    )
-
-def read_opb(opb: Union[str, os.PathLike]) -> cp.Model:
-    """
-    Parser for OPB (Pseudo-Boolean) format. Reads in an instance and returns its matching CPMpy model.
-
-    Based on PyPBLib's example parser: https://hardlog.udl.cat/static/doc/pypblib/html/library/index.html#example-from-opb-to-cnf-file
-
-    Supports:
-        - Linear and non-linear terms (e.g., -1 x1 x14 +2 x2)
-        - Negated variables using '~' (e.g., ~x5)
-        - Minimisation objective
-        - Comparison operators in constraints: '=', '>='
-
-    Arguments:
-        opb (str or os.PathLike): 
-            - A file path to an OPB file (optionally LZMA-compressed with `.xz`)
-            - OR a string containing the OPB content directly
-
-    Returns:
-        cp.Model: The CPMpy model of the OPB instance.
-
-    Example:
-        >>> opb_text = '''
-        ... * #variable= 5 #constraint= 2 #equal= 1 intsize= 64 #product= 5 sizeproduct= 13
-        ... min: 2 x2 x3 +3 x4 ~x5 +2 ~x1 x2 +3 ~x1 x2 x3 ~x4 ~x5 ;
-        ... 2 x2 x3 -1 x1 ~x3 = 5 ;
-        ... '''
-        >>> model = read_opb(opb_text)
-        >>> print(model)
-        Model(...)
-    
-    Notes:
-        - Comment lines starting with '*' are ignored.
-        - Only "min:" objectives are supported; "max:" is not recognized.
-    """
-
-    
-    # If opb is a path to a file -> open file
-    if isinstance(opb, (str, os.PathLike)) and os.path.exists(opb):
-        f_open = lzma.open if str(opb).endswith(".xz") else open
-        f = f_open(opb, 'rt')
-    # If opb is a string containing a model -> create a memory-mapped file
-    else:
-        f = StringIO(opb)
-
-    # Look for header on first line
-    line = f.readline()
-    header = HEADER_RE.match(line)
-    if not header: # If not found on first line, look on second (happens when passing multi line string)
-        _line = f.readline()
-        header = HEADER_RE.match(_line)
-        if not header:
-            raise ValueError(f"Missing or incorrect header: \n0: {line}1: {_line}2: ...")
-    nr_vars = int(header.group(2)) + 1
-
-    # Generator without comment lines
-    reader = (l for l in map(str.strip, f) if l and l[0] != '*')
-
-    # CPMpy objects
-    vars = cp.boolvar(shape=nr_vars, name="x")
-    model = cp.Model()
-    
-    # Special case for first line -> might contain objective function
-    first_line = next(reader)
-    if OBJ_TERM_RE.match(first_line):
-        obj_expr = _parse_term(first_line, vars)
-        model.minimize(obj_expr)
-    else: # no objective found, parse as a constraint instead
-        model.add(_parse_constraint(first_line, vars))
-
-    # Start parsing line by line
-    for line in reader:
-        model.add(_parse_constraint(line, vars))
-
-    return model
\ No newline at end of file
+from .parser import read_opb
diff --git a/cpmpy/tools/opb/parser.py b/cpmpy/tools/opb/parser.py
new file mode 100644
index 000000000..846c0874b
--- /dev/null
+++ b/cpmpy/tools/opb/parser.py
@@ -0,0 +1,221 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+##
+## parser.py
+##
+"""
+OPB parser.
+
+Currently only the restricted OPB PB24 format is supported (without WBO).
+
+
+=================
+List of functions
+=================
+
+.. autosummary::
+    :nosignatures:
+
+    read_opb
+"""
+
+
+import os
+import re
+import sys
+import lzma
+import argparse
+import cpmpy as cp
+from io import StringIO
+from typing import Union
+from functools import reduce
+from operator import mul
+
+# Regular expressions
+HEADER_RE = re.compile(r'(.*)\s*#variable=\s*(\d+)\s*#constraint=\s*(\d+).*')
+TERM_RE = re.compile(r"([+-]?\d+)((?:\s+~?x\d+)+)")
+OBJ_TERM_RE = re.compile(r'^min:')
+IND_TERM_RE = re.compile(r'([>=|<=|=]+)\s+([+-]?\d+)')
+IND_TERM_RE = re.compile(r'(>=|<=|=)\s*([+-]?\d+)')
+
+
+def _parse_term(line, vars):
+    """
+    Parse a line containing OPB terms into a CPMpy expression.
+
+    Supports:
+        - Linear terms (e.g., +2 x1)
+        - Non-linear terms (e.g., -1 x1 x14)
+        - Negated variables using '~' (e.g., ~x5)
+
+    Arguments:
+        line (str):                 A string containing one or more terms.
+        vars (list[cp.boolvar]):    List or array of CPMpy Boolean variables.
+
+    Returns:
+        cp.Expression: A CPMpy expression representing the sum of all parsed terms.
+
+    Example:
+        >>> _parse_term("2 x2 x3 +3 x4 ~x5", vars)
+        sum([2, 3] * [(IV2*IV3), (IV4*~IV5)])
+    """
+
+    terms = []
+    for w, vars_str in TERM_RE.findall(line):
+        factors = []
+
+        for v in vars_str.split():
+            if v.startswith("~x"):
+                idx = int(v[2:]) # remove "~x"
+                factors.append(~vars[idx])
+            else:
+                idx = int(v[1:]) # remove "x"
+                factors.append(vars[idx])
+        
+        term = int(w) * reduce(mul, factors, 1) # create weighted term
+        terms.append(term)
+
+    return cp.sum(terms)
+
+def _parse_constraint(line, vars):
+    """
+    Parse a single OPB constraint line into a CPMpy comparison expression.
+
+    Arguments:
+        line (str):                 A string representing a single OPB constraint.
+        vars (list[cp.boolvar]):    List or array of CPMpy Boolean variables. Will be indexed to get the variables for the constraint.
+
+    Returns:
+        cp.expressions.core.Comparison: A CPMpy comparison expression representing
+                                        the constraint.
+
+    Example:
+        >>> _parse_constraint("-1 x1 x14 -1 x1 ~x17 >= -1", vars)
+        sum([-1, -1] * [(IV1*IV14), (IV1*~IV17)]) >= -1
+    """
+
+    op, ind_term = IND_TERM_RE.search(line).groups()
+    lhs = _parse_term(line, vars)
+
+    rhs = int(ind_term) if ind_term.lstrip("+-").isdigit() else vars[int(ind_term)]
+
+    return cp.expressions.core.Comparison(
+        name="==" if op == "=" else ">=",
+        left=lhs,
+        right=rhs
+    )
+
+def read_opb(opb: Union[str, os.PathLike]) -> cp.Model:
+    """
+    Parser for OPB (Pseudo-Boolean) format. Reads in an instance and returns its matching CPMpy model.
+
+    Based on PyPBLib's example parser: https://hardlog.udl.cat/static/doc/pypblib/html/library/index.html#example-from-opb-to-cnf-file
+
+    Supports:
+        - Linear and non-linear terms (e.g., -1 x1 x14 +2 x2)
+        - Negated variables using '~' (e.g., ~x5)
+        - Minimisation objective
+        - Comparison operators in constraints: '=', '>='
+
+    Arguments:
+        opb (str or os.PathLike): 
+            - A file path to an OPB file (optionally LZMA-compressed with `.xz`)
+            - OR a string containing the OPB content directly
+
+    Returns:
+        cp.Model: The CPMpy model of the OPB instance.
+
+    Example:
+        >>> opb_text = '''
+        ... * #variable= 5 #constraint= 2 #equal= 1 intsize= 64 #product= 5 sizeproduct= 13
+        ... min: 2 x2 x3 +3 x4 ~x5 +2 ~x1 x2 +3 ~x1 x2 x3 ~x4 ~x5 ;
+        ... 2 x2 x3 -1 x1 ~x3 = 5 ;
+        ... '''
+        >>> model = read_opb(opb_text)
+        >>> print(model)
+        Model(...)
+    
+    Notes:
+        - Comment lines starting with '*' are ignored.
+        - Only "min:" objectives are supported; "max:" is not recognized.
+    """
+
+    
+    # If opb is a path to a file -> open file
+    if isinstance(opb, (str, os.PathLike)) and os.path.exists(opb):
+        f_open = lzma.open if str(opb).endswith(".xz") else open
+        f = f_open(opb, 'rt')
+    # If opb is a string containing a model -> wrap it in an in-memory text stream
+    else:
+        f = StringIO(opb)
+
+    # Look for header on first line
+    line = f.readline()
+    header = HEADER_RE.match(line)
+    if not header: # If not found on first line, look on second (happens when passing multi line string)
+        _line = f.readline()
+        header = HEADER_RE.match(_line)
+        if not header:
+            raise ValueError(f"Missing or incorrect header: \n0: {line}1: {_line}2: ...")
+    nr_vars = int(header.group(2)) + 1
+
+    # Generator without comment lines
+    reader = (l for l in map(str.strip, f) if l and l[0] != '*')
+
+    # CPMpy objects
+    vars = cp.boolvar(shape=nr_vars, name="x")
+    model = cp.Model()
+    
+    # Special case for first line -> might contain objective function
+    first_line = next(reader)
+    if OBJ_TERM_RE.match(first_line):
+        obj_expr = _parse_term(first_line, vars)
+        model.minimize(obj_expr)
+    else: # no objective found, parse as a constraint instead
+        model.add(_parse_constraint(first_line, vars))
+
+    # Start parsing line by line
+    for line in reader:
+        model.add(_parse_constraint(line, vars))
+
+    return model
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Parse and solve an OPB model using CPMpy")
+    parser.add_argument("model", help="Path to an OPB file (or raw OPB string if --string is given)")
+    parser.add_argument("-s", "--solver", default=None, help="Solver name to use (default: CPMpy's default)")
+    parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw OPB string instead of a file path")
+    parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)")
+    args = parser.parse_args()
+
+    # Build the CPMpy model
+    try:
+        if args.string:
+            model = read_opb(args.model)
+        else:
+            model = read_opb(os.path.expanduser(args.model))
+    except Exception as e:
+        sys.stderr.write(f"Error reading model: {e}\n")
+        sys.exit(1)
+
+    # Solve the model
+    try:
+        if args.solver:
+            result = model.solve(solver=args.solver, time_limit=args.time_limit)
+        else:
+            result = model.solve(time_limit=args.time_limit)
+    except Exception as e:
+        sys.stderr.write(f"Error solving model: {e}\n")
+        sys.exit(1)
+
+    # Print results
+    print("Status:", model.status())
+    if result is not None:
+        if model.has_objective():
+            print("Objective:", model.objective_value())
+    else:
+        print("No solution found.")
+
+if __name__ == "__main__":
+    main()
diff --git a/cpmpy/tools/wcnf/__init__.py b/cpmpy/tools/wcnf/__init__.py
index 5ce83a146..e2db10412 100644
--- a/cpmpy/tools/wcnf/__init__.py
+++ b/cpmpy/tools/wcnf/__init__.py
@@ -7,86 +7,14 @@
 Set of utilities for working with WCNF-formatted CP models.
 
 
-=================
-List of functions
-=================
+==================
+List of submodules
+==================
 
 .. autosummary::
     :nosignatures:
 
-    read_wcnf
+    parser
 """
 
-
-import os
-import lzma
-import cpmpy as cp
-from io import StringIO
-from typing import Union
-
-
-def _get_var(i, vars_dict):
-    """
-    Returns CPMpy boolean decision variable matching to index `i` if exists, else creates a new decision variable.
-
-    Arguments:
-        i: index
-        vars_dict (dict): dictionary to keep track of previously generated decision variables
-    """
-    if i not in vars_dict:
-        vars_dict[i] = cp.boolvar(name=f"x{i}") # <- be carefull that name doesn't clash with generated variables during transformations / user variables
-    return vars_dict[i]
-
-
-def read_wcnf(wcnf: Union[str, os.PathLike]) -> cp.Model:
-    """
-    Parser for WCNF format. Reads in an instance and returns its matching CPMpy model.
-
-    Arguments: 
-        wcnf (str or os.PathLike):
-            - A file path to an WCNF file (optionally LZMA-compressed with `.xz`)
-            - OR a string containing the WCNF content directly
-
-    Returns:
-        cp.Model: The CPMpy model of the WCNF instance.
-    """
-    # If wcnf is a path to a file -> open file
-    if isinstance(wcnf, (str, os.PathLike)) and os.path.exists(wcnf):
-        f_open = lzma.open if str(wcnf).endswith(".xz") else open
-        f = f_open(wcnf, "rt")
-    # If wcnf is a string containing a model -> create a memory-mapped file
-    else:
-        f = StringIO(wcnf)
-
-    model = cp.Model()
-    vars = {}
-    soft_terms = []
-
-    for raw in f:
-        line = raw.strip()
-
-        # Empty line or a comment -> skip
-        if not line or line.startswith("c"):
-            continue
-
-        # Hard clause
-        if line[0] == "h":
-            literals = map(int, line[1:].split())
-            clause = [_get_var(i, vars) if i > 0 else ~_get_var(-i, vars)
-                      for i in literals if i != 0]
-            model.add(cp.any(clause))
-
-        # Soft clause (weight first)
-        else:
-            parts = line.split()
-            weight = int(parts[0])
-            literals = map(int, parts[1:])
-            clause = [_get_var(i, vars) if i > 0 else ~_get_var(-i, vars)
-                    for i in literals if i != 0]
-            soft_terms.append(weight * cp.any(clause))
-
-    # Objective = sum of soft clause terms
-    if soft_terms:
-        model.maximize(sum(soft_terms))
-
-    return model
\ No newline at end of file
+from .parser import read_wcnf
diff --git a/cpmpy/tools/wcnf/parser.py b/cpmpy/tools/wcnf/parser.py
new file mode 100644
index 000000000..72cec94c8
--- /dev/null
+++ b/cpmpy/tools/wcnf/parser.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+##
+## parser.py
+##
+"""
+Parser for the WCNF format.
+
+
+=================
+List of functions
+=================
+
+.. autosummary::
+    :nosignatures:
+
+    read_wcnf
+"""
+
+
+import os
+import sys
+import lzma
+import argparse
+import cpmpy as cp
+from io import StringIO
+from typing import Union
+
+
+def _get_var(i, vars_dict):
+    """
+    Returns the CPMpy boolean decision variable matching index `i` if it exists, else creates a new decision variable.
+
+    Arguments:
+        i: index
+        vars_dict (dict): dictionary to keep track of previously generated decision variables
+    """
+    if i not in vars_dict:
+        vars_dict[i] = cp.boolvar(name=f"x{i}") # <- be careful that name doesn't clash with generated variables during transformations / user variables
+    return vars_dict[i]
+
+
+def read_wcnf(wcnf: Union[str, os.PathLike]) -> cp.Model:
+    """
+    Parser for WCNF format. Reads in an instance and returns its matching CPMpy model.
+
+    Arguments: 
+        wcnf (str or os.PathLike):
+            - A file path to a WCNF file (optionally LZMA-compressed with `.xz`)
+            - OR a string containing the WCNF content directly
+
+    Returns:
+        cp.Model: The CPMpy model of the WCNF instance.
+    """
+    # If wcnf is a path to a file -> open file
+    if isinstance(wcnf, (str, os.PathLike)) and os.path.exists(wcnf):
+        f_open = lzma.open if str(wcnf).endswith(".xz") else open
+        f = f_open(wcnf, "rt")
+    # If wcnf is a string containing a model -> wrap it in an in-memory text stream
+    else:
+        f = StringIO(wcnf)
+
+    model = cp.Model()
+    vars = {}
+    soft_terms = []
+
+    for raw in f:
+        line = raw.strip()
+
+        # Empty line or a comment -> skip
+        if not line or line.startswith("c"):
+            continue
+
+        # Hard clause
+        if line[0] == "h":
+            literals = map(int, line[1:].split())
+            clause = [_get_var(i, vars) if i > 0 else ~_get_var(-i, vars)
+                      for i in literals if i != 0]
+            model.add(cp.any(clause))
+
+        # Soft clause (weight first)
+        else:
+            parts = line.split()
+            weight = int(parts[0])
+            literals = map(int, parts[1:])
+            clause = [_get_var(i, vars) if i > 0 else ~_get_var(-i, vars)
+                    for i in literals if i != 0]
+            soft_terms.append(weight * cp.any(clause))
+
+    # Objective = sum of soft clause terms
+    if soft_terms:
+        model.maximize(sum(soft_terms))
+
+    return model
+
+def main():
+    parser = argparse.ArgumentParser(description="Parse and solve a WCNF model using CPMpy")
+    parser.add_argument("model", help="Path to a WCNF file (or raw WCNF string if --string is given)")
+    parser.add_argument("-s", "--solver", default=None, help="Solver name to use (default: CPMpy's default)")
+    parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw WCNF string instead of a file path")
+    parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)")
+    args = parser.parse_args()
+
+    # Build the CPMpy model
+    try:
+        if args.string:
+            model = read_wcnf(args.model)
+        else:
+            model = read_wcnf(os.path.expanduser(args.model))
+    except Exception as e:
+        sys.stderr.write(f"Error reading model: {e}\n")
+        sys.exit(1)
+
+    # Solve the model
+    try:
+        if args.solver:
+            result = model.solve(solver=args.solver, time_limit=args.time_limit)
+        else:
+            result = model.solve(time_limit=args.time_limit)
+    except Exception as e:
+        sys.stderr.write(f"Error solving model: {e}\n")
+        sys.exit(1)
+
+    # Print results
+    print("Status:", model.status())
+    if result is not None:
+        if model.has_objective():
+            print("Objective:", model.objective_value())
+    else:
+        print("No solution found.")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 450502570a36a8958610b36554dd2ced0f0814e7 Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 11:53:31 +0200
Subject: [PATCH 05/46] Add MSE and OPB datasets

---
 cpmpy/tools/datasets/_base.py     |  85 +++++++++++++++++++
 cpmpy/tools/datasets/model/mse.py | 104 +++++++++++++++++++++++
 cpmpy/tools/datasets/model/opb.py | 135 ++++++++++++++++++++++++++++++
 3 files changed, 324 insertions(+)
 create mode 100644 cpmpy/tools/datasets/_base.py
 create mode 100644 cpmpy/tools/datasets/model/mse.py
 create mode 100644 cpmpy/tools/datasets/model/opb.py

diff --git a/cpmpy/tools/datasets/_base.py b/cpmpy/tools/datasets/_base.py
new file mode 100644
index 000000000..3c5338489
--- /dev/null
+++ b/cpmpy/tools/datasets/_base.py
@@ -0,0 +1,85 @@
+"""
+Dataset Base Class
+
+This module defines the abstract `_Dataset` class, which serves as the foundation
+for loading and managing benchmark instance collections in CPMpy-based experiments.  
+It standardizes how datasets are stored, accessed, and optionally transformed.
+"""
+
+from abc import ABC, abstractmethod
+import pathlib
+from typing import Any, Tuple
+
+class _Dataset(ABC):
+    """
+    Abstract base class for PyTorch-style datasets of benchmarking instances.
+
+    The `_Dataset` class provides a standardized interface for downloading and
+    accessing benchmark instances. This class should not be used on its own.
+    """
+
+    def __init__(
+            self, 
+            dataset_dir: str = ".",
+            transform=None, target_transform=None, 
+            download: bool = False,
+            extension:str=".txt",
+            **kwargs
+        ):
+        self.dataset_dir = pathlib.Path(dataset_dir)
+        self.transform = transform
+        self.target_transform = target_transform
+        self.extension = extension
+
+        if not self.dataset_dir.exists():
+            if not download:
+                raise ValueError(f"Dataset not found. Please set download=True to download the dataset.")
+            else:
+                self.download()
+                
+    @abstractmethod
+    def category(self):
+        pass
+
+    @abstractmethod
+    def download(self, *args, **kwargs):
+        pass
+
+    def metadata(self, file):
+        metadata = self.category() | {
+            'name': pathlib.Path(file).stem.replace(self.extension, ''),
+            'path': file,
+        }
+        return metadata
+    
+    def __len__(self) -> int:
+        """Return the total number of instances."""
+        return len(list(self.dataset_dir.glob(f"*{self.extension}")))
+    
+
+    def __getitem__(self, index: int) -> Tuple[Any, Any]:
+        """Return `(instance, metadata)` for the `index`-th instance file in sorted order."""
+        # Glob once and sort so that a given index always maps to the same file
+        files = sorted(self.dataset_dir.glob(f"*{self.extension}"))
+        if index < 0 or index >= len(files):
+            raise IndexError("Index out of range")
+        file_path = str(files[index])
+
+        instance = file_path
+        if self.transform:
+            # does not need to remain a filename (may e.g. become a parsed model)
+            instance = self.transform(file_path)
+
+        # Metadata comes from the on-disk path, which is still valid when the
+        # transform returned something that is not a path
+        metadata = self.metadata(file=file_path)
+        if self.target_transform:
+            metadata = self.target_transform(metadata)
+
+        return instance, metadata
+    
+    
+
+
+
+
diff --git a/cpmpy/tools/datasets/model/mse.py b/cpmpy/tools/datasets/model/mse.py
new file mode 100644
index 000000000..a749d75d0
--- /dev/null
+++ b/cpmpy/tools/datasets/model/mse.py
@@ -0,0 +1,104 @@
+"""
+MaxSAT Evaluation (MSE) Dataset
+
+https://maxsat-evaluations.github.io/
+"""
+
+import zipfile
+import pathlib
+from urllib.request import urlretrieve
+from urllib.error import HTTPError, URLError
+
+from .._base import _Dataset
+
+
+class MSEDataset(_Dataset):  # torch.utils.data.Dataset compatible
+    """
+    MaxSAT Evaluation (MSE) benchmark dataset.
+
+    Provides access to benchmark instances from the MaxSAT Evaluation 
+    competitions. Instances are grouped by `year` and `track` (e.g., 
+    `"exact-unweighted"`, `"exact-weighted"`) and stored as `.wcnf.xz` files. 
+    If the dataset is not available locally, it can be automatically 
+    downloaded and extracted.
+
+    More information on the competition can be found here: https://maxsat-evaluations.github.io/
+    """
+
+    def __init__(
+            self, 
+            root: str = ".", 
+            year: int = 2024, track: str = "exact-unweighted", 
+            transform=None, target_transform=None, 
+            download: bool = False
+        ):
+        """
+        Constructor for a dataset object of the MSE competition.
+
+        Arguments:
+            root (str): Root directory where datasets are stored or will be downloaded to (default="."). 
+            year (int): Competition year of the dataset to use (default=2024).
+            track (str): Track name specifying which subset of the competition instances to load (default="exact-unweighted").
+            transform (callable, optional): Optional transform applied to the instance file path.
+            target_transform (callable, optional): Optional transform applied to the metadata dictionary.
+            download (bool): If True, downloads the dataset if it does not exist locally (default=False).
+
+
+        Raises:
+            ValueError: If the dataset directory does not exist and `download=False`,
+                or if the requested year/track combination is not available.
+        """
+
+        self.root = pathlib.Path(root)
+        self.year = year
+        self.track = track
+
+        dataset_dir = self.root / str(year) / track
+
+        super().__init__(
+            dataset_dir=dataset_dir, 
+            transform=transform, target_transform=target_transform, 
+            download=download, extension=".wcnf.xz"
+        )
+
+
+    def category(self):
+        return {
+            "year": self.year,
+            "track": self.track
+        }
+        
+    
+    def download(self):
+        """Download the track's zip archive into `root` and unpack it flat into `dataset_dir`."""
+        print(f"Downloading MaxSAT Eval {self.year} {self.track} instances...")
+        zip_name = f"mse{str(self.year)[2:]}-{self.track}.zip"
+        url = f"https://www.cs.helsinki.fi/group/coreo/MSE{self.year}-instances/"
+        url_path = url + zip_name
+        zip_path = self.root / zip_name
+
+        # urlretrieve does not create missing directories for the target file
+        self.root.mkdir(parents=True, exist_ok=True)
+        try:
+            urlretrieve(url_path, str(zip_path))
+        except (HTTPError, URLError) as e:
+            raise ValueError(f"No dataset available for year {self.year} and track {self.track}. Error: {str(e)}")
+
+        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+            # Create track folder in root directory, parents=True ensures recursive creation
+            self.dataset_dir.mkdir(parents=True, exist_ok=True)
+            for file_info in zip_ref.infolist():
+                if file_info.is_dir():
+                    continue  # a directory entry would otherwise be written out as an empty file
+                # Flatten: keep only the base name, dropping any folder prefix inside the archive
+                filename = pathlib.Path(file_info.filename).name
+                with zip_ref.open(file_info) as source, open(self.dataset_dir / filename, 'wb') as target:
+                    target.write(source.read())
+        # Clean up the zip file
+        zip_path.unlink()
+
+
+if __name__ == "__main__":
+    dataset = MSEDataset(year=2024, track="exact-weighted", download=True)
+    print("Dataset size:", len(dataset))
+    print("Instance 0:", dataset[0])
diff --git a/cpmpy/tools/datasets/model/opb.py b/cpmpy/tools/datasets/model/opb.py
new file mode 100644
index 000000000..d3602954c
--- /dev/null
+++ b/cpmpy/tools/datasets/model/opb.py
@@ -0,0 +1,135 @@
+"""
+Pseudo Boolean Competition (PB) Dataset
+
+https://www.cril.univ-artois.fr/PB25/
+"""
+
+import os
+import pathlib
+from urllib.request import urlretrieve
+from urllib.error import HTTPError, URLError
+import tarfile
+
+from .._base import _Dataset
+
+
+class OPBDataset(_Dataset): 
+    """
+    Pseudo Boolean Competition (PB) benchmark dataset.
+
+    Provides access to benchmark instances from the Pseudo Boolean 
+    competitions. Instances are grouped by `year` and `track` (e.g., 
+    `"OPT-LIN"`, `"DEC-LIN"`) and stored as `.opb.xz` files. 
+    If the dataset is not available locally, it can be automatically 
+    downloaded and extracted.
+
+    More information on the competition can be found here: https://www.cril.univ-artois.fr/PB25/
+    """
+
+    def __init__(
+            self, 
+            root: str = ".", 
+            year: int = 2024, track: str = "OPT-LIN", 
+            transform=None, target_transform=None, 
+            download: bool = False
+        ):
+        """
+        Constructor for a dataset object of the PB competition.
+
+        Arguments:
+            root (str): Root directory where datasets are stored or will be downloaded to (default="."). 
+            year (int): Competition year of the dataset to use (default=2024).
+            track (str): Track name specifying which subset of the competition instances to load (default="OPT-LIN").
+            transform (callable, optional): Optional transform applied to the instance file path.
+            target_transform (callable, optional): Optional transform applied to the metadata dictionary.
+            download (bool): If True, downloads the dataset if it does not exist locally (default=False).
+
+
+        Raises:
+            ValueError: If the dataset directory does not exist and `download=False`,
+                or if the requested year/track combination is not available.
+        """
+
+        self.root = pathlib.Path(root)
+        self.year = year
+        self.track = track
+
+        dataset_dir = self.root / str(year) / track
+
+        super().__init__(
+            dataset_dir=dataset_dir, 
+            transform=transform, target_transform=target_transform, 
+            download=download, extension=".opb.xz"
+        )
+
+    def category(self):
+        return {
+            "year": self.year,
+            "track": self.track
+        }
+
+    def metadata(self, file):
+        return super().metadata(file) | {'author': str(file).split(os.sep)[-1].split("_")[0],}
+                
+
+    def download(self):
+        """Download the competition tar into `root` and unpack the selected track flat into `dataset_dir`."""
+        # TODO: add option to filter on competition instances
+        print(f"Downloading OPB {self.year} {self.track} instances...")
+        url = "https://www.cril.univ-artois.fr/PB24/benchs/"  # NOTE(review): pinned to PB24 regardless of self.year
+        year_suffix = str(self.year)[2:]  # Drop the starting '20'
+        url_path = url + f"normalized-PB{year_suffix}.tar"
+        tar_path = self.root / f"normalized-extraPB{year_suffix}.tar"
+        self.root.mkdir(parents=True, exist_ok=True)  # urlretrieve does not create the target directory
+        try:
+            urlretrieve(url_path, str(tar_path))
+        except (HTTPError, URLError) as e:
+            raise ValueError(f"No dataset available for year {self.year}. Error: {str(e)}")
+        # Extract only the specific track folder from the tar
+        with tarfile.open(tar_path, "r:*") as tar_ref:  # r:* handles .tar, .tar.gz, .tar.bz2, etc.
+            # Get the main folder name
+            main_folder = None
+            for name in tar_ref.getnames():
+                if "/" in name:
+                    main_folder = name.split("/")[0]
+                    break
+
+            if main_folder is None:
+                raise ValueError(f"Could not find main folder in tar file")
+
+            # Extract only files from the specified track
+            # Get all unique track names from tar
+            tracks = set()
+            for member in tar_ref.getmembers():
+                parts = member.name.split("/")
+                if len(parts) > 2 and parts[0] == main_folder:
+                    tracks.add(parts[1])
+
+            # Check if requested track exists
+            if self.track not in tracks:
+                raise ValueError(f"Track '{self.track}' not found in dataset. Available tracks: {sorted(tracks)}")
+
+            # Create track folder in root directory
+            self.dataset_dir.mkdir(parents=True, exist_ok=True)
+
+            # Extract files for the specified track
+            prefix = f"{main_folder}/{self.track}/"
+            for member in tar_ref.getmembers():
+                if member.name.startswith(prefix) and member.isfile():
+                    # Path relative to main_folder/track
+                    relative_path = member.name[len(prefix):]
+
+                    # Flatten: replace "/" with "_" to encode subfolders (some instances have clashing names)
+                    flat_name = relative_path.replace("/", "_")
+                    target_path = self.dataset_dir / flat_name
+
+                    with tar_ref.extractfile(member) as source, open(target_path, "wb") as target:
+                        target.write(source.read())
+
+        # Clean up the tar file
+        tar_path.unlink()
+
+if __name__ == "__main__":
+    dataset = OPBDataset(year=2024, track="DEC-LIN", download=True)
+    print("Dataset size:", len(dataset))
+    print("Instance 0:", dataset[0])

From 2b26034cc3e8ecadde39b1b16ba8884b59155673 Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 11:55:12 +0200
Subject: [PATCH 06/46] Rename datasets to dataset

---
 cpmpy/tools/{datasets => dataset}/_base.py     | 0
 cpmpy/tools/{datasets => dataset}/model/mse.py | 0
 cpmpy/tools/{datasets => dataset}/model/opb.py | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename cpmpy/tools/{datasets => dataset}/_base.py (100%)
 rename cpmpy/tools/{datasets => dataset}/model/mse.py (100%)
 rename cpmpy/tools/{datasets => dataset}/model/opb.py (100%)

diff --git a/cpmpy/tools/datasets/_base.py b/cpmpy/tools/dataset/_base.py
similarity index 100%
rename from cpmpy/tools/datasets/_base.py
rename to cpmpy/tools/dataset/_base.py
diff --git a/cpmpy/tools/datasets/model/mse.py b/cpmpy/tools/dataset/model/mse.py
similarity index 100%
rename from cpmpy/tools/datasets/model/mse.py
rename to cpmpy/tools/dataset/model/mse.py
diff --git a/cpmpy/tools/datasets/model/opb.py b/cpmpy/tools/dataset/model/opb.py
similarity index 100%
rename from cpmpy/tools/datasets/model/opb.py
rename to cpmpy/tools/dataset/model/opb.py

From e238c2934c3f5127f3c0e6ab8766a25814941fce Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 13:45:48 +0200
Subject: [PATCH 07/46] Dataset specific 'open'

---
 cpmpy/tools/dataset/_base.py     | 4 ++++
 cpmpy/tools/dataset/model/mse.py | 5 ++++-
 cpmpy/tools/dataset/model/opb.py | 4 ++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/cpmpy/tools/dataset/_base.py b/cpmpy/tools/dataset/_base.py
index 3c5338489..ce2206110 100644
--- a/cpmpy/tools/dataset/_base.py
+++ b/cpmpy/tools/dataset/_base.py
@@ -45,6 +45,10 @@ def category(self):
     def download(self, *args, **kwargs):
         pass
 
+    @abstractmethod
+    def open(self, instance):
+        pass
+
     def metadata(self, file):
         metadata = self.category() | {
             'name': pathlib.Path(file).stem.replace(self.extension, ''),
diff --git a/cpmpy/tools/dataset/model/mse.py b/cpmpy/tools/dataset/model/mse.py
index a749d75d0..84e8c5dfa 100644
--- a/cpmpy/tools/dataset/model/mse.py
+++ b/cpmpy/tools/dataset/model/mse.py
@@ -4,6 +4,8 @@
 https://maxsat-evaluations.github.io/
 """
 
+import lzma
+import os
 import zipfile
 import pathlib
 from urllib.request import urlretrieve
@@ -11,7 +13,6 @@
 
 from .._base import _Dataset
 
-
 class MSEDataset(_Dataset):  # torch.utils.data.Dataset compatible
     """
     MaxSAT Evaluation (MSE) benchmark dataset.
@@ -97,6 +98,8 @@ def download(self):
         # Clean up the zip file
         zip_path.unlink()
 
+    def open(self, instance: os.PathLike) -> callable:
+        return lzma.open if str(instance).endswith(".xz") else open
 
 if __name__ == "__main__":
     dataset = MSEDataset(year=2024, track="exact-weighted", download=True)
diff --git a/cpmpy/tools/dataset/model/opb.py b/cpmpy/tools/dataset/model/opb.py
index d3602954c..bc051d784 100644
--- a/cpmpy/tools/dataset/model/opb.py
+++ b/cpmpy/tools/dataset/model/opb.py
@@ -4,6 +4,7 @@
 https://www.cril.univ-artois.fr/PB25/
 """
 
+import lzma
 import os
 import pathlib
 from urllib.request import urlretrieve
@@ -129,6 +130,9 @@ def download(self):
         # Clean up the tar file
         tar_path.unlink()
 
+    def open(self, instance: os.PathLike) -> callable:
+        return lzma.open if str(instance).endswith(".xz") else open
+
 if __name__ == "__main__":
     dataset = OPBDataset(year=2024, track="DEC-LIN", download=True)
     print("Dataset size:", len(dataset))

From 669875acbcfc5fa43a1ab139c92fcd9b4c5badf1 Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 13:46:22 +0200
Subject: [PATCH 08/46] Dataset module init file

---
 cpmpy/tools/dataset/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 cpmpy/tools/dataset/__init__.py

diff --git a/cpmpy/tools/dataset/__init__.py b/cpmpy/tools/dataset/__init__.py
new file mode 100644
index 000000000..e69de29bb

From c1bd2fef45bbb4a39413794fbfcfbd551fe54db2 Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 13:48:36 +0200
Subject: [PATCH 09/46] Add benchmark runners

---
 cpmpy/tools/benchmark/__init__.py |   0
 cpmpy/tools/benchmark/_base.py    | 496 ++++++++++++++++++++++++++++++
 cpmpy/tools/benchmark/mse.py      | 205 ++++++++++++
 cpmpy/tools/benchmark/opb.py      | 197 ++++++++++++
 cpmpy/tools/benchmark/runner.py   | 287 +++++++++++++++++
 5 files changed, 1185 insertions(+)
 create mode 100644 cpmpy/tools/benchmark/__init__.py
 create mode 100644 cpmpy/tools/benchmark/_base.py
 create mode 100644 cpmpy/tools/benchmark/mse.py
 create mode 100644 cpmpy/tools/benchmark/opb.py
 create mode 100644 cpmpy/tools/benchmark/runner.py

diff --git a/cpmpy/tools/benchmark/__init__.py b/cpmpy/tools/benchmark/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py
new file mode 100644
index 000000000..85119a822
--- /dev/null
+++ b/cpmpy/tools/benchmark/_base.py
@@ -0,0 +1,496 @@
+"""
+Benchmark framework for CPMpy models.
+
+This module provides the `Benchmark` base class, designed to run constraint programming 
+benchmarks in a structured fashion. It allows reading instances, posting them to different 
+back-end solvers, and handling solver execution with limits on time and memory. 
+It also provides hooks for customizing logging, intermediate solution printing, and 
+error handling. Although this base class can be used on its own (example below),
+users will most likely want to have a look at one of its subclasses for running a specific
+benchmark dataset, e.g. xcsp3, opb, mse, ...
+
+
+Usage Example
+-------------
+>>> from myparser import read_instance    # your custom model parser (or one included in CPMpy)
+>>> bm = Benchmark(reader=read_instance)
+>>> bm.run(
+...     instance="example.extension",     # your benchmark instance (e.g. coming from a CPMpy model dataset)
+...     solver="ortools",
+...     time_limit=30,
+...     mem_limit=1024,
+...     verbose=True
+... )
+Status: OPTIMAL
+Objective: 42
+Solution: ...
+
+"""
+
+
+from abc import ABC
+
+import time
+import random
+import psutil
+import warnings
+from typing import Optional
+
+import cpmpy as cp
+from cpmpy.tools.benchmark import _mib_as_bytes, _wall_time, set_memory_limit, set_time_limit, _bytes_as_mb, _bytes_as_gb
+
+
+class Benchmark(ABC):
+    """
+    Abstract base class for running CPMpy benchmarks.
+
+    The `Benchmark` class provides a standardized framework for reading instances,
+    posting models to solvers, and managing solver runs with resource limits.
+    It is designed to be extended or customized for specific benchmarking needs.    
+    """
+
+    def __init__(self, reader:callable):
+        """
+        Arguments:
+            reader (callable): A parser from a model format to a CPMPy model.
+        """
+        self.reader = reader
+        
+    def read_instance(self, instance) -> cp.Model:
+        """
+        Parse a model instance to a CPMpy model.
+
+        Arguments:
+            instance (str or os.PathLike): The model instance to parse into a CPMpy model.
+        """
+        return self.reader(instance)
+    
+    """
+    Callback methods which can be overwritten to make a custom benchmark run.
+    """
+
+    def print_comment(self, comment:str):
+        print(comment)
+
+    def print_intermediate(self, objective:int):
+        print("Intermediate solution:", objective)
+
+    def print_result(self, s):
+        self.print_comment(s.status())
+
+    def handle_memory_error(self, mem_limit):
+        self.print_comment(f"MemoryError raised. Reached limit of {mem_limit} MiB")
+    
+    def handle_not_implemented(self, e):
+        self.print_comment(str(e))
+
+    def handle_exception(self, e):
+        self.print_comment(f"An {type(e)} got raised: {e}")
+        import traceback
+        self.print_comment("Stack trace:")
+        for line in traceback.format_exc().split('\n'):
+            if line.strip():
+                self.print_comment(line)
+
+    """
+    Solver arguments (can also be tweaked for a specific benchmark).
+    """
+
+    def ortools_arguments(
+            self,
+            model: cp.Model,
+            cores: Optional[int] = None,
+            seed: Optional[int] = None,
+            intermediate: bool = False,
+            **kwargs
+        ):
+        # https://github.com/google/or-tools/blob/stable/ortools/sat/sat_parameters.proto
+        res = dict()
+
+        # https://github.com/google/or-tools/blob/1c5daab55dd84bca7149236e4b4fa009e5fd95ca/ortools/flatzinc/cp_model_fz_solver.cc#L1688
+        res |= {
+            "interleave_search": True,
+            "use_rins_lns": False,
+        }
+        if not model.has_objective():
+            res |= { "num_violation_ls": 1 }
+
+        if cores is not None:
+            res |= { "num_search_workers": cores }
+        if seed is not None: 
+            res |= { "random_seed": seed }
+
+        if intermediate and model.has_objective():
+            # Define custom ORT solution callback, then register it
+            _self = self
+            from ortools.sat.python import cp_model as ort
+            class OrtSolutionCallback(ort.CpSolverSolutionCallback):
+                """
+                    For intermediate objective printing.
+                """
+
+                def __init__(self):
+                    super().__init__()
+                    self.__start_time = time.time()
+                    self.__solution_count = 1
+
+                def on_solution_callback(self):
+                    """Called on each new solution."""
+                    
+                    current_time = time.time()
+                    obj = int(self.ObjectiveValue())
+                    _self.print_comment('Solution %i, time = %0.2fs' % 
+                                (self.__solution_count, current_time - self.__start_time))
+                    _self.print_intermediate(obj)
+                    self.__solution_count += 1
+                
+
+                def solution_count(self):
+                    """Returns the number of solutions found."""
+                    return self.__solution_count
+                
+            # Register the callback
+            res |= { "solution_callback": OrtSolutionCallback() }
+
+        def internal_options(solver: "CPM_ortools"):
+            # https://github.com/google/or-tools/blob/1c5daab55dd84bca7149236e4b4fa009e5fd95ca/ortools/flatzinc/cp_model_fz_solver.cc#L1688
+            solver.ort_solver.parameters.subsolvers.extend(["default_lp", "max_lp", "quick_restart"])
+            if not model.has_objective():
+                solver.ort_solver.parameters.subsolvers.append("core_or_no_lp")
+            if len(solver.ort_model.proto.search_strategy) != 0:
+                solver.ort_solver.parameters.subsolvers.append("fixed")
+
+        return res, internal_options
+    
+    def exact_arguments(
+            self,
+            seed: Optional[int] = None, 
+            **kwargs
+        ):
+        # Documentation: https://gitlab.com/JoD/exact/-/blob/main/src/Options.hpp?ref_type=heads
+        res = dict()
+        if seed is not None: 
+            res |= { "seed": seed }
+
+        return res, None
+
+    def choco_arguments(self, **kwargs):
+        # Documentation: https://github.com/chocoteam/pychoco/blob/master/pychoco/solver.py
+        return {}, None  # (solver kwargs, internal-options callable): nothing is configured for choco
+
+    def z3_arguments(
+            self,
+            model: cp.Model,
+            cores: int = 1,
+            seed: Optional[int] = None,
+            mem_limit: Optional[int] = None,
+            **kwargs
+        ):
+        # Documentation: https://microsoft.github.io/z3guide/programming/Parameters/
+        # -> is outdated, just let it crash and z3 will report the available options
+
+        res = dict()
+        
+        if model.has_objective():
+            # Opt does not seem to support setting random seed or max memory
+            pass
+        else:
+            # Sat parameters
+            if cores is not None:
+                res |= { "threads": cores }  # TODO what with hyperthreadding, when more threads than cores
+            if seed is not None: 
+                res |= { "random_seed": seed }
+            if mem_limit is not None:
+                res |= { "max_memory": _bytes_as_mb(mem_limit) }
+
+        return res, None
+
+    def minizinc_arguments(
+            self,
+            solver: str,
+            cores: Optional[int] = None,
+            seed: Optional[int] = None,
+            **kwargs
+        ):
+        # Documentation: https://minizinc-python.readthedocs.io/en/latest/api.html#minizinc.instance.Instance.solve
+        res = dict()
+        if cores is not None:
+            res |= { "processes": cores }
+        if seed is not None: 
+            res |= { "random_seed": seed }
+
+        #if solver.endswith("gecode"):
+            # Documentation: https://www.minizinc.org/doc-2.4.3/en/lib-gecode.html
+        #elif solver.endswith("chuffed"):
+            # Documentation: 
+            # - https://www.minizinc.org/doc-2.5.5/en/lib-chuffed.html
+            # - https://github.com/chuffed/chuffed/blob/develop/chuffed/core/options.h
+        
+        return res, None
+
+    def gurobi_arguments(
+            self,
+            model: cp.Model,
+            cores: Optional[int] = None,
+            seed: Optional[int] = None,
+            mem_limit: Optional[int] = None,
+            intermediate: bool = False,
+            **kwargs
+        ):
+        # Documentation: https://www.gurobi.com/documentation/9.5/refman/parameters.html#sec:Parameters
+        res = dict()
+        if cores is not None:
+            res |= { "Threads": cores }
+        if seed is not None:
+            res |= { "Seed": seed }
+        if mem_limit is not None:
+            res |= { "MemLimit": _bytes_as_gb(mem_limit) }
+
+        if intermediate and model.has_objective():
+
+            _self = self
+
+            class GurobiSolutionCallback:
+                def __init__(self, model:cp.Model):
+                    self.__start_time = time.time()
+                    self.__solution_count = 0
+                    self.model = model
+
+                def callback(self, *args, **kwargs):
+                    current_time = time.time()
+                    model, state = args
+
+                    # Callback codes: https://www.gurobi.com/documentation/current/refman/cb_codes.html#sec:CallbackCodes
+                    
+                    from gurobipy import GRB
+                    # if state == GRB.Callback.MESSAGE: # verbose logging
+                    #     print_comment("log message: " + str(model.cbGet(GRB.Callback.MSG_STRING)))
+                    if state == GRB.Callback.MIP: # callback from the MIP solver
+                        if model.cbGet(GRB.Callback.MIP_SOLCNT) > self.__solution_count: # do we have a new solution?
+
+                            obj = int(model.cbGet(GRB.Callback.MIP_OBJBST))
+                            _self.print_comment('Solution %i, time = %0.2fs' % 
+                                        (self.__solution_count, current_time - self.__start_time))
+                            _self.print_intermediate(obj)
+                            self.__solution_count = model.cbGet(GRB.Callback.MIP_SOLCNT)
+
+            res |= { "solution_callback": GurobiSolutionCallback(model).callback }
+
+        return res, None
+
+    def cpo_arguments(
+            self,
+            model: cp.Model,
+            cores: Optional[int] = None,
+            seed: Optional[int] = None,
+            intermediate: bool = False,
+            **kwargs
+        ):
+        # Documentation: https://ibmdecisionoptimization.github.io/docplex-doc/cp/docplex.cp.parameters.py.html#docplex.cp.parameters.CpoParameters
+        res = dict()
+        if cores is not None:
+            res |= { "Workers": cores }
+        if seed is not None:
+            res |= { "RandomSeed": seed }
+
+        if intermediate and model.has_objective():
+            from docplex.cp.solver.solver_listener import CpoSolverListener
+            _self = self
+            class CpoSolutionCallback(CpoSolverListener):
+
+                def __init__(self):
+                    super().__init__()
+                    self.__start_time = time.time()
+                    self.__solution_count = 1
+
+                def result_found(self, solver, sres):
+                    current_time = time.time()
+                    obj = sres.get_objective_value()
+                    if obj is not None:
+                        _self.print_comment('Solution %i, time = %0.2fs' % 
+                                    (self.__solution_count, current_time - self.__start_time))
+                        _self.print_intermediate(obj)
+                        self.__solution_count += 1
+
+                def solution_count(self):
+                    """Returns the number of solutions found."""
+                    return self.__solution_count
+
+            # Register the callback
+            res |= { "solution_callback": CpoSolutionCallback }
+
+        return res, None
+    
+
+    """
+    Methods which can, but most likely shouldn't, be overwritten.
+    """
+    
+    def set_memory_limit(self, mem_limit, verbose=False):
+        set_memory_limit(mem_limit, verbose=verbose)
+
+    def set_time_limit(self, time_limit, verbose=False):
+        p = psutil.Process()
+        if time_limit is not None:
+            set_time_limit(int(time_limit - _wall_time(p) + time.process_time()), verbose=verbose)
+        else:
+            set_time_limit(None)
+
+    def post_model(self, model, solver, solver_args):
+        """
+        Post the model to the selected backend solver.
+        """
+        if solver == "exact": # Exact2 takes its options at creation time
+            s = cp.SolverLookup.get(solver, model, **solver_args)
+            solver_args = dict()  # no more solver args needed
+        else:
+            s = cp.SolverLookup.get(solver, model)
+        return s
+
+    
+    """
+    Internal workings
+    """
+
+    def solver_arguments(
+            self,
+            solver: str, 
+            model: cp.Model, 
+            seed: Optional[int] = None,
+            intermediate: bool = False,
+            cores: int = 1,
+            mem_limit: Optional[int] = None,
+            **kwargs
+        ):
+        """Dispatch to the solver-specific `*_arguments` builder; always returns `(solver kwargs, internal-options callable or None)`."""
+        opt = model.has_objective()
+
+        if solver == "ortools":
+            return self.ortools_arguments(model, cores=cores, seed=seed, intermediate=intermediate, **kwargs)
+        elif solver == "exact":
+            return self.exact_arguments(seed=seed, **kwargs)
+        elif solver == "choco":
+            return self.choco_arguments()
+        elif solver == "z3":
+            return self.z3_arguments(model, cores=cores, seed=seed, mem_limit=mem_limit, **kwargs)
+        elif solver.startswith("minizinc"):  # also can have a subsolver
+            return self.minizinc_arguments(solver, cores=cores, seed=seed, **kwargs)
+        elif solver == "gurobi":
+            return self.gurobi_arguments(model, cores=cores, seed=seed, mem_limit=mem_limit, intermediate=intermediate, opt=opt, **kwargs)
+        elif solver == "cpo":
+            return self.cpo_arguments(model=model, cores=cores, seed=seed, intermediate=intermediate, **kwargs)
+        else:
+            self.print_comment(f"setting parameters of {solver} is not (yet) supported")
+            return dict(), None  # same 2-tuple shape as every other branch
+
+    def run(
+        self,
+        instance:str,                           # path to the instance to run
+        open:Optional[callable] = None,         # how to 'open' the instance file
+        seed: Optional[int] = None,             # random seed
+        time_limit: Optional[int] = None,       # time limit for this single instance
+        mem_limit: Optional[int] = None,        # MiB: 1024 * 1024 bytes
+        cores: int = 1,                         # number of cores, forwarded to solver_arguments()
+        solver: str = None,                     # which backend solver to use
+        time_buffer: int = 0,                   # seconds withheld from the time budget handed to the solver
+        intermediate: bool = False,             # forwarded to solver_arguments()
+        verbose: bool = False,                  # print timing comments; when False, all warnings are silenced
+        **kwargs,                               # forwarded to solver_arguments()
+    ):
+        """Run one instance: apply the limits, parse, post, solve and print the result; errors go to the matching handle_* hook and are re-raised."""
+        if not verbose:
+            warnings.filterwarnings("ignore")
+            
+        try:
+
+            # --------------------------- Global Configuration --------------------------- #
+
+            # Get the current process
+            p = psutil.Process()
+
+            # pychoco currently does not support setting the mem_limit
+            if solver == "choco" and mem_limit is not None:
+                warnings.warn("'mem_limit' is currently not supported with choco, issues with GraalVM")
+                mem_limit = None
+
+            # Set random seed (if provided)
+            if seed is not None:
+                random.seed(seed)
+
+            # Set memory limit (if provided)
+            if mem_limit is not None:
+                self.set_memory_limit(mem_limit, verbose=verbose)
+
+            # Set time limit (if provided)
+            if time_limit is not None:
+                self.set_time_limit(time_limit, verbose=verbose) # set remaining process time != wall time
+    
+            # ------------------------------ Parse instance ------------------------------ #
+
+            time_parse = time.time()
+            model = self.read_instance(instance, open=open)
+            time_parse = time.time() - time_parse
+            if verbose: self.print_comment(f"took {time_parse:.4f} seconds to parse model [{instance}]")
+
+            # _wall_time() is defined elsewhere; presumably the elapsed wall-clock seconds of process p -- TODO confirm
+            if time_limit and time_limit < _wall_time(p):
+                raise TimeoutError("Time's up after parse")
+            
+            # ------------------------ Post CPMpy model to solver ------------------------ #
+
+            # solver_args are later passed to solve(); internal_options (may be None) is called on the solver object first
+            solver_args, internal_options = self.solver_arguments(solver, model=model, seed=seed,
+                                        intermediate=intermediate,
+                                        cores=cores, mem_limit=_mib_as_bytes(mem_limit) if mem_limit is not None else None,
+                                        **kwargs)
+
+            # Post model to solver
+            time_post = time.time()
+            s = self.post_model(model, solver, solver_args)
+            time_post = time.time() - time_post
+            if verbose: self.print_comment(f"took {time_post:.4f} seconds to post model to {solver}")
+
+            if time_limit and time_limit < _wall_time(p):
+                raise TimeoutError("Time's up after post")
+            
+            # ------------------------------- Solve model ------------------------------- #
+            
+            if time_limit:
+                # give solver only the remaining time
+                time_limit = time_limit - _wall_time(p) - time_buffer
+                # disable signal-based time limit and let the solver handle it (solvers don't play well with difference between cpu and wall time)
+                self.set_time_limit(None)
+                
+                if verbose: self.print_comment(f"{time_limit}s left to solve")
+            
+            time_solve = time.time()
+            try:
+                if internal_options is not None:
+                    internal_options(s) # Set more internal solver options (need access to native solver object)
+                is_sat = s.solve(time_limit=time_limit, **solver_args)  # return value is not used below
+            except RuntimeError as e:
+                if "Program interrupted by user." in str(e): # Special handling for Exact
+                    raise TimeoutError("Exact interrupted due to timeout")
+                else:
+                    raise e
+
+            time_solve = time.time() - time_solve
+            if verbose: self.print_comment(f"took {time_solve:.4f} seconds to solve")
+
+            # ------------------------------- Print result ------------------------------- #
+
+            self.print_result(s)
+
+            # ------------------------------------- - ------------------------------------ #
+
+            # Each handler below reports through its handle_* hook and then re-raises for the caller
+        except MemoryError as e:
+            self.handle_memory_error(mem_limit)
+            raise e
+        except NotImplementedError as e:
+            self.handle_not_implemented(e)
+            raise e
+        except Exception as e:
+            self.handle_exception(e)
+            raise e
+        
+    
+    
\ No newline at end of file
diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py
new file mode 100644
index 000000000..08084b645
--- /dev/null
+++ b/cpmpy/tools/benchmark/mse.py
@@ -0,0 +1,205 @@
+"""
+MSE competition as a CPMpy benchmark
+
+This module provides a benchmarking framework for running CPMpy on MaxSAT Evaluation (MSE) 
+competition instances encoded in WCNF (Weighted CNF) format. It extends the generic 
+`Benchmark` base class with MSE-specific logging and result reporting in DIMACS-like format.
+
+Command-line Interface
+----------------------
+This script can be run directly to benchmark solvers on MSE datasets.
+
+Usage:
+    python mse.py --year 2024 --track exact-weighted --solver ortools
+
+Arguments:
+    --year          Competition year (e.g., 2024).
+    --track         Track type (e.g., exact-weighted, exact-unweighted).
+    --solver        Solver name (e.g., ortools, exact, choco, ...).
+    --workers       Number of parallel workers to use.
+    --time-limit    Time limit in seconds per instance.
+    --mem-limit     Memory limit in MB per instance.
+    --cores         Number of cores to assign to a single instance.
+    --output-dir    Output directory for CSV files.
+    --verbose       Show solver output if set.
+    --intermediate  Report intermediate solutions if supported.
+
+===============
+List of classes
+===============
+
+.. autosummary::
+    :nosignatures:
+
+    MSEBenchmark
+
+=================
+List of functions
+=================
+
+.. autosummary::
+    :nosignatures:
+
+    solution_mse
+"""
+
+import argparse
+from datetime import datetime
+from enum import Enum
+from pathlib import Path
+import warnings
+
+from cpmpy.tools.benchmark.runner import benchmark_runner
+from _base import Benchmark
+
+from cpmpy.tools.wcnf import read_wcnf
+from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus
+
+
+class ExitStatus(Enum):  # status strings printed on the 's' line
+    unsupported: str = "UNSUPPORTED"  # the instance uses an unsupported feature (e.g. an unsupported global constraint)
+    sat: str = "SATISFIABLE"  # CSP: a solution was found | COP: a solution was found, optimality not proven
+    optimal: str = "OPTIMUM FOUND"  # COP: an optimal solution was found
+    unsat: str = "UNSATISFIABLE"  # the instance has no solution
+    unknown: str = "UNKNOWN"  # every other case
+
+def solution_mse(model):
+    """
+    Render the current solution of a solved model as an MSE value string.
+
+    Arguments:
+        model (cp.solvers.SolverInterface): Solver object whose `user_vars` carry the solution values.
+
+    Returns:
+        str: Space-separated 0/1 flags, one per variable whose name starts with "BV", ordered by the digits in its name.
+    """
+    def _index(var):  # all digits of the name, read as one integer
+        return int("".join(ch for ch in var.name if ch.isdigit()))
+    return " ".join("1" if var.value() else "0" for var in sorted((v for v in model.user_vars if v.name.startswith("BV")), key=_index))  # name filter: workaround for aux vars in user_vars
+
+class MSEBenchmark(Benchmark):
+
+    """
+    Benchmark runner for MSE (MaxSAT Evaluation) competition instances.
+
+    This class extends `Benchmark` to implement MSE-specific solution printing
+    in DIMACS-like output format (`c`, `s`, `v`, `o` lines). It uses CPMpy's `read_wcnf`
+    to parse WCNF (Weighted CNF) instances and runs them on a selected solver supported 
+    by CPMpy.
+    """
+
+    def __init__(self):
+        super().__init__(reader=read_wcnf)
+    
+    def print_comment(self, comment:str):  # 'c' line; note the "\r\n" terminator, unlike the other printers
+        print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True)
+
+    def print_status(self, status: ExitStatus) -> None:  # 's' line
+        print('s' + chr(32) + status.value, end="\n", flush=True)
+
+    def print_value(self, value: str) -> None:  # 'v' line(s)
+        value = value[:-2].replace("\n", "\nv" + chr(32)) + value[-2:]  # newlines outside the last two chars start a new 'v ' line
+        print('v' + chr(32) + value, end="\n", flush=True)
+
+    def print_objective(self, objective: int) -> None:  # 'o' line
+        print('o' + chr(32) + str(objective), end="\n", flush=True)
+
+    def print_result(self, s):  # s: solved CPMpy solver object
+        if s.status().exitstatus == CPMStatus.OPTIMAL:
+            self.print_value(solution_mse(s))
+            self.print_status(ExitStatus.optimal)
+        elif s.status().exitstatus == CPMStatus.FEASIBLE:
+            self.print_value(solution_mse(s))
+            self.print_status(ExitStatus.sat)
+        elif s.status().exitstatus == CPMStatus.UNSATISFIABLE:
+            self.print_status(ExitStatus.unsat)
+        else:
+            self.print_comment("Solver did not find any solution within the time/memory limit")
+            self.print_status(ExitStatus.unknown)
+
+    def handle_memory_error(self, mem_limit):
+        super().handle_memory_error(mem_limit)
+        self.print_status(ExitStatus.unknown)
+
+    def handle_not_implemented(self, e):
+        super().handle_not_implemented(e)
+        self.print_status(ExitStatus.unsupported)
+
+    def handle_exception(self, e):
+        super().handle_exception(e)
+        self.print_status(ExitStatus.unknown)
+
+    def parse_output_line(self, line, result):
+        """Fold one captured stdout message of a run into `result`, the CSV row dict whose keys the runner pre-sets to None."""
+        if line.startswith('s '):
+            result['status'] = line[2:].strip()
+        elif line.startswith('v '):
+            # keep the first physical line of the message; later 'v' messages are appended to it
+            solution = line.split("\n")[0][2:].strip()
+            if result.get('solution') is None:  # was `solution not in result`, which tested the text as a dict key
+                result['solution'] = solution
+            else:
+                result['solution'] = result['solution'] + ' ' + str(solution)
+        elif line.startswith('o '):
+            obj = int(line[2:].strip())
+            steps = result.get('intermediate') or []
+            if steps and steps[-1][1] is None:
+                steps[-1] = (steps[-1][0], obj)  # pair with the time parked by the preceding 'c Solution' line
+            else:
+                steps.append((None, obj))  # no timing comment preceded this objective
+            result['intermediate'] = steps
+            result['objective_value'] = obj
+        elif line.startswith('c Solution'):
+            # Solution time of an intermediate solution, parked as (time, None) until its 'o' line arrives.
+            # (It used to live in a function local, which is gone by the next call: every 'o' line raised.)
+            sol_time = float(line.split(', time = ')[-1].replace('s', '').rstrip())
+            result['intermediate'] = (result.get('intermediate') or []) + [(sol_time, None)]
+        elif line.startswith('c took '):
+            # Parse timing information
+            parts = line.split(' seconds to ')
+            if len(parts) == 2:
+                time_val = float(parts[0].replace('c took ', ''))
+                action = parts[1].strip()
+                for prefix, field in (('parse', 'time_parse'), ('convert', 'time_model'), ('post', 'time_post'), ('solve', 'time_solve')):
+                    if action.startswith(prefix):
+                        result[field] = time_val
+                        break
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description='Benchmark solvers on MSE instances')
+    parser.add_argument('--year', type=int, required=True, help='Competition year (e.g., 2024)')
+    parser.add_argument('--track', type=str, required=True, help='Track type (e.g., exact-weighted, exact-unweighted)')
+    parser.add_argument('--solver', type=str, required=True, help='Solver name (e.g., ortools, exact, choco, ...)')
+    parser.add_argument('--workers', type=int, default=4, help='Number of parallel workers')
+    parser.add_argument('--time-limit', type=int, default=300, help='Time limit in seconds per instance')
+    parser.add_argument('--mem-limit', type=int, default=8192, help='Memory limit in MB per instance')
+    parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign to a single instance')
+    parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files')
+    parser.add_argument('--verbose', action='store_true', help='Show solver output')
+    parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions')
+    # parser.add_argument('--checker-path', type=str, default=None,
+    #                 help='Path to the XCSP3 solution checker JAR file')
+    args = parser.parse_args()
+
+    if not args.verbose:
+        warnings.filterwarnings("ignore")
+    
+    # Load benchmark instances (as a dataset)
+    from cpmpy.tools.dataset.model.mse import MSEDataset
+    dataset = MSEDataset(year=args.year, track=args.track, download=True)
+    
+    # Create the directory the CSV is written to (the "mse" subfolder used to be left uncreated)
+    output_dir = Path(args.output_dir) / "mse"
+    output_dir.mkdir(parents=True, exist_ok=True)
+    
+    # Get current timestamp in a filename-safe format
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    
+    # Define output file path with timestamp
+    output_file = str(output_dir / f"mse_{args.year}_{args.track}_{args.solver}_{timestamp}.csv")
+
+    # Run the benchmark (unused CLI fields in vars(args) are absorbed by benchmark_runner's **kwargs)
+    instance_runner = MSEBenchmark()
+    output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args))
+    print(f"Results added to {output_file}")
diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py
new file mode 100644
index 000000000..3fc5202cd
--- /dev/null
+++ b/cpmpy/tools/benchmark/opb.py
@@ -0,0 +1,197 @@
+"""
+PB competition as a CPMpy benchmark
+
+This module provides a benchmarking framework for running CPMpy on PB 
+competition instances. It extends the generic `Benchmark` base class with
+PB Competition-specific logging and result reporting.
+
+Command-line Interface
+----------------------
+This script can be run directly to benchmark solvers on PB competition datasets.
+
+Usage:
+    python opb.py --year 2024 --track OPT-LIN --solver ortools
+
+Arguments:
+    --year          Competition year (e.g., 2024).
+    --track         Track type (e.g., OPT-LIN, DEC-LIN).
+    --solver        Solver name (e.g., ortools, exact, choco, ...).
+    --workers       Number of parallel workers to use.
+    --time-limit    Time limit in seconds per instance.
+    --mem-limit     Memory limit in MB per instance.
+    --cores         Number of cores to assign to a single instance.
+    --output-dir    Output directory for CSV files.
+    --verbose       Show solver output if set.
+    --intermediate  Report intermediate solutions if supported.
+
+===============
+List of classes
+===============
+
+.. autosummary::
+    :nosignatures:
+
+    OPBBenchmark
+
+=================
+List of functions
+=================
+
+.. autosummary::
+    :nosignatures:
+
+    solution_opb
+"""
+
+import warnings
+import argparse
+from enum import Enum
+from pathlib import Path
+from datetime import datetime
+
+# CPMpy
+from cpmpy.tools.benchmark.runner import benchmark_runner
+from cpmpy.tools.benchmark._base import Benchmark
+from cpmpy.tools.opb import read_opb
+from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus
+
+
+class ExitStatus(Enum):  # status strings printed on the 's' line
+    unsupported: str = "UNSUPPORTED"  # the instance uses an unsupported feature (e.g. an unsupported global constraint)
+    sat: str = "SATISFIABLE"  # CSP: a solution was found | COP: a solution was found, optimality not proven
+    optimal: str = "OPTIMUM FOUND"  # COP: an optimal solution was found
+    unsat: str = "UNSATISFIABLE"  # the instance has no solution
+    unknown: str = "UNKNOWN"  # every other case
+
+def solution_opb(model):
+    """
+        Render the solution of a solved model as a literal list (PB24 format, per the original author).
+
+        Arguments:
+            model: solved CPMpy solver object; its `user_vars` are printed
+
+        Returns:
+            Space-separated variable names with "[" and "]" removed, each prefixed by "-" when its value is falsy.
+    """
+    literals = [(v.name.replace("[","").replace("]",""), v.value()) for v in model.user_vars if v.name[:2] not in ["IV", "BV", "B#"]] # name filter: dirty workaround for aux vars in user vars TODO
+    return " ".join(name if val else "-" + name for name, val in literals)
+
+class OPBBenchmark(Benchmark):
+    """
+    The PB competition as a CPMpy benchmark.
+    """
+
+    def __init__(self):
+        super().__init__(reader=read_opb)
+    
+    def print_comment(self, comment:str):  # 'c' line; note the "\r\n" terminator, unlike the other printers
+        print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True)
+
+    def print_status(self, status: ExitStatus) -> None:  # 's' line
+        print('s' + chr(32) + status.value, end="\n", flush=True)
+
+    def print_value(self, value: str) -> None:  # 'v' line(s)
+        value = value[:-2].replace("\n", "\nv" + chr(32)) + value[-2:]  # newlines outside the last two chars start a new 'v ' line
+        print('v' + chr(32) + value, end="\n", flush=True)
+
+    def print_objective(self, objective: int) -> None:  # 'o' line
+        print('o' + chr(32) + str(objective), end="\n", flush=True)
+
+    def print_result(self, s):  # s: solved CPMpy solver object
+        if s.status().exitstatus == CPMStatus.OPTIMAL:
+            self.print_objective(s.objective_value())  # was `self.print_result()`: a TypeError; NOTE(review): assumed intent, confirm
+            self.print_value(solution_opb(s))
+            self.print_status(ExitStatus.optimal)
+        elif s.status().exitstatus == CPMStatus.FEASIBLE:
+            self.print_value(solution_opb(s))
+            self.print_status(ExitStatus.sat)
+        elif s.status().exitstatus == CPMStatus.UNSATISFIABLE:
+            self.print_status(ExitStatus.unsat)
+        else:
+            self.print_comment("Solver did not find any solution within the time/memory limit")
+            self.print_status(ExitStatus.unknown)
+
+    def handle_memory_error(self, mem_limit):
+        super().handle_memory_error(mem_limit)
+        self.print_status(ExitStatus.unknown)
+
+    def handle_not_implemented(self, e):
+        super().handle_not_implemented(e)
+        self.print_status(ExitStatus.unsupported)
+
+    def handle_exception(self, e):
+        super().handle_exception(e)
+        self.print_status(ExitStatus.unknown)
+
+    def parse_output_line(self, line, result):
+        """Fold one captured stdout message of a run into `result`, the CSV row dict whose keys the runner pre-sets to None."""
+        if line.startswith('s '):
+            result['status'] = line[2:].strip()
+        elif line.startswith('v '):
+            # keep the first physical line of the message; later 'v' messages are appended to it
+            solution = line.split("\n")[0][2:].strip()
+            if result.get('solution') is None:  # was `solution not in result`, which tested the text as a dict key
+                result['solution'] = solution
+            else:
+                result['solution'] = result['solution'] + ' ' + str(solution)
+        elif line.startswith('o '):
+            obj = int(line[2:].strip())
+            steps = result.get('intermediate') or []
+            if steps and steps[-1][1] is None:
+                steps[-1] = (steps[-1][0], obj)  # pair with the time parked by the preceding 'c Solution' line
+            else:
+                steps.append((None, obj))  # no timing comment preceded this objective
+            result['intermediate'] = steps
+            result['objective_value'] = obj
+        elif line.startswith('c Solution'):
+            # Solution time of an intermediate solution, parked as (time, None) until its 'o' line arrives.
+            # (It used to live in a function local, which is gone by the next call: every 'o' line raised.)
+            sol_time = float(line.split(', time = ')[-1].replace('s', '').rstrip())
+            result['intermediate'] = (result.get('intermediate') or []) + [(sol_time, None)]
+        elif line.startswith('c took '):
+            # Parse timing information
+            parts = line.split(' seconds to ')
+            if len(parts) == 2:
+                time_val = float(parts[0].replace('c took ', ''))
+                action = parts[1].strip()
+                for prefix, field in (('parse', 'time_parse'), ('convert', 'time_model'), ('post', 'time_post'), ('solve', 'time_solve')):
+                    if action.startswith(prefix):
+                        result[field] = time_val
+                        break
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description='Benchmark solvers on OPB instances')
+    parser.add_argument('--year', type=int, required=True, help='Competition year (e.g., 2023)')
+    parser.add_argument('--track', type=str, required=True, help='Track type (e.g., OPT-LIN, DEC-LIN)')
+    parser.add_argument('--solver', type=str, required=True, help='Solver name (e.g., ortools, exact, choco, ...)')
+    parser.add_argument('--workers', type=int, default=4, help='Number of parallel workers')
+    parser.add_argument('--time-limit', type=int, default=300, help='Time limit in seconds per instance')
+    parser.add_argument('--mem-limit', type=int, default=8192, help='Memory limit in MB per instance')
+    parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign to a single instance')
+    parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files')
+    parser.add_argument('--verbose', action='store_true', help='Show solver output')
+    parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions')
+    args = parser.parse_args()
+
+    if not args.verbose:
+        warnings.filterwarnings("ignore")
+    
+    # Load benchmark instances (as a dataset)
+    from cpmpy.tools.dataset.model.opb import OPBDataset
+    dataset = OPBDataset(year=args.year, track=args.track, download=True)
+    
+    # Create the directory the CSV is written to (the "opb" subfolder used to be left uncreated)
+    output_dir = Path(args.output_dir) / "opb"
+    output_dir.mkdir(parents=True, exist_ok=True)
+    
+    # Get current timestamp in a filename-safe format
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    
+    # Define output file path with timestamp
+    output_file = str(output_dir / f"opb_{args.year}_{args.track}_{args.solver}_{timestamp}.csv")
+
+    # Run the benchmark (unused CLI fields in vars(args) are absorbed by benchmark_runner's **kwargs)
+    instance_runner = OPBBenchmark()
+    output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args))
+    print(f"Results added to {output_file}")
diff --git a/cpmpy/tools/benchmark/runner.py b/cpmpy/tools/benchmark/runner.py
new file mode 100644
index 000000000..325ac54cd
--- /dev/null
+++ b/cpmpy/tools/benchmark/runner.py
@@ -0,0 +1,287 @@
+"""
+Benchmark Runner for CPMpy Instances
+
+This module provides tools to execute benchmark instances in parallel while
+safely capturing solver output, enforcing time and memory limits, and
+writing structured results to a CSV file. The included functions should not
+be used directly, but rather through one of the available benchmarks.
+
+Key Features
+------------
+- Supports running multiple instances in parallel using threads.
+- Executes each instance in a separate subprocess for isolation.
+- Forwards stdout to both console and parent process, preserving output.
+- Handles timeouts and SIGTERM/SIGKILL signals gracefully.
+- Writes results to a CSV file.
+- Optional reporting of intermediate solutions and solution checking.
+"""
+
+import csv
+from io import StringIO
+import os
+import signal
+import time
+import sys
+import warnings
+import traceback
+import multiprocessing
+from tqdm import tqdm
+from typing import Optional, Tuple
+from filelock import FileLock
+from concurrent.futures import ThreadPoolExecutor
+
+from cpmpy.tools.xcsp3.xcsp3_cpmpy import xcsp3_cpmpy, init_signal_handlers, ExitStatus
+
+class Tee:
+    """
+    Write-only stream that fans every write out to a fixed group of streams.
+    """
+    def __init__(self, *streams):
+        """
+        Arguments:
+            *streams: File-like objects exposing write() and flush(), e.g.
+                      sys.stdout, sys.stderr, or a StringIO.
+        """
+        self.streams = streams
+
+    def write(self, data):
+        """
+        Forward `data` to each stream, in the order the streams were given.
+
+        Args:
+            data (str): The text to write.
+        """
+        for stream in self.streams:
+            stream.write(data)
+
+    def flush(self):
+        """
+        Flush each stream, in the order the streams were given.
+        """
+        for stream in self.streams:
+            stream.flush()
+
+class PipeWriter:
+    """
+    Stdout wrapper for a multiprocessing pipe: every non-empty write is sent as one message.
+    """
+    def __init__(self, conn):
+        self.conn = conn  # only its send() method is used
+    def write(self, data):
+        if data:  # avoid empty writes
+            try:
+                self.conn.send(data)
+            except Exception:  # best effort: a failed send must not break print(); bare `except:` also swallowed SystemExit/KeyboardInterrupt
+                pass
+    def flush(self):
+        pass  # no buffering
+
+
+def wrapper(instance_runner, conn, kwargs, verbose):
+    """
+    Run `instance_runner.run(**kwargs)` with stdout redirected into `conn`,
+    then send a final status dict through the same pipe.
+
+    With `verbose`, output is also echoed to the original stdout; otherwise
+    warnings are silenced and output goes to the pipe only.
+    The status dict can be missing when the process was terminated by a SIGTERM.
+    """
+
+    saved_stdout = sys.stdout
+    to_pipe = PipeWriter(conn)
+
+    if verbose:
+        sys.stdout = Tee(saved_stdout, to_pipe) # forward to pipe and console
+    else:
+        warnings.filterwarnings("ignore")
+        sys.stdout = to_pipe # only forward to pipe
+
+    try:
+        init_signal_handlers() # configure OS signal handlers
+        instance_runner.run(**kwargs)
+        conn.send({"status": "ok"})
+    except Exception as exc: # capture exceptions and report in state
+        conn.send({"status": "error", "exception": exc, "traceback": traceback.format_exc()})
+    finally:
+        sys.stdout = saved_stdout
+        conn.close()
+
+def execute_instance(args: Tuple[callable, str, dict, callable, str, int, int, int, str, bool, bool, str]) -> None:
+    """
+    Solve a single benchmark instance and write results to file immediately.
+
+    `args` is a tuple of, in order:
+        instance_runner: Benchmark object; its run() and parse_output_line() are used
+        filename, metadata: Path to the instance file and its metadata dict (copied into the CSV row)
+        open_instance: Dataset-specific callable used to open `filename` for reading
+        solver: Name of the solver to use
+        time_limit, mem_limit, cores: Seconds / MB / number of cores, forwarded to run()
+        output_file: Path to the output CSV file
+        verbose, intermediate: Show solver output / report intermediate solutions
+        checker_path: Not used yet (solution checking is still a TODO)
+    """
+    
+    instance_runner, filename, metadata, open_instance, solver, time_limit, mem_limit, cores, output_file, verbose, intermediate, checker_path = args
+
+    # Fieldnames for the CSV file
+    fieldnames = ['instance'] + list(metadata.keys()) + \
+                 ['solver',
+                  'time_total', 'time_parse', 'time_model', 'time_post', 'time_solve',
+                  'status', 'objective_value', 'solution', 'intermediate', 'checker_result']
+    result = dict.fromkeys(fieldnames)  # init all fields to None
+    for k in metadata.keys():
+        result[k] = metadata[k]
+    result['solver'] = solver
+
+    # Decompress before timers start
+    with open_instance(filename) as f:   # <- dataset-specific callable; not named `open`, so the builtin stays usable for the CSV below
+        filename = StringIO(f.read()) # read to memory-mapped file
+
+    # Start total timing
+    total_start = time.time()
+    
+    # Run the instance in a separate process
+    ctx = multiprocessing.get_context("spawn")
+    parent_conn, child_conn = multiprocessing.Pipe() # communication pipe between processes
+    process = ctx.Process(target=wrapper, args=(
+                                                    instance_runner,
+                                                    child_conn, 
+                                                      {
+                                                          "instance": filename, 
+                                                          "solver": solver, 
+                                                          "time_limit": time_limit, 
+                                                          "mem_limit": mem_limit, 
+                                                          "intermediate": intermediate, 
+                                                          "force_mem_limit": True,
+                                                          "time_buffer": 1,
+                                                          "cores": cores,
+                                                        }, 
+                                                    verbose))
+    process.start()
+    process.join(timeout=time_limit)
+    # NOTE(review): the pipe is only drained after this join; a child that prints a lot may block on send() until then -- confirm
+    # Replicate competition convention on how jobs get terminated
+    if process.is_alive():
+        # Send sigterm to let process know it reached its time limit
+        os.kill(process.pid, signal.SIGTERM)
+        # 1 second grace period
+        process.join(timeout=1)
+        # Kill if still alive
+        if process.is_alive():
+            os.kill(process.pid, signal.SIGKILL)
+            process.join()
+
+    result['time_total'] = time.time() - total_start
+          
+    # Default status if nothing returned by subprocess
+    # -> process exited prematurely due to sigterm
+    status = {"status": "error", "exception": "sigterm"}
+
+    # Parse the output to get status, solution and timings
+    while parent_conn.poll(timeout=1):
+        line = parent_conn.recv()
+
+        # Received a print statement from the subprocess
+        if isinstance(line, str):
+            instance_runner.parse_output_line(line, result)
+        
+        # Received a new status from the subprocess
+        elif isinstance(line, dict):
+            status = line
+
+        else:
+            raise TypeError(f"unexpected message type from subprocess: {type(line).__name__}")  # `raise()` was itself a TypeError
+
+    # Parse the exit status
+    if status["status"] == "error":
+        # Ignore timeouts
+        if "TimeoutError" in repr(status["exception"]):
+            pass
+        # All other exceptions, put in solution field
+        elif result['solution'] is None:
+            result['status'] = ExitStatus.unknown.value
+            result["solution"] = status["exception"]    
+
+    # if checker_path is not None and complete_solution is not None: TODO: generalise 'checkers' for benchmarks
+    #     checker_output, checker_time = run_solution_checker(
+    #         JAR=checker_path,
+    #         instance_location=file_path,
+    #         out_file="'" + complete_solution.replace("\n\r", " ").replace("\n", " ").replace("v   ", "").replace("v ", "")+ "'",
+    #         verbose=verbose,
+    #         cpm_time=result.get('time_solve', 0)  # or total solve time you have
+    #     )
+
+    #     if checker_output is not None:
+    #         result['checker_result'] = checker_output
+    #     else:
+    #         result['checker_result'] = None
+
+    # Use a lock file to prevent concurrent writes
+    lock_file = f"{output_file}.lock"
+    lock = FileLock(lock_file)
+    try:
+        with lock:
+            # Pre-check if file exists to determine if we need to write header
+            write_header = not os.path.exists(output_file)
+
+            with open(output_file, 'a', newline='') as f:  # builtin open(); this used to resolve to the dataset's opener
+                writer = csv.DictWriter(f, fieldnames=fieldnames)
+                if write_header:
+                    writer.writeheader()
+                writer.writerow(result)
+    finally:
+        # Optional: cleanup if the lock file somehow persists
+        if os.path.exists(lock_file):
+            try:
+                os.remove(lock_file)
+            except Exception:
+                pass  # avoid crashing on cleanup
+
+
+
+def benchmark_runner(
+        dataset, instance_runner,
+        output_file: str,
+        solver: str, workers: int = 1, 
+        time_limit: int = 300, mem_limit: Optional[int] = 4096, cores: int=1,
+        verbose: bool = False, intermediate: bool = False,
+        checker_path: Optional[str] = None,
+        **kwargs
+    ) -> str:
+    """
+    Run a benchmark over all instances in a dataset using multiple threads.
+
+    Arguments:
+        dataset (_Dataset):             Dataset object containing instances to benchmark.
+        instance_runner (Benchmark):    Benchmark runner that implements the run() method.
+        output_file (str):              Path to the CSV file where results will be stored.
+        solver (str):                   Name of the solver to use.
+        workers (int):                  Number of worker threads; each runs its instance in a subprocess (default=1).
+        time_limit (int):               Time limit in seconds for each instance (default=300).
+        mem_limit (int, optional):      Memory limit in MB per instance (default=4096).
+        cores (int):                    Number of CPU cores assigned per instance (default=1).
+        verbose (bool):                 Whether to show solver output in stdout (default=False).
+        intermediate (bool):            Whether to report intermediate solutions if supported (default=False).
+        checker_path (str, optional):   Path to a solution checker for validating instance solutions.
+        **kwargs:                       Ignored; lets callers pass extra options (e.g. the CLI's `**vars(args)`).
+
+    Returns:
+        str: Path to the CSV file where benchmark results were written.
+    """
+    from concurrent.futures import TimeoutError as FutureTimeoutError  # only an alias of the builtin since Python 3.11
+    # Process instances in parallel
+    with ThreadPoolExecutor(max_workers=workers) as executor:
+        # Submit all tasks and track their futures
+        futures = [executor.submit(execute_instance,  # below: args
+                                   (instance_runner, filename, metadata, dataset.open(), solver, time_limit, mem_limit, cores, output_file, verbose, intermediate, checker_path))
+                   for filename, metadata in dataset]
+        # Process results as they complete
+        for i,future in enumerate(tqdm(futures, total=len(futures), desc=f"Running {solver}")):
+            try:
+                _ = future.result(timeout=time_limit+60)  # for cleanliness sake, result is empty
+            except (TimeoutError, FutureTimeoutError):  # a slow job is not an error here
+                pass
+            except Exception as e:
+                print(f"Job {i}: {dataset[i][1]['name']}, ThreadPoolExecutor caught: {e}")
+    
+    return output_file

From 83454e00dae27aa38acffe8ee00cb60c8808d6ac Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 14:00:47 +0200
Subject: [PATCH 10/46] Formatting

---
 cpmpy/tools/benchmark/mse.py     | 10 +++++-----
 cpmpy/tools/dataset/model/mse.py |  3 ++-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py
index 08084b645..3654c2bc8 100644
--- a/cpmpy/tools/benchmark/mse.py
+++ b/cpmpy/tools/benchmark/mse.py
@@ -43,15 +43,15 @@
     solution_mse
 """
 
+import warnings
 import argparse
-from datetime import datetime
 from enum import Enum
 from pathlib import Path
-import warnings
+from datetime import datetime
 
+# CPMpy
 from cpmpy.tools.benchmark.runner import benchmark_runner
-from _base import Benchmark
-
+from cpmpy.tools.benchmark._base import Benchmark
 from cpmpy.tools.wcnf import read_wcnf
 from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus
 
@@ -80,7 +80,7 @@ def solution_mse(model):
 class MSEBenchmark(Benchmark):
 
     """
-    Benchmark runner for MSE (MaxSAT Evaluation) competition instances.
+    MSE (MaxSAT Evaluation) competition as a CPMpy benchmark.
 
     This class extends `Benchmark` to implement MSE-specific solution printing
     in DIMACS-like output format (`c`, `s`, `v`, `o` lines). It uses CPMpy's `read_wcnf`
diff --git a/cpmpy/tools/dataset/model/mse.py b/cpmpy/tools/dataset/model/mse.py
index 84e8c5dfa..711a560bb 100644
--- a/cpmpy/tools/dataset/model/mse.py
+++ b/cpmpy/tools/dataset/model/mse.py
@@ -4,8 +4,9 @@
 https://maxsat-evaluations.github.io/
 """
 
-import lzma
+
 import os
+import lzma
 import zipfile
 import pathlib
 from urllib.request import urlretrieve

From 7f2d363282588ac9c298e80df5c14bb8760daf80 Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 14:06:44 +0200
Subject: [PATCH 11/46] XCSP3 as dataset and benchmark

---
 cpmpy/tools/benchmark/xcsp3.py     | 248 +++++++++++++++++++++++++++++
 cpmpy/tools/dataset/model/xcsp3.py | 132 +++++++++++++++
 cpmpy/tools/xcsp3/__init__.py      | 137 ++--------------
 cpmpy/tools/xcsp3/parser.py        | 146 +++++++++++++++++
 4 files changed, 543 insertions(+), 120 deletions(-)
 create mode 100644 cpmpy/tools/benchmark/xcsp3.py
 create mode 100644 cpmpy/tools/dataset/model/xcsp3.py
 create mode 100644 cpmpy/tools/xcsp3/parser.py

diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py
new file mode 100644
index 000000000..e52e41a4a
--- /dev/null
+++ b/cpmpy/tools/benchmark/xcsp3.py
@@ -0,0 +1,248 @@
+"""
+XCSP3 competition as a CPMpy benchmark
+
+This module provides a benchmarking framework for running CPMpy on XCSP3 
+competition instances. It extends the generic `Benchmark` base class with
+XCSP3-specific logging and result reporting.
+
+Command-line Interface
+----------------------
+This script can be run directly to benchmark solvers on XCSP3 datasets.
+
+Usage:
+    python xcsp3.py --year 2024 --track CSP --solver ortools
+
+Arguments:
+    --year          Competition year (e.g., 2024).
+    --track         Track type (e.g., CSP, COP).
+    --solver        Solver name (e.g., ortools, exact, choco, ...).
+    --workers       Number of parallel workers to use.
+    --time-limit    Time limit in seconds per instance.
+    --mem-limit     Memory limit in MB per instance.
+    --cores         Number of cores to assign to a single instance.
+    --output-dir    Output directory for CSV files.
+    --verbose       Show solver output if set.
+    --intermediate  Report intermediate solutions if supported.
+
+===============
+List of classes
+===============
+
+.. autosummary::
+    :nosignatures:
+
+    XCSP3Benchmark
+
+=================
+List of functions
+=================
+
+.. autosummary::
+    :nosignatures:
+
+    solution_xcsp3
+"""
+
+import warnings
+import argparse
+from enum import Enum
+from pathlib import Path
+from datetime import datetime
+
+# CPMpy
+from cpmpy.tools.benchmark.runner import benchmark_runner
+from cpmpy.tools.benchmark._base import Benchmark
+from cpmpy.tools.xcsp3 import read_xcsp3
+from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus
+
+# PyCSP3
+from xml.etree.ElementTree import ParseError
+import xml.etree.cElementTree as ET
+
+
+class ExitStatus(Enum):  # status strings printed after the 's ' prefix by XCSP3Benchmark.print_status
+    unsupported:str = "UNSUPPORTED" # instance contains an unsupported feature (e.g. an unsupported global constraint)
+    sat:str = "SATISFIABLE" # CSP : found a solution | COP : found a solution but couldn't prove optimality
+    optimal:str = "OPTIMUM" + chr(32) + "FOUND" # optimal COP solution found (chr(32) is a space)
+    unsat:str = "UNSATISFIABLE" # instance is unsatisfiable
+    unknown:str = "UNKNOWN" # any other case
+
+def solution_xcsp3(model, useless_style="*", boolean_style="int"):
+    """
+        Formats the current solution as an XCSP3 `<instantiation>` XML element.
+
+        Arguments:
+            model: Solved CPMpy model (or solver object) exposing `user_vars`, `has_objective()` and `objective_value()`.
+            useless_style: How to process unused decision variables (with value `None`). 
+                           If "*", variable is included in reporting with value "*". 
+                           If "drop", variable is excluded from reporting.
+            boolean_style: Print style for boolean values.
+                           "int" results in 0/1, "bool" results in False/True.
+
+        Returns:
+            str: The XML text; `type="solution"` without objective, `type="optimum"` plus a `cost` attribute with one.
+    """
+
+    # CSP
+    if not model.has_objective():
+        root = ET.Element("instantiation", type="solution")
+    # COP
+    else:
+        root = ET.Element("instantiation", type="optimum", cost=str(int(model.objective_value())))
+
+    # How useless variables should be handled
+    #    (variables which have value `None` in the solution)
+    variables = {var.name: var for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]} # dirty workaround for all missed aux vars in user vars
+    if useless_style == "*":
+        variables = {k:(v.value() if v.value() is not None else "*") for k,v in variables.items()}
+    elif useless_style == "drop":
+        variables = {k:v.value() for k,v in variables.items() if v.value() is not None}
+
+    # Convert booleans
+    if boolean_style == "bool":
+        pass
+    elif boolean_style == "int":
+        variables = {k:(v if (not isinstance(v, bool)) else (1 if v else 0)) for k,v in variables.items()}
+
+    # Build XCSP3 XML tree
+    ET.SubElement(root, "list").text=" " + " ".join([str(v) for v in variables.keys()]) + " "
+    ET.SubElement(root, "values").text=" " + " ".join([str(v) for v in variables.values()]) + " "
+    tree = ET.ElementTree(root)
+    ET.indent(tree, space="  ", level=0)
+    res = ET.tostring(root).decode("utf-8")
+
+    return str(res)
+
+
+class XCSP3Benchmark(Benchmark):
+    """
+    The XCSP3 competition as a CPMpy benchmark: prints results as `c`/`s`/`v`/`o` lines and parses such lines back.
+    """
+
+    def __init__(self):
+        super().__init__(reader=read_xcsp3)
+    
+    def print_comment(self, comment:str):
+        print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True)
+
+    def print_status(self, status: ExitStatus) -> None:
+        print('s' + chr(32) + status.value, end="\n", flush=True)
+
+    def print_value(self, value: str) -> None:
+        value = value[:-2].replace("\n", "\nv" + chr(32)) + value[-2:]
+        print('v' + chr(32) + value, end="\n", flush=True)
+
+    def print_objective(self, objective: int) -> None:
+        print('o' + chr(32) + str(objective), end="\n", flush=True)
+
+    def print_result(self, s):
+        if s.status().exitstatus == CPMStatus.OPTIMAL:
+            # the objective value travels in the 'cost' attribute of the 'v' line (see solution_xcsp3)
+            self.print_value(solution_xcsp3(s))
+            self.print_status(ExitStatus.optimal)
+        elif s.status().exitstatus == CPMStatus.FEASIBLE:
+            self.print_value(solution_xcsp3(s))
+            self.print_status(ExitStatus.sat)
+        elif s.status().exitstatus == CPMStatus.UNSATISFIABLE:
+            self.print_status(ExitStatus.unsat)
+        else:
+            self.print_comment("Solver did not find any solution within the time/memory limit")
+            self.print_status(ExitStatus.unknown)
+
+    def handle_memory_error(self, mem_limit):
+        super().handle_memory_error(mem_limit)
+        self.print_status(ExitStatus.unknown)
+
+    def handle_not_implemented(self, e):
+        super().handle_not_implemented(e)
+        self.print_status(ExitStatus.unsupported)
+
+    def handle_exception(self, e):
+        if isinstance(e, ParseError):
+            if "out of memory" in e.msg:
+                self.print_comment("MemoryError raised by parser.")
+                self.print_status(ExitStatus.unknown)
+            else:
+                self.print_comment(f"An {type(e)} got raised by the parser: {e}")
+                self.print_status(ExitStatus.unknown)
+        else:
+            super().handle_exception(e)
+            self.print_status(ExitStatus.unknown)
+
+    def parse_output_line(self, line, result):
+        if line.startswith('s '):
+            result['status'] = line[2:].strip()
+        elif line.startswith('v ') and result['solution'] is None:
+            # only record first line, contains 'type' and 'cost'
+            solution = line.split("\n")[0][2:].strip()
+            result['solution'] = str(solution)
+            # a solution with an objective carries its value in the cost="..." attribute
+            if "cost" in solution:
+                result['objective_value'] = solution.split('cost="')[-1][:-2]
+        elif line.startswith('o '):
+            obj = int(line[2:].strip())
+            if result['intermediate'] is None:
+                result['intermediate'] = []
+            # annotate with the time of the preceding 'c Solution' comment (None when there was none)
+            result['intermediate'] += [(getattr(self, '_sol_times', {}).pop(id(result), None), obj)]
+            result['objective_value'] = obj
+        elif line.startswith('c Solution'):
+            parts = line.split(', time = ')
+            # Every call handles one line, so the time is kept on the instance (per result dict) for the next 'o ...' line
+            self.__dict__.setdefault('_sol_times', {})[id(result)] = float(parts[-1].replace('s', '').rstrip())
+        elif line.startswith('c took '):
+            # Parse timing information
+            parts = line.split(' seconds to ')
+            if len(parts) == 2:
+                time_val = float(parts[0].replace('c took ', ''))
+                action = parts[1].strip()
+                if action.startswith('parse'):
+                    result['time_parse'] = time_val
+                elif action.startswith('convert'):
+                    result['time_model'] = time_val
+                elif action.startswith('post'):
+                    result['time_post'] = time_val
+                elif action.startswith('solve'):
+                    result['time_solve'] = time_val
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description='Benchmark solvers on XCSP3 instances')
+    parser.add_argument('--year', type=int, required=True, help='Competition year (e.g., 2023)')
+    parser.add_argument('--track', type=str, required=True, help='Track type (e.g., COP, CSP, MiniCOP)')
+    parser.add_argument('--solver', type=str, required=True, help='Solver name (e.g., ortools, exact, choco, ...)')
+    parser.add_argument('--workers', type=int, default=4, help='Number of parallel workers')
+    parser.add_argument('--time-limit', type=int, default=300, help='Time limit in seconds per instance')
+    parser.add_argument('--mem-limit', type=int, default=8192, help='Memory limit in MB per instance')
+    parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign to a single instance')
+    parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files')
+    parser.add_argument('--verbose', action='store_true', help='Show solver output')
+    parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions')
+    parser.add_argument('--checker-path', type=str, default=None,
+                    help='Path to the XCSP3 solution checker JAR file')
+    args = parser.parse_args()
+
+    if not args.verbose:
+        warnings.filterwarnings("ignore")
+    
+    # Load benchmark instances (as a dataset)
+    from cpmpy.tools.dataset.model.xcsp3 import XCSP3Dataset
+    dataset = XCSP3Dataset(year=args.year, track=args.track, download=True)
+    
+    # Create output directory (the CSV lives in the 'xcsp3' subfolder, so that is the folder that must exist)
+    output_dir = Path(args.output_dir) / "xcsp3"
+    output_dir.mkdir(parents=True, exist_ok=True)
+    
+    # Get current timestamp in a filename-safe format
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    
+    # Define output file path with timestamp
+    output_file = str(output_dir / f"xcsp3_{args.year}_{args.track}_{args.solver}_{timestamp}.csv")
+
+    # Run the benchmark (all parsed CLI options are forwarded as keyword arguments)
+    instance_runner = XCSP3Benchmark()
+    output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args))
+    print(f"Results added to {output_file}")
+
+       
diff --git a/cpmpy/tools/dataset/model/xcsp3.py b/cpmpy/tools/dataset/model/xcsp3.py
new file mode 100644
index 000000000..e71df1d04
--- /dev/null
+++ b/cpmpy/tools/dataset/model/xcsp3.py
@@ -0,0 +1,132 @@
+"""
+XCSP3 Dataset
+
+https://xcsp.org/instances/
+"""
+
+from functools import partial
+import os
+import lzma
+import zipfile
+import pathlib
+from urllib.request import urlretrieve
+from urllib.error import HTTPError, URLError
+
+from cpmpy.tools.dataset._base import _Dataset
+
+
+class XCSP3Dataset(_Dataset):
+    """
+    XCSP3 benchmark dataset.
+
+    Provides access to benchmark instances from the XCSP3
+    competitions. Instances are grouped by `year` and `track` (e.g., 
+    `"CSP"`, `"eCOP"`) and stored as `.xml.lzma` files. 
+    If the dataset is not available locally, it can be automatically 
+    downloaded and extracted.
+
+    More information on the competition can be found here: https://xcsp.org/competitions/
+    """
+
+    def __init__(
+            self,
+            root: str = ".", 
+            year: int = 2023, track: str = "CSP", 
+            transform=None, target_transform=None, 
+            download: bool = False
+        ):
+        """
+        Constructor for a dataset object of the XCSP3 competition.
+
+        Arguments:
+            root (str): Root directory where datasets are stored or will be downloaded to (default="."). 
+            year (int): Competition year of the dataset to use (default=2023).
+            track (str): Track name specifying which subset of the competition instances to load (default="CSP").
+            transform (callable, optional): Optional transform applied to the instance file path.
+            target_transform (callable, optional): Optional transform applied to the metadata dictionary.
+            download (bool): If True, downloads the dataset if it does not exist locally (default=False).
+
+
+        Raises:
+            ValueError: If the dataset directory does not exist and `download=False`,
+                or if the requested year/track combination is not available.
+        """
+
+        self.root = pathlib.Path(root)
+        self.year = year
+        self.track = track
+
+        dataset_dir = self.root / str(year) / track
+
+        super().__init__(
+            dataset_dir=dataset_dir, 
+            transform=transform, target_transform=target_transform, 
+            download=download, extension=".xml.lzma"
+        )
+
+
+    def category(self):
+        return {
+            "year": self.year,
+            "track": self.track
+        }
+
+    def download(self):
+        print(f"Downloading XCSP3 {self.year} instances...")
+
+        url = "https://www.cril.univ-artois.fr/~lecoutre/compets/"
+        year_suffix = str(self.year)[2:]  # Drop the starting '20'
+        url_path = url + f"instancesXCSP{year_suffix}.zip"
+        zip_path = self.root / f"instancesXCSP{year_suffix}.zip"
+        self.root.mkdir(parents=True, exist_ok=True)  # the zip is stored in root, so root has to exist
+        try:
+            urlretrieve(url_path, str(zip_path))
+        except (HTTPError, URLError) as e:
+            raise ValueError(f"No dataset available for year {self.year}. Error: {str(e)}") from e
+        
+        # Extract only the specific track folder from the zip
+        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+            # Get the main folder name: the first path component of the first nested entry
+            main_folder = None
+            for name in zip_ref.namelist():
+                if '/' in name:
+                    main_folder = name.split('/')[0]
+                    break
+            
+            if main_folder is None:
+                raise ValueError("Could not find main folder in zip file")
+
+            # Extract only files from the specified track
+            # Get all unique track names from zip
+            tracks = set()
+            for file_info in zip_ref.infolist():
+                parts = file_info.filename.split('/')
+                if len(parts) > 2 and parts[0] == main_folder:
+                    tracks.add(parts[1])
+            
+            # Check if requested track exists
+            if self.track not in tracks:
+                raise ValueError(f"Track '{self.track}' not found in dataset. Available tracks: {sorted(tracks)}")
+            
+            # Create track folder in root directory, parents=True ensures recursive creation
+            self.dataset_dir.mkdir(parents=True, exist_ok=True)
+            
+            # Extract files for the specified track
+            prefix = f"{main_folder}/{self.track}/"
+            for file_info in zip_ref.infolist():
+                if file_info.filename.startswith(prefix) and not file_info.is_dir():
+                    # Write the file flat into dataset_dir (directory entries are skipped, they hold no data)
+                    filename = pathlib.Path(file_info.filename).name
+                    with zip_ref.open(file_info) as source, open(self.dataset_dir / filename, 'wb') as target:
+                        target.write(source.read())
+        # Clean up the zip file
+        zip_path.unlink()
+
+    def open(self, instance: os.PathLike) -> callable:
+        return partial(lzma.open, mode='rt', encoding='utf-8') if str(instance).endswith(".lzma") else open
+
+
+if __name__ == "__main__":  # manual smoke test; download=True may fetch the 2024 archive
+    dataset = XCSP3Dataset(year=2024, track="MiniCOP", download=True)
+    print("Dataset size:", len(dataset))
+    print("Instance 0:", dataset[0])
diff --git a/cpmpy/tools/xcsp3/__init__.py b/cpmpy/tools/xcsp3/__init__.py
index d5abf2766..9572943d8 100644
--- a/cpmpy/tools/xcsp3/__init__.py
+++ b/cpmpy/tools/xcsp3/__init__.py
@@ -4,127 +4,24 @@
 ## __init__.py
 ##
 """
-    Set of utilities for working with XCSP3-formatted CP models.
-
-
-    =================
-    List of functions
-    =================
-
-    .. autosummary::
-        :nosignatures:
-
-        read_xcsp3
-
-    ========================
-    List of helper functions
-    ========================
-
-    .. autosummary::
-        :nosignatures:
-
-        _parse_xcsp3
-        _load_xcsp3
-
-    ==================
-    List of submodules
-    ==================
-
-    .. autosummary::
-        :nosignatures:
-
-        parser_callbacks
-        analyze
-        benchmark
-        xcsp3_cpmpy
-        dataset
-        globals
+Set of utilities for working with XCSP3-formatted CP models.
+
+==================
+List of submodules
+==================
+
+.. autosummary::
+    :nosignatures:
+
+    parser
+    parser_callbacks
+    analyze
+    benchmark
+    xcsp3_cpmpy
+    dataset
+    globals
 """
-from io import StringIO
-import lzma
-import os
-import cpmpy as cp
-
-# Special case for optional cpmpy dependencies
-from typing import TYPE_CHECKING
 
-if TYPE_CHECKING:
-    from pycsp3.parser.xparser import CallbackerXCSP3, ParserXCSP3
 
 from .dataset import XCSP3Dataset # for easier importing
-
-def _parse_xcsp3(path: os.PathLike) -> "ParserXCSP3":
-    """
-    Parses an XCSP3 instance file (.xml) and returns a `ParserXCSP3` instance.
-    
-    Arguments:
-        path: location of the XCSP3 instance to read (expects a .xml file).
-    
-    Returns:
-        A parser object.
-    """
-    try:
-        from pycsp3.parser.xparser import ParserXCSP3
-    except ImportError as e:
-        raise ImportError("The 'pycsp3' package is required to parse XCSP3 files. "
-                          "Please install it with `pip install pycsp3`.") from e
-    
-    parser = ParserXCSP3(path)
-    return parser
-
-def _load_xcsp3(parser: "ParserXCSP3") -> cp.Model:
-    """
-    Takes in a `ParserXCSP3` instance and loads its captured model as a CPMpy model.
-
-    Arguments:
-        parser (ParserXCSP3): A parser object to load from.
-
-    Returns:
-        The XCSP3 instance loaded as a CPMpy model.
-    """
-    from .parser_callbacks import CallbacksCPMPy
-    from pycsp3.parser.xparser import CallbackerXCSP3
-    callbacks = CallbacksCPMPy()
-    callbacks.force_exit = True
-    callbacker = CallbackerXCSP3(parser, callbacks)
-    callbacker.load_instance()
-    model = callbacks.cpm_model
-   
-    return model
-
-
-def read_xcsp3(path: os.PathLike) -> cp.Model:
-    """
-    Reads in an XCSP3 instance (.xml or .xml.lzma) and returns its matching CPMpy model.
-
-    Arguments:
-        path: location of the XCSP3 instance to read (expects a .xml or .xml.lzma file).
-
-    Returns:
-        The XCSP3 instance loaded as a CPMpy model.
-    """
-    # Decompress on the fly if still in .lzma format
-    if str(path).endswith(".lzma"):
-        path = decompress_lzma(path)
-
-    # Parse and create CPMpy model
-    parser = _parse_xcsp3(path)
-    model = _load_xcsp3(parser)
-    return model
-
-def decompress_lzma(path: os.PathLike) -> StringIO:
-    """
-    Decompresses a .lzma file.
-
-    Arguments:
-        path: Location of .lzma file
-
-    Returns:
-        Memory-mapped decompressed file
-    """
-    # Decompress the XZ file
-    with lzma.open(path, 'rt', encoding='utf-8') as f:
-        return StringIO(f.read()) # read to memory-mapped file
-        
-
-    
\ No newline at end of file
+from .parser import read_xcsp3
\ No newline at end of file
diff --git a/cpmpy/tools/xcsp3/parser.py b/cpmpy/tools/xcsp3/parser.py
new file mode 100644
index 000000000..761ef7caa
--- /dev/null
+++ b/cpmpy/tools/xcsp3/parser.py
@@ -0,0 +1,146 @@
+"""
+Parser for the XCSP3 format.
+
+
+=================
+List of functions
+=================
+
+.. autosummary::
+    :nosignatures:
+
+    read_xcsp3
+
+========================
+List of helper functions
+========================
+
+.. autosummary::
+    :nosignatures:
+
+    _parse_xcsp3
+    _load_xcsp3
+"""
+
+import os
+import sys
+import argparse
+from io import StringIO
+
+import cpmpy as cp
+
+# Special case for optional cpmpy dependencies
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pycsp3.parser.xparser import ParserXCSP3
+
+def _parse_xcsp3(path: os.PathLike) -> "ParserXCSP3":
+    """
+    Wraps an XCSP3 instance in a pycsp3 `ParserXCSP3` instance.
+    
+    Arguments:
+        path: the XCSP3 instance, handed as-is to `ParserXCSP3` (`read_xcsp3` passes an opened file-like object).
+    
+    Returns:
+        A parser object (an ImportError is raised when the 'pycsp3' package is not installed).
+    """
+    try:
+        from pycsp3.parser.xparser import ParserXCSP3
+    except ImportError as e:
+        raise ImportError("The 'pycsp3' package is required to parse XCSP3 files. "
+                          "Please install it with `pip install pycsp3`.") from e
+    
+    parser = ParserXCSP3(path)
+    return parser
+
+def _load_xcsp3(parser: "ParserXCSP3") -> cp.Model:
+    """
+    Takes in a `ParserXCSP3` instance and loads its captured model as a CPMpy model.
+
+    Arguments:
+        parser (ParserXCSP3): A parser object to load from.
+
+    Returns:
+        The XCSP3 instance loaded as a CPMpy model.
+    """
+    from .parser_callbacks import CallbacksCPMPy  # imported here, not at module level (pycsp3 is an optional dependency)
+    from pycsp3.parser.xparser import CallbackerXCSP3
+    callbacks = CallbacksCPMPy()
+    callbacks.force_exit = True  # NOTE(review): presumably aborts on unsupported features - confirm in parser_callbacks
+    callbacker = CallbackerXCSP3(parser, callbacks)
+    callbacker.load_instance()  # afterwards the resulting model is read from `callbacks.cpm_model`
+    model = callbacks.cpm_model
+   
+    return model
+
+_std_open = open
+def read_xcsp3(xcsp3: os.PathLike, open=open) -> cp.Model:
+    """
+    Reads in an XCSP3 instance (.xml or .xml.lzma) and returns its matching CPMpy model.
+
+    Arguments:
+        xcsp3 (str or os.PathLike):
+            - A file path to an XCSP3 file (when LZMA-compressed with `.lzma`, pass an `open` that can decompress it)
+            - OR a string containing the XCSP3 content directly
+        open: (callable):
+            If xcsp3 is the path to a file, a callable to "open" that file (default=python standard library's 'open').
+
+    Returns:
+        The XCSP3 instance loaded as a CPMpy model.
+    """
+    # If xcsp3 is a path to an existing file -> open that file
+    if isinstance(xcsp3, (str, os.PathLike)) and os.path.exists(xcsp3):
+        f = open(xcsp3) if open is not None else _std_open(xcsp3, "rt")
+    # If xcsp3 is a string containing a model -> create an in-memory text file
+    else:
+        f = StringIO(xcsp3)
+
+    # Parse and create CPMpy model; the handle is closed again, also when parsing fails
+    try:
+        parser = _parse_xcsp3(f)
+        model = _load_xcsp3(parser)
+    finally:
+        f.close()
+    return model
+
+        
+def main():  # command-line entry point: read an XCSP3 instance, solve it, print status and objective
+    parser = argparse.ArgumentParser(description="Parse and solve an XCSP3 model using CPMpy")
+    parser.add_argument("model", help="Path to an XCSP3 file (or raw XCSP3 string if --string is given)")
+    parser.add_argument("-s", "--solver", default=None, help="Solver name to use (default: CPMpy's default)")
+    parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw XCSP3 string instead of a file path")
+    parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)")
+    args = parser.parse_args()
+
+    # Build the CPMpy model
+    try:
+        if args.string:
+            model = read_xcsp3(args.model)
+        else:
+            model = read_xcsp3(os.path.expanduser(args.model))
+    except Exception as e:
+        sys.stderr.write(f"Error reading model: {e}\n")
+        sys.exit(1)
+
+    # Solve the model
+    try:
+        if args.solver:
+            result = model.solve(solver=args.solver, time_limit=args.time_limit)
+        else:
+            result = model.solve(time_limit=args.time_limit)
+    except Exception as e:
+        sys.stderr.write(f"Error solving model: {e}\n")
+        sys.exit(1)
+
+    # Print results (solve() reports whether a solution was found, so test its truthiness)
+    print("Status:", model.status())
+    if result:
+        if model.has_objective():
+            print("Objective:", model.objective_value())
+    else:
+        print("No solution found.")
+
+if __name__ == "__main__":
+    main()
+    
\ No newline at end of file

From 9173c9faebbb4077368f6ba73c3990804c055fe0 Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 14:07:22 +0200
Subject: [PATCH 12/46] Parsers with changeable 'open'

---
 cpmpy/tools/opb/parser.py  | 11 ++++++++---
 cpmpy/tools/wcnf/parser.py | 17 ++++++++---------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/cpmpy/tools/opb/parser.py b/cpmpy/tools/opb/parser.py
index 846c0874b..e300a2752 100644
--- a/cpmpy/tools/opb/parser.py
+++ b/cpmpy/tools/opb/parser.py
@@ -105,7 +105,8 @@ def _parse_constraint(line, vars):
         right=rhs
     )
 
-def read_opb(opb: Union[str, os.PathLike]) -> cp.Model:
+_std_open = open
+def read_opb(opb: Union[str, os.PathLike], open=open) -> cp.Model:
     """
     Parser for OPB (Pseudo-Boolean) format. Reads in an instance and returns its matching CPMpy model.
 
@@ -121,6 +122,8 @@ def read_opb(opb: Union[str, os.PathLike]) -> cp.Model:
         opb (str or os.PathLike): 
             - A file path to an OPB file (optionally LZMA-compressed with `.xz`)
             - OR a string containing the OPB content directly
+        open: (callable):
+            If opb is the path to a file, a callable to "open" that file (default=python standard library's 'open').
 
     Returns:
         cp.Model: The CPMpy model of the OPB instance.
@@ -143,8 +146,10 @@ def read_opb(opb: Union[str, os.PathLike]) -> cp.Model:
     
     # If opb is a path to a file -> open file
     if isinstance(opb, (str, os.PathLike)) and os.path.exists(opb):
-        f_open = lzma.open if str(opb).endswith(".xz") else open
-        f = f_open(opb, 'rt')
+        if open is not None:
+            f = open(opb)
+        else:
+            f = _std_open(opb, "rt")
     # If opb is a string containing a model -> create a memory-mapped file
     else:
         f = StringIO(opb)
diff --git a/cpmpy/tools/wcnf/parser.py b/cpmpy/tools/wcnf/parser.py
index 72cec94c8..84b484979 100644
--- a/cpmpy/tools/wcnf/parser.py
+++ b/cpmpy/tools/wcnf/parser.py
@@ -1,8 +1,3 @@
-#!/usr/bin/env python
-#-*- coding:utf-8 -*-
-##
-## __init__.py
-##
 """
 Parser for the WCNF format.
 
@@ -39,8 +34,8 @@ def _get_var(i, vars_dict):
         vars_dict[i] = cp.boolvar(name=f"x{i}") # <- be carefull that name doesn't clash with generated variables during transformations / user variables
     return vars_dict[i]
 
-
-def read_wcnf(wcnf: Union[str, os.PathLike]) -> cp.Model:
+_std_open = open
+def read_wcnf(wcnf: Union[str, os.PathLike], open=open) -> cp.Model:
     """
     Parser for WCNF format. Reads in an instance and returns its matching CPMpy model.
 
@@ -48,14 +43,18 @@ def read_wcnf(wcnf: Union[str, os.PathLike]) -> cp.Model:
         wcnf (str or os.PathLike):
             - A file path to an WCNF file (optionally LZMA-compressed with `.xz`)
             - OR a string containing the WCNF content directly
+        open: (callable):
+            If wcnf is the path to a file, a callable to "open" that file (default=python standard library's 'open').
 
     Returns:
         cp.Model: The CPMpy model of the WCNF instance.
     """
     # If wcnf is a path to a file -> open file
     if isinstance(wcnf, (str, os.PathLike)) and os.path.exists(wcnf):
-        f_open = lzma.open if str(wcnf).endswith(".xz") else open
-        f = f_open(wcnf, "rt")
+        if open is not None:
+            f = open(wcnf)
+        else:
+            f = _std_open(wcnf, "rt")
     # If wcnf is a string containing a model -> create a memory-mapped file
     else:
         f = StringIO(wcnf)

From 52b95de5f6e556e0d853b4c5f85c08a13f93087b Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 14:13:58 +0200
Subject: [PATCH 13/46] Type-hints and docstrings

---
 cpmpy/tools/dataset/_base.py       | 20 +++++++++++++++++---
 cpmpy/tools/dataset/model/mse.py   |  2 +-
 cpmpy/tools/dataset/model/opb.py   |  6 ++++--
 cpmpy/tools/dataset/model/xcsp3.py |  2 +-
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/cpmpy/tools/dataset/_base.py b/cpmpy/tools/dataset/_base.py
index ce2206110..aa22ae930 100644
--- a/cpmpy/tools/dataset/_base.py
+++ b/cpmpy/tools/dataset/_base.py
@@ -38,18 +38,32 @@ def __init__(
                 self.download()
                 
     @abstractmethod
-    def category(self):
+    def category(self) -> dict:
+        """
+        Labels to distinguish instances into categories matching to those of the dataset.
+        E.g. 
+            - year
+            - track
+        """
         pass
 
     @abstractmethod
     def download(self, *args, **kwargs):
+        """
+        How the dataset should be downloaded.
+        """
         pass
 
     @abstractmethod
-    def open(self, instance):
+    def open(self, instance) -> callable:
+        """
+        How an instance file from the dataset should be opened.
+        Especially useful when files come compressed and won't work with 
+        python standard library's 'open', e.g. '.xz', '.lzma'.
+        """
         pass
 
-    def metadata(self, file):
+    def metadata(self, file) -> dict:
         metadata = self.category() | {
             'name': pathlib.Path(file).stem.replace(self.extension, ''),
             'path': file,
diff --git a/cpmpy/tools/dataset/model/mse.py b/cpmpy/tools/dataset/model/mse.py
index 711a560bb..8f395d677 100644
--- a/cpmpy/tools/dataset/model/mse.py
+++ b/cpmpy/tools/dataset/model/mse.py
@@ -64,7 +64,7 @@ def __init__(
         )
 
 
-    def category(self):
+    def category(self) -> dict:
         return {
             "year": self.year,
             "track": self.track
diff --git a/cpmpy/tools/dataset/model/opb.py b/cpmpy/tools/dataset/model/opb.py
index bc051d784..40e6a282d 100644
--- a/cpmpy/tools/dataset/model/opb.py
+++ b/cpmpy/tools/dataset/model/opb.py
@@ -63,19 +63,21 @@ def __init__(
             download=download, extension=".opb.xz"
         )
 
-    def category(self):
+    def category(self) -> dict:
         return {
             "year": self.year,
             "track": self.track
         }
 
-    def metadata(self, file):
+    def metadata(self, file) -> dict:
+        # Add the author to the metadata
         return super().metadata(file) | {'author': str(file).split(os.sep)[-1].split("_")[0],}
                 
 
     def download(self):
         # TODO: add option to filter on competition instances
         print(f"Downloading OPB {self.year} {self.track} instances...")
+        
         url = f"https://www.cril.univ-artois.fr/PB24/benchs/"
         year_suffix = str(self.year)[2:]  # Drop the starting '20'
         url_path = url + f"normalized-PB{year_suffix}.tar"
diff --git a/cpmpy/tools/dataset/model/xcsp3.py b/cpmpy/tools/dataset/model/xcsp3.py
index e71df1d04..597a2af55 100644
--- a/cpmpy/tools/dataset/model/xcsp3.py
+++ b/cpmpy/tools/dataset/model/xcsp3.py
@@ -65,7 +65,7 @@ def __init__(
         )
 
 
-    def category(self):
+    def category(self) -> dict:
         return {
             "year": self.year,
             "track": self.track

From bf5ecd2c65b537c10ad67a368da0b6631b7c2a58 Mon Sep 17 00:00:00 2001
From: Thomas Sergeys <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 14:18:10 +0200
Subject: [PATCH 14/46] Add TODOs

---
 cpmpy/tools/benchmark/mse.py   | 2 +-
 cpmpy/tools/benchmark/opb.py   | 2 +-
 cpmpy/tools/benchmark/xcsp3.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py
index 3654c2bc8..a11b1f5cb 100644
--- a/cpmpy/tools/benchmark/mse.py
+++ b/cpmpy/tools/benchmark/mse.py
@@ -73,7 +73,7 @@ def solution_mse(model):
     Returns:
         str: MSE-formatted solution string.
     """
-    variables = [var for var in model.user_vars if var.name[:2] == "BV"] # dirty workaround for all missed aux vars in user vars
+    variables = [var for var in model.user_vars if var.name[:2] == "BV"] # dirty workaround for all missed aux vars in user vars TODO fix with Ignace
     variables = sorted(variables, key=lambda v: int("".join(filter(str.isdigit, v.name))))
     return " ".join([str(1 if var.value() else 0) for var in variables])
 
diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py
index 3fc5202cd..9d669a075 100644
--- a/cpmpy/tools/benchmark/opb.py
+++ b/cpmpy/tools/benchmark/opb.py
@@ -73,7 +73,7 @@ def solution_opb(model):
         Returns:
             Formatted model solution according to PB24 specification.
     """
-    variables = [var for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]] # dirty workaround for all missed aux vars in user vars TODO
+    variables = [var for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]] # dirty workaround for all missed aux vars in user vars TODO fix with Ignace
     return " ".join([var.name.replace("[","").replace("]","") if var.value() else "-"+var.name.replace("[","").replace("]","") for var in variables])
 
 class OPBBenchmark(Benchmark):
diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py
index e52e41a4a..9601a4530 100644
--- a/cpmpy/tools/benchmark/xcsp3.py
+++ b/cpmpy/tools/benchmark/xcsp3.py
@@ -92,7 +92,7 @@ def solution_xcsp3(model, useless_style="*", boolean_style="int"):
 
     # How useless variables should be handled
     #    (variables which have value `None` in the solution)
-    variables = {var.name: var for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]} # dirty workaround for all missed aux vars in user vars
+    variables = {var.name: var for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]} # dirty workaround for all missed aux vars in user vars TODO fix with Ignace
     if useless_style == "*":
         variables = {k:(v.value() if v.value() is not None else "*") for k,v in variables.items()}
     elif useless_style == "drop":

From 5dc388647c8c7fde2a3ec270f01c5506eabf02f7 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 15:07:56 +0200
Subject: [PATCH 15/46] Missing helper functions

---
 cpmpy/tools/benchmark/__init__.py | 59 +++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/cpmpy/tools/benchmark/__init__.py b/cpmpy/tools/benchmark/__init__.py
index e69de29bb..b5a26f62a 100644
--- a/cpmpy/tools/benchmark/__init__.py
+++ b/cpmpy/tools/benchmark/__init__.py
@@ -0,0 +1,59 @@
+
+import resource
+import sys
+import time
+import warnings
+import psutil
+
+
+TIME_BUFFER = 5 # seconds
+# TODO : see if good value
+MEMORY_BUFFER_SOFT = 2 # MiB
+MEMORY_BUFFER_HARD = 0 # MiB
+MEMORY_BUFFER_SOLVER = 20 # MB
+
+
+def set_memory_limit(mem_limit):
+    """
+    Set memory limit (Virtual Memory Size). 
+    """
+    if mem_limit is not None:
+        soft = max(_mib_as_bytes(mem_limit) - _mib_as_bytes(MEMORY_BUFFER_SOFT), _mib_as_bytes(MEMORY_BUFFER_SOFT))
+        hard = max(_mib_as_bytes(mem_limit) - _mib_as_bytes(MEMORY_BUFFER_HARD), _mib_as_bytes(MEMORY_BUFFER_HARD))
+        if sys.platform != "win32":
+            resource.setrlimit(resource.RLIMIT_AS, (soft, hard)) # limit memory in number of bytes
+        else:
+            warnings.warn("Memory limits using `resource` are not supported on Windows. Skipping hard limit.")
+
+def set_time_limit(time_limit, verbose:bool=False):
+    """
+    Set time limit (CPU time in seconds).
+    """
+    if time_limit is not None:
+        if sys.platform != "win32":
+            soft = time_limit
+            hard = resource.RLIM_INFINITY
+            resource.setrlimit(resource.RLIMIT_CPU, (soft, hard))
+        else:
+            warnings.warn("CPU time limits using `resource` are not supported on Windows. Skipping hard limit.")
+
+def _wall_time(p: psutil.Process):
+    return time.time() - p.create_time()
+
+def _mib_as_bytes(mib: int) -> int:
+    return mib * 1024 * 1024
+
+def _mb_as_bytes(mb: int) -> int:
+    return mb * 1000 * 1000
+
+def _bytes_as_mb(bytes: int) -> int:
+    return bytes // (1000 * 1000)
+
+def _bytes_as_gb(bytes: int) -> int:
+    return bytes // (1000 * 1000 * 1000)
+
+def _bytes_as_mb_float(bytes: int) -> float:
+    return bytes / (1000 * 1000)
+
+def _bytes_as_gb_float(bytes: int) -> float:
+    return bytes / (1000 * 1000 * 1000)
\ No newline at end of file

From 7209c620d6e22660ea98d5074143ef40bb06e16d Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 15:10:58 +0200
Subject: [PATCH 16/46] Print stacktrace of process

---
 cpmpy/tools/benchmark/runner.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/cpmpy/tools/benchmark/runner.py b/cpmpy/tools/benchmark/runner.py
index 325ac54cd..a83740459 100644
--- a/cpmpy/tools/benchmark/runner.py
+++ b/cpmpy/tools/benchmark/runner.py
@@ -273,15 +273,16 @@ def benchmark_runner(
     with ThreadPoolExecutor(max_workers=workers) as executor:
         # Submit all tasks and track their futures
         futures = [executor.submit(execute_instance,  # below: args
-                                   (instance_runner, filename, metadata, dataset.open(), solver, time_limit, mem_limit, cores, output_file, verbose, intermediate, checker_path))
+                                   (instance_runner, filename, metadata, dataset.open, solver, time_limit, mem_limit, cores, output_file, verbose, intermediate, checker_path))
                    for filename, metadata in dataset]
         # Process results as they complete
-        for i,future in enumerate(tqdm(futures, total=len(futures), desc=f"Running {solver}")):
+        for i, future in enumerate(tqdm(futures, total=len(futures), desc=f"Running {solver}")):
             try:
-                _ = future.result(timeout=time_limit+60)  # for cleanliness sake, result is empty
+                _ = future.result(timeout=time_limit + 60)  # for cleanliness sake, result is empty
             except TimeoutError:
                 pass
             except Exception as e:
                 print(f"Job {i}: {dataset[i][1]['name']}, ProcessPoolExecutor caught: {e}")
+                if verbose: traceback.print_exc()
     
     return output_file

From f66c8c554555d178244dacd50ed487d9f3c22401 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 15:21:22 +0200
Subject: [PATCH 17/46] Fix arguments

---
 cpmpy/tools/benchmark/_base.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py
index 85119a822..c6b4353d9 100644
--- a/cpmpy/tools/benchmark/_base.py
+++ b/cpmpy/tools/benchmark/_base.py
@@ -56,14 +56,14 @@ def __init__(self, reader:callable):
         """
         self.reader = reader
         
-    def read_instance(self, instance) -> cp.Model:
+    def read_instance(self, instance, open) -> cp.Model:
         """
         Parse a model instance to a CPMpy model.
 
         Arguments:
             instance (str or os.PathLike): The model instance to parse into a CPMpy model.
         """
-        return self.reader(instance)
+        return self.reader(instance, open=open)
     
     """
     Callback methods which can be overwritten to make a custom benchmark run.
@@ -326,13 +326,13 @@ def solution_count(self):
     Methods which can, bit most likely shouldn't, be overwritten.
     """
     
-    def set_memory_limit(self, mem_limit, verbose=False):
-        set_memory_limit(mem_limit, verbose=verbose)
+    def set_memory_limit(self, mem_limit):
+        set_memory_limit(mem_limit)
 
-    def set_time_limit(self, time_limit, verbose=False):
+    def set_time_limit(self, time_limit):
         p = psutil.Process()
         if time_limit is not None:
-            set_time_limit(int(time_limit - _wall_time(p) + time.process_time()), verbose=verbose)
+            set_time_limit(int(time_limit - _wall_time(p) + time.process_time()))
         else:
             set_time_limit(None)
 
@@ -419,11 +419,11 @@ def run(
 
             # Set memory limit (if provided)
             if mem_limit is not None:
-                self.set_memory_limit(mem_limit, verbose=verbose)
+                self.set_memory_limit(mem_limit)
 
             # Set time limit (if provided)
             if time_limit is not None:
-                self.set_time_limit(time_limit, verbose=verbose) # set remaining process time != wall time
+                self.set_time_limit(time_limit) # set remaining process time != wall time
     
             # ------------------------------ Parse instance ------------------------------ #
 

From 6ab8b32932da152140bd94e168757fa9e4027ad5 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 15:21:48 +0200
Subject: [PATCH 18/46] Fix overwritten open

---
 cpmpy/tools/benchmark/runner.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cpmpy/tools/benchmark/runner.py b/cpmpy/tools/benchmark/runner.py
index a83740459..9d3f4c5f6 100644
--- a/cpmpy/tools/benchmark/runner.py
+++ b/cpmpy/tools/benchmark/runner.py
@@ -107,6 +107,7 @@ def wrapper(instance_runner, conn, kwargs, verbose):
         conn.close()
 
 # exec_args = (instance_runner, filename, metadata, open, solver, time_limit, mem_limit, output_file, verbose) 
+_std_open = open
 def execute_instance(args: Tuple[callable, str, dict, callable, str, int, int, int, str, bool, bool, str]) -> None:
     """
     Solve a single benchmark instance and write results to file immediately.
@@ -224,7 +225,7 @@ def execute_instance(args: Tuple[callable, str, dict, callable, str, int, int, i
             # Pre-check if file exists to determine if we need to write header
             write_header = not os.path.exists(output_file)
 
-            with open(output_file, 'a', newline='') as f:
+            with _std_open(output_file, 'a', newline='') as f:
                 writer = csv.DictWriter(f, fieldnames=fieldnames)
                 if write_header:
                     writer.writeheader()

From 34c8a9e75828022003afdbc056068eee14f7078e Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 15:22:12 +0200
Subject: [PATCH 19/46] Read as string instead of StringIO

---
 cpmpy/tools/benchmark/runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpmpy/tools/benchmark/runner.py b/cpmpy/tools/benchmark/runner.py
index 9d3f4c5f6..b0edeb655 100644
--- a/cpmpy/tools/benchmark/runner.py
+++ b/cpmpy/tools/benchmark/runner.py
@@ -136,7 +136,7 @@ def execute_instance(args: Tuple[callable, str, dict, callable, str, int, int, i
 
     # Decompress before timers start
     with open(filename) as f:   # <- dataset-specific 'open' callable
-        filename = StringIO(f.read()) # read to memory-mapped file
+        filename = f.read() # read the whole (decompressed) contents into memory as a string
 
     # Start total timing
     total_start = time.time()

From fd55b3a204e4fd8578d88f4316bf7ff49eb74702 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 15:22:32 +0200
Subject: [PATCH 20/46] Read as text instead of binary

---
 cpmpy/tools/dataset/model/mse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpmpy/tools/dataset/model/mse.py b/cpmpy/tools/dataset/model/mse.py
index 8f395d677..ef31b0d64 100644
--- a/cpmpy/tools/dataset/model/mse.py
+++ b/cpmpy/tools/dataset/model/mse.py
@@ -100,7 +100,7 @@ def download(self):
         zip_path.unlink()
 
     def open(self, instance: os.PathLike) -> callable:
-        return lzma.open if str(instance).endswith(".xz") else open
+        return lzma.open(instance, "rt") if str(instance).endswith(".xz") else open(instance)
 
 if __name__ == "__main__":
     dataset = MSEDataset(year=2024, track="exact-weighted", download=True)

From 2be9fa67820ece6aa1044822a5d88b618faa2f40 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 17:11:27 +0200
Subject: [PATCH 21/46] Sigterm callbacks

---
 cpmpy/tools/benchmark/_base.py  | 47 ++++++++++++++++++++++++++++++++-
 cpmpy/tools/benchmark/mse.py    | 39 ++++++++++++++++++++-------
 cpmpy/tools/benchmark/runner.py | 30 ++++++++++++---------
 3 files changed, 93 insertions(+), 23 deletions(-)

diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py
index c6b4353d9..3522af9e9 100644
--- a/cpmpy/tools/benchmark/_base.py
+++ b/cpmpy/tools/benchmark/_base.py
@@ -30,15 +30,25 @@
 
 from abc import ABC
 
+import os
+import signal
+import sys
 import time
 import random
 import psutil
 import warnings
+from enum import Enum
 from typing import Optional
 
 import cpmpy as cp
 from cpmpy.tools.benchmark import _mib_as_bytes, _wall_time, set_memory_limit, set_time_limit, _bytes_as_mb, _bytes_as_gb
 
+class ExitStatus(Enum):
+    unsupported:str = "unsupported" # instance contains an unsupported feature (e.g. a unsupported global constraint)
+    sat:str = "sat" # CSP : found a solution | COP : found a solution but couldn't prove optimality
+    optimal:str = "optimal" # optimal COP solution found
+    unsat:str = "unsat" # instance is unsatisfiable
+    unknown:str = "unknown" # any other case
 
 class Benchmark(ABC):
     """
@@ -49,12 +59,13 @@ class Benchmark(ABC):
     It is designed to be extended or customized for specific benchmarking needs.    
     """
 
-    def __init__(self, reader:callable):
+    def __init__(self, reader:callable, exit_status:Enum):
         """
         Arguments:
             reader (callable): A parser from a model format to a CPMPy model.
         """
         self.reader = reader
+        self.exit_status = exit_status
         
     def read_instance(self, instance, open) -> cp.Model:
         """
@@ -92,6 +103,12 @@ def handle_exception(self, e):
             if line.strip():
                 self.print_comment(line)
 
+    def handle_sigterm(self):
+        pass
+        
+    def handle_rlimit_cpu(self):
+        pass
+
     """
     Solver arguments (can also be tweaked for a specific benchmark).
     """
@@ -336,6 +353,29 @@ def set_time_limit(self, time_limit):
         else:
             set_time_limit(None)
 
+    def sigterm_handler(self, _signo, _stack_frame):  # signal-handler signature: (signal number, current stack frame)
+        exit_code = self.handle_sigterm()  # benchmark-specific hook; the base implementation returns None
+        print(flush=True)  # emit a newline and flush stdout before the hard exit
+        os._exit(exit_code if exit_code is not None else 0)  # os._exit() requires an int; default to 0 when the hook returns None
+        
+    def rlimit_cpu_handler(self, _signo, _stack_frame):  # signal-handler signature: (signal number, current stack frame)
+        exit_code = self.handle_rlimit_cpu()  # benchmark-specific hook; the base implementation returns None
+        print(flush=True)  # emit a newline and flush stdout before the hard exit
+        os._exit(exit_code if exit_code is not None else 0)  # os._exit() requires an int; default to 0 when the hook returns None
+
+    def init_signal_handlers(self):
+        """
+        Configure signal handlers
+        """
+        signal.signal(signal.SIGINT, self.sigterm_handler)
+        signal.signal(signal.SIGTERM, self.sigterm_handler)
+        signal.signal(signal.SIGINT, self.sigterm_handler)
+        signal.signal(signal.SIGABRT, self.sigterm_handler)
+        if sys.platform != "win32":
+            signal.signal(signal.SIGXCPU, self.rlimit_cpu_handler)
+        else:
+            warnings.warn("Windows does not support setting SIGXCPU signal")
+
     def post_model(self, model, solver, solver_args):
         """
         Post the model to the selected backend solver.
@@ -417,6 +457,8 @@ def run(
             if seed is not None:
                 random.seed(seed)
 
+            self.init_signal_handlers()
+
             # Set memory limit (if provided)
             if mem_limit is not None:
                 self.set_memory_limit(mem_limit)
@@ -488,6 +530,9 @@ def run(
         except NotImplementedError as e:
             self.handle_not_implemented(e)
             raise e
+        except TimeoutError as e:
+            self.handle_exception(e) # TODO add callback for timeout?
+            raise e
         except Exception as e:
             self.handle_exception(e)
             raise e
diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py
index a11b1f5cb..3745e4503 100644
--- a/cpmpy/tools/benchmark/mse.py
+++ b/cpmpy/tools/benchmark/mse.py
@@ -56,7 +56,7 @@
 from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus
 
 
-class ExitStatus(Enum):
+class MSEExitStatus(Enum):
     unsupported:str = "UNSUPPORTED" # instance contains an unsupported feature (e.g. a unsupported global constraint)
     sat:str = "SATISFIABLE" # CSP : found a solution | COP : found a solution but couldn't prove optimality
     optimal:str = "OPTIMUM" + chr(32) + "FOUND" # optimal COP solution found
@@ -89,12 +89,12 @@ class MSEBenchmark(Benchmark):
     """
 
     def __init__(self):
-        super().__init__(reader=read_wcnf)
+        super().__init__(reader=read_wcnf, exit_status=MSEExitStatus)
     
     def print_comment(self, comment:str):
         print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True)
 
-    def print_status(self, status: ExitStatus) -> None:
+    def print_status(self, status: MSEExitStatus) -> None:
         print('s' + chr(32) + status.value, end="\n", flush=True)
 
     def print_value(self, value: str) -> None:
@@ -107,27 +107,46 @@ def print_objective(self, objective: int) -> None:
     def print_result(self, s):
         if s.status().exitstatus == CPMStatus.OPTIMAL:
             self.print_value(solution_mse(s))
-            self.print_status(ExitStatus.optimal)
+            self.print_status(MSEExitStatus.optimal)
         elif s.status().exitstatus == CPMStatus.FEASIBLE:
             self.print_value(solution_mse(s))
-            self.print_status(ExitStatus.sat)
+            self.print_status(MSEExitStatus.sat)
         elif s.status().exitstatus == CPMStatus.UNSATISFIABLE:
-            self.print_status(ExitStatus.unsat)
+            self.print_status(MSEExitStatus.unsat)
         else:
             self.print_comment("Solver did not find any solution within the time/memory limit")
-            self.print_status(ExitStatus.unknown)
+            self.print_status(MSEExitStatus.unknown)
 
     def handle_memory_error(self, mem_limit):
         super().handle_memory_error(mem_limit)
-        self.print_status(ExitStatus.unknown)
+        self.print_status(MSEExitStatus.unknown)
 
     def handle_not_implemented(self, e):
         super().handle_not_implemented(e)
-        self.print_status(ExitStatus.unsupported)
+        self.print_status(MSEExitStatus.unsupported)
 
     def handle_exception(self, e):
         super().handle_exception(e)
-        self.print_status(ExitStatus.unknown)
+        self.print_status(MSEExitStatus.unknown)
+
+    
+    def handle_sigterm(self):
+        """
+        Handles a SIGTERM. Gives us 1 second to finish the current job before we get killed.
+        """
+        # Report that we haven't found a solution in time
+        self.print_status(MSEExitStatus.unknown)
+        self.print_comment("SIGTERM raised.")
+        return 0
+        
+    def handle_rlimit_cpu(self):
+        """
+        Handles a SIGXCPU.
+        """
+        # Report that we haven't found a solution in time
+        self.print_status(MSEExitStatus.unknown)
+        self.print_comment("SIGXCPU raised.")
+        return 0
 
     def parse_output_line(self, line, result):
         if line.startswith('s '):
diff --git a/cpmpy/tools/benchmark/runner.py b/cpmpy/tools/benchmark/runner.py
index b0edeb655..933dac132 100644
--- a/cpmpy/tools/benchmark/runner.py
+++ b/cpmpy/tools/benchmark/runner.py
@@ -30,8 +30,6 @@
 from filelock import FileLock
 from concurrent.futures import ThreadPoolExecutor
 
-from cpmpy.tools.xcsp3.xcsp3_cpmpy import xcsp3_cpmpy, init_signal_handlers, ExitStatus
-
 class Tee:
     """
     A stream-like object that duplicates writes to multiple underlying streams.
@@ -96,14 +94,22 @@ def wrapper(instance_runner, conn, kwargs, verbose):
         sys.stdout = Tee(original_stdout, pipe_writer) # forward to pipe and console
 
     try:
-        init_signal_handlers() # configure OS signal handlers
         instance_runner.run(**kwargs)
         conn.send({"status": "ok"})
+    except TimeoutError:
+        try:
+            conn.send({"status": "timeout"})
+        except (BrokenPipeError, EOFError):
+            pass
     except Exception as e: # capture exceptions and report in state
         tb_str = traceback.format_exc()
-        conn.send({"status": "error", "exception": e, "traceback": tb_str})
+        try:
+            conn.send({"status": "error", "exception": e, "traceback": tb_str})
+        except (BrokenPipeError, EOFError):
+            pass
+        #conn.send({"status": "error", "exception": e, "traceback": tb_str})
     finally:
-        sys.stdout = original_stdout
+        #sys.stdout = original_stdout
         conn.close()
 
 # exec_args = (instance_runner, filename, metadata, open, solver, time_limit, mem_limit, output_file, verbose) 
@@ -125,7 +131,7 @@ def execute_instance(args: Tuple[callable, str, dict, callable, str, int, int, i
     instance_runner, filename, metadata, open, solver, time_limit, mem_limit, cores, output_file, verbose, intermediate, checker_path = args
 
     # Fieldnames for the CSV file
-    fieldnames = ['instance'] + list(metadata.keys()) + \
+    fieldnames = list(metadata.keys()) + \
                  ['solver',
                   'time_total', 'time_parse', 'time_model', 'time_post', 'time_solve',
                   'status', 'objective_value', 'solution', 'intermediate', 'checker_result']
@@ -194,13 +200,13 @@ def execute_instance(args: Tuple[callable, str, dict, callable, str, int, int, i
             raise()
 
     # Parse the exit status
-    if status["status"] == "error":
+    if status["status"] == "timeout":
         # Ignore timeouts
-        if "TimeoutError" in repr(status["exception"]):
-            pass
-        # All other exceptions, put in solution field
-        elif result['solution'] is None:
-            result['status'] = ExitStatus.unknown.value
+        pass
+    elif status["status"] == "error":
+        # All exceptions, put in solution field
+        if result['solution'] is None:
+            result['status'] = instance_runner.exit_status.unknown.value
             result["solution"] = status["exception"]    
 
     # if checker_path is not None and complete_solution is not None: TODO: generalise 'checkers' for benchmarks

From 2e646231405bd2f410fcfdb47192259b5ebad0c5 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 18:18:02 +0200
Subject: [PATCH 22/46] Attempt at fixing some nested memory exceptions

---
 cpmpy/tools/benchmark/__init__.py | 7 +++++++
 cpmpy/tools/benchmark/_base.py    | 3 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/cpmpy/tools/benchmark/__init__.py b/cpmpy/tools/benchmark/__init__.py
index b5a26f62a..54aa8031f 100644
--- a/cpmpy/tools/benchmark/__init__.py
+++ b/cpmpy/tools/benchmark/__init__.py
@@ -13,6 +13,7 @@
 MEMORY_BUFFER_SOLVER = 20 # MB
 
 
+
 def set_memory_limit(mem_limit):
     """
     Set memory limit (Virtual Memory Size). 
@@ -25,6 +26,12 @@ def set_memory_limit(mem_limit):
         else:
             warnings.warn("Memory limits using `resource` are not supported on Windows. Skipping hard limit.")
 
+def disable_memory_limit():
+    if sys.platform != "win32":
+        soft, hard = resource.getrlimit(resource.RLIMIT_AS)
+        # raise the soft limit to the current hard limit (the hard limit itself is left unchanged)
+        resource.setrlimit(resource.RLIMIT_AS, (hard, hard))
+
 def set_time_limit(time_limit, verbose:bool=False):
     """
     Set time limit (CPU time in seconds).
diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py
index 3522af9e9..2e81505e1 100644
--- a/cpmpy/tools/benchmark/_base.py
+++ b/cpmpy/tools/benchmark/_base.py
@@ -41,7 +41,7 @@
 from typing import Optional
 
 import cpmpy as cp
-from cpmpy.tools.benchmark import _mib_as_bytes, _wall_time, set_memory_limit, set_time_limit, _bytes_as_mb, _bytes_as_gb
+from cpmpy.tools.benchmark import _mib_as_bytes, _wall_time, set_memory_limit, set_time_limit, _bytes_as_mb, _bytes_as_gb, disable_memory_limit
 
 class ExitStatus(Enum):
     unsupported:str = "unsupported" # instance contains an unsupported feature (e.g. a unsupported global constraint)
@@ -525,6 +525,7 @@ def run(
 
             
         except MemoryError as e:
+            disable_memory_limit()
             self.handle_memory_error(mem_limit)
             raise e
         except NotImplementedError as e:

From 5b926807300e0d196197bcded7685085bd73cf4c Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 18:18:52 +0200
Subject: [PATCH 23/46] Overwritable exit status

---
 cpmpy/tools/benchmark/mse.py   |  1 +
 cpmpy/tools/benchmark/opb.py   | 39 +++++++++++++++++++++++++---------
 cpmpy/tools/benchmark/xcsp3.py | 25 +++++++++++-----------
 3 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py
index 3745e4503..b7d645369 100644
--- a/cpmpy/tools/benchmark/mse.py
+++ b/cpmpy/tools/benchmark/mse.py
@@ -31,6 +31,7 @@
 .. autosummary::
     :nosignatures:
 
+    MSEExitStatus
     MSEBenchmark
 
 =================
diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py
index 9d669a075..905d7ab0e 100644
--- a/cpmpy/tools/benchmark/opb.py
+++ b/cpmpy/tools/benchmark/opb.py
@@ -31,6 +31,7 @@
 .. autosummary::
     :nosignatures:
 
+    OPBExitStatus
     OPBBenchmark
 
 =================
@@ -56,7 +57,7 @@
 from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus
 
 
-class ExitStatus(Enum):
+class OPBExitStatus(Enum):
     unsupported:str = "UNSUPPORTED" # instance contains an unsupported feature (e.g. a unsupported global constraint)
     sat:str = "SATISFIABLE" # CSP : found a solution | COP : found a solution but couldn't prove optimality
     optimal:str = "OPTIMUM" + chr(32) + "FOUND" # optimal COP solution found
@@ -82,12 +83,12 @@ class OPBBenchmark(Benchmark):
     """
 
     def __init__(self):
-        super().__init__(reader=read_opb)
+        super().__init__(reader=read_opb, exit_status=OPBExitStatus)
     
     def print_comment(self, comment:str):
         print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True)
 
-    def print_status(self, status: ExitStatus) -> None:
+    def print_status(self, status: OPBExitStatus) -> None:
         print('s' + chr(32) + status.value, end="\n", flush=True)
 
     def print_value(self, value: str) -> None:
@@ -101,27 +102,45 @@ def print_result(self, s):
         if s.status().exitstatus == CPMStatus.OPTIMAL:
             self.print_result()
             self.print_value(solution_opb(s))
-            self.print_status(ExitStatus.optimal)
+            self.print_status(OPBExitStatus.optimal)
         elif s.status().exitstatus == CPMStatus.FEASIBLE:
             self.print_value(solution_opb(s))
-            self.print_status(ExitStatus.sat)
+            self.print_status(OPBExitStatus.sat)
         elif s.status().exitstatus == CPMStatus.UNSATISFIABLE:
-            self.print_status(ExitStatus.unsat)
+            self.print_status(OPBExitStatus.unsat)
         else:
             self.print_comment("Solver did not find any solution within the time/memory limit")
-            self.print_status(ExitStatus.unknown)
+            self.print_status(OPBExitStatus.unknown)
 
     def handle_memory_error(self, mem_limit):
         super().handle_memory_error(mem_limit)
-        self.print_status(ExitStatus.unknown)
+        self.print_status(OPBExitStatus.unknown)
 
     def handle_not_implemented(self, e):
         super().handle_not_implemented(e)
-        self.print_status(ExitStatus.unsupported)
+        self.print_status(OPBExitStatus.unsupported)
 
     def handle_exception(self, e):
         super().handle_exception(e)
-        self.print_status(ExitStatus.unknown)
+        self.print_status(OPBExitStatus.unknown)
+
+    def handle_sigterm(self):
+        """
+        Handles a SIGTERM. Gives us 1 second to finish the current job before we get killed.
+        """
+        # Report that we haven't found a solution in time
+        self.print_status(OPBExitStatus.unknown)
+        self.print_comment("SIGTERM raised.")
+        return 0
+        
+    def handle_rlimit_cpu(self):
+        """
+        Handles a SIGXCPU.
+        """
+        # Report that we haven't found a solution in time
+        self.print_status(OPBExitStatus.unknown)
+        self.print_comment("SIGXCPU raised.")
+        return 0
 
     def parse_output_line(self, line, result):
         if line.startswith('s '):
diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py
index 9601a4530..9dd9849fe 100644
--- a/cpmpy/tools/benchmark/xcsp3.py
+++ b/cpmpy/tools/benchmark/xcsp3.py
@@ -31,6 +31,7 @@
 .. autosummary::
     :nosignatures:
 
+    XCSP3ExitStatus
     XCSP3Benchmark
 
 =================
@@ -60,7 +61,7 @@
 import xml.etree.cElementTree as ET
 
 
-class ExitStatus(Enum):
+class XCSP3ExitStatus(Enum):
     unsupported:str = "UNSUPPORTED" # instance contains an unsupported feature (e.g. a unsupported global constraint)
     sat:str = "SATISFIABLE" # CSP : found a solution | COP : found a solution but couldn't prove optimality
     optimal:str = "OPTIMUM" + chr(32) + "FOUND" # optimal COP solution found
@@ -120,12 +121,12 @@ class XCSP3Benchmark(Benchmark):
     """
 
     def __init__(self):
-        super().__init__(reader=read_xcsp3)
+        super().__init__(reader=read_xcsp3, exit_status=XCSP3ExitStatus)
     
     def print_comment(self, comment:str):
         print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True)
 
-    def print_status(self, status: ExitStatus) -> None:
+    def print_status(self, status: XCSP3ExitStatus) -> None:
         print('s' + chr(32) + status.value, end="\n", flush=True)
 
     def print_value(self, value: str) -> None:
@@ -139,35 +140,35 @@ def print_result(self, s):
         if s.status().exitstatus == CPMStatus.OPTIMAL:
             self.print_result()
             self.print_value(solution_xcsp3(s))
-            self.print_status(ExitStatus.optimal)
+            self.print_status(XCSP3ExitStatus.optimal)
         elif s.status().exitstatus == CPMStatus.FEASIBLE:
             self.print_value(solution_xcsp3(s))
-            self.print_status(ExitStatus.sat)
+            self.print_status(XCSP3ExitStatus.sat)
         elif s.status().exitstatus == CPMStatus.UNSATISFIABLE:
-            self.print_status(ExitStatus.unsat)
+            self.print_status(XCSP3ExitStatus.unsat)
         else:
             self.print_comment("Solver did not find any solution within the time/memory limit")
-            self.print_status(ExitStatus.unknown)
+            self.print_status(XCSP3ExitStatus.unknown)
 
     def handle_memory_error(self, mem_limit):
         super().handle_memory_error(mem_limit)
-        self.print_status(ExitStatus.unknown)
+        self.print_status(XCSP3ExitStatus.unknown)
 
     def handle_not_implemented(self, e):
         super().handle_not_implemented(e)
-        self.print_status(ExitStatus.unsupported)
+        self.print_status(XCSP3ExitStatus.unsupported)
 
     def handle_exception(self, e):
         if isinstance(e, ParseError):
             if "out of memory" in e.msg:
                 self.print_comment(f"MemoryError raised by parser.")
-                self.print_status(ExitStatus.unknown)
+                self.print_status(XCSP3ExitStatus.unknown)
             else:
                 self.print_comment(f"An {type(e)} got raised by the parser: {e}")
-                self.print_status(ExitStatus.unknown)
+                self.print_status(XCSP3ExitStatus.unknown)
         else:
             super().handle_exception(e)
-            self.print_status(ExitStatus.unknown)
+            self.print_status(XCSP3ExitStatus.unknown)
 
     def parse_output_line(self, line, result):
         if line.startswith('s '):

From 8fff25480e8bfdb9f0b7d787d26b4c143fb1fdbd Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 18:19:11 +0200
Subject: [PATCH 24/46] Validate dataset arguments

---
 cpmpy/tools/dataset/model/mse.py   | 6 ++++++
 cpmpy/tools/dataset/model/opb.py   | 6 ++++++
 cpmpy/tools/dataset/model/xcsp3.py | 6 ++++++
 3 files changed, 18 insertions(+)

diff --git a/cpmpy/tools/dataset/model/mse.py b/cpmpy/tools/dataset/model/mse.py
index ef31b0d64..3ddfebf35 100644
--- a/cpmpy/tools/dataset/model/mse.py
+++ b/cpmpy/tools/dataset/model/mse.py
@@ -55,6 +55,12 @@ def __init__(
         self.year = year
         self.track = track
 
+        # Check requested dataset
+        if not str(year).startswith('20'):
+            raise ValueError("Year must start with '20'")
+        if not track:
+            raise ValueError("Track must be specified, e.g. OPT-LIN, DEC-LIN, ...")
+
         dataset_dir = self.root / str(year) / track
 
         super().__init__(
diff --git a/cpmpy/tools/dataset/model/opb.py b/cpmpy/tools/dataset/model/opb.py
index 40e6a282d..0915c6509 100644
--- a/cpmpy/tools/dataset/model/opb.py
+++ b/cpmpy/tools/dataset/model/opb.py
@@ -55,6 +55,12 @@ def __init__(
         self.year = year
         self.track = track
 
+        # Check requested dataset
+        if not str(year).startswith('20'):
+            raise ValueError("Year must start with '20'")
+        if not track:
+            raise ValueError("Track must be specified, e.g. exact-weighted, exact-unweighted, ...")
+
         dataset_dir = self.root / str(year) / track
 
         super().__init__(
diff --git a/cpmpy/tools/dataset/model/xcsp3.py b/cpmpy/tools/dataset/model/xcsp3.py
index 597a2af55..21b38f35e 100644
--- a/cpmpy/tools/dataset/model/xcsp3.py
+++ b/cpmpy/tools/dataset/model/xcsp3.py
@@ -56,6 +56,12 @@ def __init__(
         self.year = year
         self.track = track
 
+        # Check requested dataset
+        if not str(year).startswith('20'):
+            raise ValueError("Year must start with '20'")
+        if not track:
+            raise ValueError("Track must be specified, e.g. COP, CSP, ...")
+
         dataset_dir = self.root / str(year) / track
 
         super().__init__(

From 2b4a8f02daa648e48d2e806b2c6ce98832323237 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 18:19:25 +0200
Subject: [PATCH 25/46] Check non-empty dataset

---
 cpmpy/tools/dataset/_base.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cpmpy/tools/dataset/_base.py b/cpmpy/tools/dataset/_base.py
index aa22ae930..a8954aa9f 100644
--- a/cpmpy/tools/dataset/_base.py
+++ b/cpmpy/tools/dataset/_base.py
@@ -36,6 +36,10 @@ def __init__(
                 raise ValueError(f"Dataset not found. Please set download=True to download the dataset.")
             else:
                 self.download()
+
+        files = sorted(list(self.dataset_dir.glob(f"*{self.extension}")))
+        if len(files) == 0:
+            raise ValueError("Cannot find any instances inside dataset. Is it a valid dataset? If so, please report on GitHub.")
                 
     @abstractmethod
     def category(self) -> dict:

From b68144d160c28f0da421e5f9b986b7492aed9716 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 18:24:13 +0200
Subject: [PATCH 26/46] Add feedback finished downloading

---
 cpmpy/tools/dataset/_base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpmpy/tools/dataset/_base.py b/cpmpy/tools/dataset/_base.py
index a8954aa9f..496780b2d 100644
--- a/cpmpy/tools/dataset/_base.py
+++ b/cpmpy/tools/dataset/_base.py
@@ -36,6 +36,8 @@ def __init__(
                 raise ValueError(f"Dataset not found. Please set download=True to download the dataset.")
             else:
                 self.download()
+                files = sorted(list(self.dataset_dir.glob(f"*{self.extension}")))
+                print(f"Finished downloading {len(files)} instances")
 
         files = sorted(list(self.dataset_dir.glob(f"*{self.extension}")))
         if len(files) == 0:

From b08df43dba1e034e7fb88d98ad624161faf534ee Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 12 Sep 2025 18:47:43 +0200
Subject: [PATCH 27/46] Small fixes

---
 cpmpy/tools/benchmark/opb.py       | 1 -
 cpmpy/tools/benchmark/xcsp3.py     | 3 +--
 cpmpy/tools/dataset/model/opb.py   | 2 +-
 cpmpy/tools/dataset/model/xcsp3.py | 2 +-
 4 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py
index 905d7ab0e..5c1e0f606 100644
--- a/cpmpy/tools/benchmark/opb.py
+++ b/cpmpy/tools/benchmark/opb.py
@@ -100,7 +100,6 @@ def print_objective(self, objective: int) -> None:
 
     def print_result(self, s):
         if s.status().exitstatus == CPMStatus.OPTIMAL:
-            self.print_result()
             self.print_value(solution_opb(s))
             self.print_status(OPBExitStatus.optimal)
         elif s.status().exitstatus == CPMStatus.FEASIBLE:
diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py
index 9dd9849fe..bb2f02410 100644
--- a/cpmpy/tools/benchmark/xcsp3.py
+++ b/cpmpy/tools/benchmark/xcsp3.py
@@ -138,7 +138,6 @@ def print_objective(self, objective: int) -> None:
 
     def print_result(self, s):
         if s.status().exitstatus == CPMStatus.OPTIMAL:
-            self.print_result()
             self.print_value(solution_xcsp3(s))
             self.print_status(XCSP3ExitStatus.optimal)
         elif s.status().exitstatus == CPMStatus.FEASIBLE:
@@ -176,7 +175,7 @@ def parse_output_line(self, line, result):
         elif line.startswith('v ') and result['solution'] is None:
             # only record first line, contains 'type' and 'cost'
             solution = line.split("\n")[0][2:].strip()
-            result['solution'] = str(solution)
+            result['solution'] = solution
             complete_solution = line
             if "cost" in solution:
                 result['objective_value'] = solution.split('cost="')[-1][:-2]
diff --git a/cpmpy/tools/dataset/model/opb.py b/cpmpy/tools/dataset/model/opb.py
index 0915c6509..201075749 100644
--- a/cpmpy/tools/dataset/model/opb.py
+++ b/cpmpy/tools/dataset/model/opb.py
@@ -139,7 +139,7 @@ def download(self):
         tar_path.unlink()
 
     def open(self, instance: os.PathLike) -> callable:
-        return lzma.open if str(instance).endswith(".xz") else open
+        return lzma.open(instance, 'rt') if str(instance).endswith(".xz") else open(instance)
 
 if __name__ == "__main__":
     dataset = OPBDataset(year=2024, track="DEC-LIN", download=True)
diff --git a/cpmpy/tools/dataset/model/xcsp3.py b/cpmpy/tools/dataset/model/xcsp3.py
index 21b38f35e..f17a4d193 100644
--- a/cpmpy/tools/dataset/model/xcsp3.py
+++ b/cpmpy/tools/dataset/model/xcsp3.py
@@ -129,7 +129,7 @@ def download(self):
         zip_path.unlink()
 
     def open(self, instance: os.PathLike) -> callable:
-        return partial(lzma.open, mode='rt', encoding='utf-8') if str(instance).endswith(".lzma") else open
+        return lzma.open(instance, mode='rt', encoding='utf-8') if str(instance).endswith(".lzma") else open(instance)
 
 
 if __name__ == "__main__":

From 431b065609b3772dfa0bff4aa665f1d49d903548 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 10 Oct 2025 13:39:34 +0200
Subject: [PATCH 28/46] Fix intermediate solutions and time tracking

---
 cpmpy/tools/benchmark/_base.py  |  2 +-
 cpmpy/tools/benchmark/mse.py    | 14 ++++++++------
 cpmpy/tools/benchmark/opb.py    |  3 +++
 cpmpy/tools/benchmark/runner.py |  1 +
 cpmpy/tools/benchmark/xcsp3.py  |  3 +++
 5 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py
index 2e81505e1..8055d43a6 100644
--- a/cpmpy/tools/benchmark/_base.py
+++ b/cpmpy/tools/benchmark/_base.py
@@ -84,7 +84,7 @@ def print_comment(self, comment:str):
         print(comment)
 
     def print_intermediate(self, objective:int):
-        print("Intermediate solution:", objective)
+        self.print_comment(f"Intermediate solution: {objective}")  # print_comment accepts exactly one string
 
     def print_result(self, s):
         self.print_comment(s.status())
diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py
index b7d645369..656467bf9 100644
--- a/cpmpy/tools/benchmark/mse.py
+++ b/cpmpy/tools/benchmark/mse.py
@@ -99,11 +99,13 @@ def print_status(self, status: MSEExitStatus) -> None:
         print('s' + chr(32) + status.value, end="\n", flush=True)
 
     def print_value(self, value: str) -> None:
-        value = value[:-2].replace("\n", "\nv" + chr(32)) + value[-2:]
         print('v' + chr(32) + value, end="\n", flush=True)
 
     def print_objective(self, objective: int) -> None:
         print('o' + chr(32) + str(objective), end="\n", flush=True)
+    
+    def print_intermediate(self, objective:int):
+        self.print_objective(objective)
 
     def print_result(self, s):
         if s.status().exitstatus == CPMStatus.OPTIMAL:
@@ -159,17 +161,17 @@ def parse_output_line(self, line, result):
                 result['solution'] = solution
             else:
                 result['solution'] = result['solution'] + ' ' + str(solution)
+        elif line.startswith('c Solution'):
+            parts = line.split(', time = ')
+            # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines
+            self._sol_time = float(parts[-1].replace('s', '').rstrip())
         elif line.startswith('o '):
             obj = int(line[2:].strip())
             if result['intermediate'] is None:
                 result['intermediate'] = []
-            result['intermediate'] += [(sol_time, obj)]
+            result['intermediate'] += [(self._sol_time, obj)]
             result['objective_value'] = obj
             obj = None
-        elif line.startswith('c Solution'):
-            parts = line.split(', time = ')
-            # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines
-            sol_time = float(parts[-1].replace('s', '').rstrip())
         elif line.startswith('c took '):
             # Parse timing information
             parts = line.split(' seconds to ')
diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py
index 5c1e0f606..b92fcb257 100644
--- a/cpmpy/tools/benchmark/opb.py
+++ b/cpmpy/tools/benchmark/opb.py
@@ -98,6 +98,9 @@ def print_value(self, value: str) -> None:
     def print_objective(self, objective: int) -> None:
         print('o' + chr(32) + str(objective), end="\n", flush=True)
 
+    def print_intermediate(self, objective:int):
+        self.print_objective(objective)
+
     def print_result(self, s):
         if s.status().exitstatus == CPMStatus.OPTIMAL:
             self.print_value(solution_opb(s))
diff --git a/cpmpy/tools/benchmark/runner.py b/cpmpy/tools/benchmark/runner.py
index 933dac132..6bc85e6ae 100644
--- a/cpmpy/tools/benchmark/runner.py
+++ b/cpmpy/tools/benchmark/runner.py
@@ -94,6 +94,7 @@ def wrapper(instance_runner, conn, kwargs, verbose):
         sys.stdout = Tee(original_stdout, pipe_writer) # forward to pipe and console
 
     try:
+        kwargs["verbose"] = verbose
         instance_runner.run(**kwargs)
         conn.send({"status": "ok"})
     except TimeoutError:
diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py
index bb2f02410..47d0289e4 100644
--- a/cpmpy/tools/benchmark/xcsp3.py
+++ b/cpmpy/tools/benchmark/xcsp3.py
@@ -136,6 +136,9 @@ def print_value(self, value: str) -> None:
     def print_objective(self, objective: int) -> None:
         print('o' + chr(32) + str(objective), end="\n", flush=True)
 
+    def print_intermediate(self, objective:int):
+        self.print_objective(objective)
+
     def print_result(self, s):
         if s.status().exitstatus == CPMStatus.OPTIMAL:
             self.print_value(solution_xcsp3(s))

From 7d98c354f8668ca61b1ce9950564d22fa0cf66fe Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 10 Oct 2025 13:39:54 +0200
Subject: [PATCH 29/46] Increase intermediate solution time resolution

---
 cpmpy/tools/benchmark/_base.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py
index 8055d43a6..b7171c6f6 100644
--- a/cpmpy/tools/benchmark/_base.py
+++ b/cpmpy/tools/benchmark/_base.py
@@ -156,7 +156,7 @@ def on_solution_callback(self):
                     
                     current_time = time.time()
                     obj = int(self.ObjectiveValue())
-                    _self.print_comment('Solution %i, time = %0.2fs' % 
+                    _self.print_comment('Solution %i, time = %0.4fs' % 
                                 (self.__solution_count, current_time - self.__start_time))
                     _self.print_intermediate(obj)
                     self.__solution_count += 1
@@ -286,7 +286,7 @@ def callback(self, *args, **kwargs):
                         if model.cbGet(GRB.Callback.MIP_SOLCNT) > self.__solution_count: # do we have a new solution?
 
                             obj = int(model.cbGet(GRB.Callback.MIP_OBJBST))
-                            _self.print_comment('Solution %i, time = %0.2fs' % 
+                            _self.print_comment('Solution %i, time = %0.4fs' % 
                                         (self.__solution_count, current_time - self.__start_time))
                             _self.print_intermediate(obj)
                             self.__solution_count = model.cbGet(GRB.Callback.MIP_SOLCNT)
@@ -324,7 +324,7 @@ def result_found(self, solver, sres):
                     current_time = time.time()
                     obj = sres.get_objective_value()
                     if obj is not None:
-                        _self.print_comment('Solution %i, time = %0.2fs' % 
+                        _self.print_comment('Solution %i, time = %0.4fs' % 
                                     (self.__solution_count, current_time - self.__start_time))
                         _self.print_intermediate(obj)
                         self.__solution_count += 1
@@ -472,7 +472,7 @@ def run(
             time_parse = time.time()
             model = self.read_instance(instance, open=open)
             time_parse = time.time() - time_parse
-            if verbose: self.print_comment(f"took {time_parse:.4f} seconds to parse model [{instance}]")
+            if verbose: self.print_comment(f"took {time_parse:.4f} seconds to parse model")
 
             if time_limit and time_limit < _wall_time(p):
                 raise TimeoutError("Time's up after parse")

From 4664051472c3a9e59bb2c7769592737d614329ff Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 10 Oct 2025 15:26:44 +0200
Subject: [PATCH 30/46] Missing default return argument

---
 cpmpy/tools/benchmark/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py
index b7171c6f6..11d17ed42 100644
--- a/cpmpy/tools/benchmark/_base.py
+++ b/cpmpy/tools/benchmark/_base.py
@@ -421,7 +421,7 @@ def solver_arguments(
             return self.cpo_arguments(model=model, cores=cores, seed=seed, intermediate=intermediate, **kwargs)
         else:
             self.print_comment(f"setting parameters of {solver} is not (yet) supported")
-            return dict()
+            return dict(), None
 
     def run(
         self,

From 582fc963e2a5eb6e5189c32ed23f6584fa08d670 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 17 Oct 2025 09:10:31 +0200
Subject: [PATCH 31/46] Only import "resource" when supported

---
 cpmpy/tools/benchmark/__init__.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cpmpy/tools/benchmark/__init__.py b/cpmpy/tools/benchmark/__init__.py
index 54aa8031f..ce383c1de 100644
--- a/cpmpy/tools/benchmark/__init__.py
+++ b/cpmpy/tools/benchmark/__init__.py
@@ -1,5 +1,3 @@
-
-import resource
 import sys
 import time
 import warnings
@@ -22,12 +20,14 @@ def set_memory_limit(mem_limit):
         soft = max(_mib_as_bytes(mem_limit) - _mib_as_bytes(MEMORY_BUFFER_SOFT), _mib_as_bytes(MEMORY_BUFFER_SOFT))
         hard = max(_mib_as_bytes(mem_limit) - _mib_as_bytes(MEMORY_BUFFER_HARD), _mib_as_bytes(MEMORY_BUFFER_HARD))
         if sys.platform != "win32":
+            import resource
             resource.setrlimit(resource.RLIMIT_AS, (soft, hard)) # limit memory in number of bytes
         else:
             warnings.warn("Memory limits using `resource` are not supported on Windows. Skipping hard limit.")
 
 def disable_memory_limit():
     if sys.platform != "win32":
+        import resource
         soft, hard = resource.getrlimit(resource.RLIMIT_AS)
         # set a very high soft limit
         resource.setrlimit(resource.RLIMIT_AS, (hard, hard))
@@ -38,6 +38,7 @@ def set_time_limit(time_limit, verbose:bool=False):
     """
     if time_limit is not None:
         if sys.platform != "win32":
+            import resource
             soft = time_limit
             hard = resource.RLIM_INFINITY
             resource.setrlimit(resource.RLIMIT_CPU, (soft, hard))

From 2eea41c23d6bd54db0dc1d5aa399a3da8920354d Mon Sep 17 00:00:00 2001
From: OrestisLomis <orestis.lomisg7@gmail.com>
Date: Thu, 23 Oct 2025 17:49:01 +0200
Subject: [PATCH 32/46] remove var x0 which is not used in opb

---
 cpmpy/tools/opb/parser.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/cpmpy/tools/opb/parser.py b/cpmpy/tools/opb/parser.py
index e300a2752..f63db7c7d 100644
--- a/cpmpy/tools/opb/parser.py
+++ b/cpmpy/tools/opb/parser.py
@@ -66,10 +66,10 @@ def _parse_term(line, vars):
 
         for v in vars_str.split():
             if v.startswith("~x"):
-                idx = int(v[2:]) # remove "~x"
+                idx = int(v[2:]) - 1 # remove "~x" and opb is 1-based indexing
                 factors.append(~vars[idx])
             else:
-                idx = int(v[1:]) # remove "x"
+                idx = int(v[1:]) - 1 # remove "x" and opb is 1-based indexing
                 factors.append(vars[idx])
         
         term = int(w) * reduce(mul, factors, 1) # create weighted term
@@ -162,13 +162,15 @@ def read_opb(opb: Union[str, os.PathLike], open=open) -> cp.Model:
         header = HEADER_RE.match(_line)
         if not header:
             raise ValueError(f"Missing or incorrect header: \n0: {line}1: {_line}2: ...")
-    nr_vars = int(header.group(2)) + 1
+    nr_vars = int(header.group(2))
 
     # Generator without comment lines
     reader = (l for l in map(str.strip, f) if l and l[0] != '*')
 
     # CPMpy objects
     vars = cp.boolvar(shape=nr_vars, name="x")
+    if nr_vars == 1:
+        vars = cp.cpm_array([vars]) # ensure vars is indexable even for single variable case
     model = cp.Model()
     
     # Special case for first line -> might contain objective function

From 6111fc43707d6455ce56f0821458638241c3724b Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 24 Oct 2025 15:25:26 +0200
Subject: [PATCH 33/46] rcpsp dataset and benchmark

---
 cpmpy/tools/benchmark/_base.py        |   2 +-
 cpmpy/tools/benchmark/psplib.py       | 213 ++++++++++++++++++++++++++
 cpmpy/tools/dataset/problem/psplib.py | 119 ++++++++++++++
 cpmpy/tools/rcpsp/__init__.py         |  20 +++
 cpmpy/tools/rcpsp/parser.py           | 171 +++++++++++++++++++++
 5 files changed, 524 insertions(+), 1 deletion(-)
 create mode 100644 cpmpy/tools/benchmark/psplib.py
 create mode 100644 cpmpy/tools/dataset/problem/psplib.py
 create mode 100644 cpmpy/tools/rcpsp/__init__.py
 create mode 100644 cpmpy/tools/rcpsp/parser.py

diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py
index 11d17ed42..4e718bc45 100644
--- a/cpmpy/tools/benchmark/_base.py
+++ b/cpmpy/tools/benchmark/_base.py
@@ -59,7 +59,7 @@ class Benchmark(ABC):
     It is designed to be extended or customized for specific benchmarking needs.    
     """
 
-    def __init__(self, reader:callable, exit_status:Enum):
+    def __init__(self, reader:callable, exit_status:Enum=ExitStatus):
         """
         Arguments:
             reader (callable): A parser from a model format to a CPMPy model.
diff --git a/cpmpy/tools/benchmark/psplib.py b/cpmpy/tools/benchmark/psplib.py
new file mode 100644
index 000000000..4fab0c99c
--- /dev/null
+++ b/cpmpy/tools/benchmark/psplib.py
@@ -0,0 +1,213 @@
+"""
+PSPLIB as a CPMpy benchmark
+
+This module provides a benchmarking framework for running CPMpy on PSPLIB 
+instances.
+
+Command-line Interface
+----------------------
+This script can be run directly to benchmark solvers on PSPLIB datasets.
+
+Usage:
+    python psplib.py --variant rcpsp --family j30 --solver ortools
+
+Arguments:
+    --variant       Problem variant (e.g., rcpsp).
+    --family        Problem family (e.g., j30, j120, ...)
+    --solver        Solver name (e.g., ortools, exact, choco, ...).
+    --workers       Number of parallel workers to use.
+    --time-limit    Time limit in seconds per instance.
+    --mem-limit     Memory limit in MB per instance.
+    --cores         Number of cores to assign to a single instance.
+    --output-dir    Output directory for CSV files.
+    --verbose       Show solver output if set.
+    --intermediate  Report intermediate solutions if supported.
+
+===============
+List of classes
+===============
+
+.. autosummary::
+    :nosignatures:
+
+    PSPLIBBenchmark
+
+
+=================
+List of functions
+=================
+
+.. autosummary::
+    :nosignatures:
+
+    solution_psplib
+"""
+
+import warnings
+import argparse
+from enum import Enum
+from pathlib import Path
+from datetime import datetime
+
+# CPMpy
+from cpmpy.tools.benchmark.runner import benchmark_runner
+from cpmpy.tools.benchmark._base import Benchmark, ExitStatus
+from cpmpy.tools.rcpsp import read_rcpsp
+from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus
+
+
+def solution_psplib(model):
+    """
+    Convert a CPMpy model solution into the solution string format.
+
+    Arguments:
+        model (cp.solvers.SolverInterface): The solver-specific model for which to print its solution
+
+    Returns:
+        str: formatted solution string.
+    """
+    variables = {var.name: var.value() for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]} # dirty workaround for all missed aux vars in user vars TODO fix with Ignace
+    return str(variables)
+
+class PSPLIBBenchmark(Benchmark):
+
+    """
+    PSPLIB as a CPMpy benchmark.
+    """
+
+    def __init__(self):
+        super().__init__(reader=read_rcpsp) # TODO: reader should depend on problem variant
+    
+    def print_comment(self, comment:str):
+        print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True)
+
+    def print_status(self, status: ExitStatus) -> None:
+        print('s' + chr(32) + status.value, end="\n", flush=True)
+
+    def print_value(self, value: str) -> None:
+        print('v' + chr(32) + value, end="\n", flush=True)
+
+    def print_objective(self, objective: int) -> None:
+        print('o' + chr(32) + str(objective), end="\n", flush=True)
+    
+    def print_intermediate(self, objective:int):
+        self.print_objective(objective)
+
+    def print_result(self, s):
+        if s.status().exitstatus == CPMStatus.OPTIMAL:
+            self.print_value(solution_psplib(s))
+            self.print_status(ExitStatus.optimal)
+        elif s.status().exitstatus == CPMStatus.FEASIBLE:
+            self.print_value(solution_psplib(s))
+            self.print_status(ExitStatus.sat)
+        elif s.status().exitstatus == CPMStatus.UNSATISFIABLE:
+            self.print_status(ExitStatus.unsat)
+        else:
+            self.print_comment("Solver did not find any solution within the time/memory limit")
+            self.print_status(ExitStatus.unknown)
+
+    def handle_memory_error(self, mem_limit):
+        super().handle_memory_error(mem_limit)
+        self.print_status(ExitStatus.unknown)
+
+    def handle_not_implemented(self, e):
+        super().handle_not_implemented(e)
+        self.print_status(ExitStatus.unsupported)
+
+    def handle_exception(self, e):
+        super().handle_exception(e)
+        self.print_status(ExitStatus.unknown)
+
+    
+    def handle_sigterm(self):
+        """
+        Handles a SIGTERM. Gives us 1 second to finish the current job before we get killed.
+        """
+        # Report that we haven't found a solution in time
+        self.print_status(ExitStatus.unknown)
+        self.print_comment("SIGTERM raised.")
+        return 0
+        
+    def handle_rlimit_cpu(self):
+        """
+        Handles a SIGXCPU.
+        """
+        # Report that we haven't found a solution in time
+        self.print_status(ExitStatus.unknown)
+        self.print_comment("SIGXCPU raised.")
+        return 0
+
+    def parse_output_line(self, line, result):
+        """Fold one line of captured output ('s ', 'v ', 'o ', 'c ...') into the `result` dict, in place."""
+        if line.startswith('s '):
+            result['status'] = line[2:].strip()
+        elif line.startswith('v '):
+            # first 'v' line starts the solution string, any later 'v' line is appended to it
+            solution = line.split("\n")[0][2:].strip()
+            if result.get('solution') is None:
+                result['solution'] = solution
+            else:
+                result['solution'] = result['solution'] + ' ' + str(solution)
+        elif line.startswith('c Solution'):
+            parts = line.split(', time = ')
+            # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines
+            self.sol_time = float(parts[-1].replace('s', '').rstrip())
+        elif line.startswith('o '):
+            obj = int(line[2:].strip())
+            if result['intermediate'] is None:
+                result['intermediate'] = []
+            # an 'o' line without a preceding 'c Solution' comment has no known time -> record None
+            result['intermediate'] += [(getattr(self, 'sol_time', None), obj)]
+            result['objective_value'] = obj
+        elif line.startswith('c took '):  # timing comment: "c took <t> seconds to <action> ..."
+            parts = line.split(' seconds to ')
+            if len(parts) == 2:
+                time_val = float(parts[0].replace('c took ', ''))
+                action = parts[1].strip()
+                if action.startswith('parse'):
+                    result['time_parse'] = time_val
+                elif action.startswith('convert'):
+                    result['time_model'] = time_val
+                elif action.startswith('post'):
+                    result['time_post'] = time_val
+                elif action.startswith('solve'):
+                    result['time_solve'] = time_val
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description='Benchmark solvers on PSPLIB instances')
+    parser.add_argument('--variant', type=str, required=True, help='Problem variant (e.g., rcpsp)')
+    parser.add_argument('--family', type=str, required=True, help='Problem family (e.g., j30, j120, ...)')
+    parser.add_argument('--solver', type=str, required=True, help='Solver name (e.g., ortools, exact, choco, ...)')
+    parser.add_argument('--workers', type=int, default=4, help='Number of parallel workers')
+    parser.add_argument('--time-limit', type=int, default=300, help='Time limit in seconds per instance')
+    parser.add_argument('--mem-limit', type=int, default=8192, help='Memory limit in MB per instance')
+    parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign to a single instance')
+    parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files')
+    parser.add_argument('--verbose', action='store_true', help='Show solver output')
+    parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions')
+    # parser.add_argument('--checker-path', type=str, default=None,
+    #                 help='Path to the XCSP3 solution checker JAR file')
+    args = parser.parse_args()
+
+    if not args.verbose:
+        warnings.filterwarnings("ignore")
+
+    # Load benchmark instances (as a dataset)
+    from cpmpy.tools.dataset.problem.psplib import PSPLibDataset
+    dataset = PSPLibDataset(variant=args.variant, family=args.family, download=True)
+
+    # Create the directory the CSV is actually written into (<output-dir>/psplib)
+    output_dir = Path(args.output_dir) / "psplib"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Get current timestamp in a filename-safe format
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+    # Define output file path with timestamp
+    output_file = str(output_dir / f"psplib_{args.variant}_{args.family}_{args.solver}_{timestamp}.csv")
+
+    # Run the benchmark
+    instance_runner = PSPLIBBenchmark()
+    output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args))
+    print(f"Results added to {output_file}")
diff --git a/cpmpy/tools/dataset/problem/psplib.py b/cpmpy/tools/dataset/problem/psplib.py
new file mode 100644
index 000000000..b1cbf70f6
--- /dev/null
+++ b/cpmpy/tools/dataset/problem/psplib.py
@@ -0,0 +1,119 @@
+import os
+import pathlib
+from typing import Tuple, Any
+from urllib.request import urlretrieve
+from urllib.error import HTTPError, URLError
+import zipfile
+
+class PSPLibDataset(object):  # torch.utils.data.Dataset compatible
+
+    """
+    PSPlib Dataset in a PyTorch compatible format.
+    
+    Arguments:
+        root (str): Root directory containing the psplib instances (if 'download', instances will be downloaded to this location)
+        variant (str): scheduling variant (only 'rcpsp' is supported for now)
+        family (str): family name (e.g. j30, j60, etc...)
+        transform (callable, optional): Optional transform to be applied on the instance data
+        target_transform (callable, optional): Optional transform to be applied on the file path
+        download (bool): If True, downloads the dataset from the internet and puts it in `root` directory
+    """
+    
+    def __init__(self, root: str = ".", variant: str = "rcpsp", family: str = "j30", transform=None, target_transform=None, download: bool = False):
+        """
+        Initialize the PSPLib Dataset.
+        """
+        
+        self.root = pathlib.Path(root)
+        self.variant = variant
+        self.family = family
+        self.transform = transform
+        self.target_transform = target_transform
+        self.family_dir = pathlib.Path(os.path.join(self.root, variant, family))
+        
+        self.families = dict(
+            rcpsp = ["j30", "j60", "j90", "j120"]
+        )
+        self.family_codes = dict(rcpsp="sm", mrcpsp="mm")
+
+        if variant != "rcpsp":
+            raise ValueError("Only 'rcpsp' variant is supported for now")
+        if family not in self.families[variant]:
+            raise ValueError(f"Unknown problem family. Must be any of {','.join(self.families[variant])}")
+        # Create root directory if it doesn't exist
+        self.root.mkdir(parents=True, exist_ok=True)
+        
+        if not self.family_dir.exists():
+            if not download:
+                raise ValueError(f"Dataset for variant {variant} and family {family} not found. Please set download=True to download the dataset.")
+            else:
+                print(f"Downloading PSPLib {variant} {family} instances...")
+                
+                zip_name = f"{family}.{self.family_codes[variant]}.zip"
+                url = f"https://www.om-db.wi.tum.de/psplib/files/"
+
+                url_path = url + zip_name
+                zip_path = self.root / zip_name
+                
+                try:
+                    urlretrieve(url_path, str(zip_path))
+                except (HTTPError, URLError) as e:
+                    raise ValueError(f"No dataset available for variant {variant} and family {family}. Error: {str(e)}")
+                
+                # make directory and extract files
+                with zipfile.ZipFile(zip_path, 'r') as zip_ref:                    
+                    # Create the family folder under root; parents=True ensures recursive creation
+                    self.family_dir.mkdir(parents=True, exist_ok=True)
+                    
+                    # Extract files
+                    for file_info in zip_ref.infolist():
+                        # Extract file to family_dir, removing main_folder/track prefix
+                        filename = pathlib.Path(file_info.filename).name
+                        with zip_ref.open(file_info) as source, open(self.family_dir / filename, 'wb') as target:
+                            target.write(source.read())
+                # Clean up the zip file
+                zip_path.unlink()
+            
+    def open(self, instance: os.PathLike) -> callable:
+        return open(instance, "r")
+
+        
+    def __len__(self) -> int:
+        """Return the total number of instances."""
+        return len(list(self.family_dir.glob(f"*.{self.family_codes[self.variant]}")))
+    
+    def __getitem__(self, index: int) -> Tuple[Any, Any]:
+        """
+        Get a single RCPSP instance filename and metadata.
+
+        Args:
+            index (int): Index of the instance to retrieve
+            
+        Returns:
+            Tuple[Any, Any]: A tuple containing:
+                - The filename of the instance
+                - Metadata dictionary with the variant, family and instance name
+        """
+        if index < 0 or index >= len(self):
+            raise IndexError("Index out of range")
+
+        # Get all instance files and sort for deterministic behavior # TODO: use natsort instead?
+        files = sorted(list(self.family_dir.glob(f"*.{self.family_codes[self.variant]}")))
+        file_path = files[index]
+
+        filename = str(file_path)
+        if self.transform:
+            # does not need to remain a filename...
+            filename = self.transform(filename)
+            
+        # Basic metadata about the instance
+        metadata = dict(
+            variant = self.variant,
+            family = self.family,
+            name = file_path.stem
+        )
+        
+        if self.target_transform:
+            metadata = self.target_transform(metadata)
+            
+        return filename, metadata
\ No newline at end of file
diff --git a/cpmpy/tools/rcpsp/__init__.py b/cpmpy/tools/rcpsp/__init__.py
new file mode 100644
index 000000000..b24d99980
--- /dev/null
+++ b/cpmpy/tools/rcpsp/__init__.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+##
+## __init__.py
+##
+"""
+Set of utilities for working with psplib-formatted rcpsp CP models.
+
+
+==================
+List of submodules
+==================
+
+.. autosummary::
+    :nosignatures:
+
+    parser
+"""
+
+from .parser import read_rcpsp
diff --git a/cpmpy/tools/rcpsp/parser.py b/cpmpy/tools/rcpsp/parser.py
new file mode 100644
index 000000000..cadc32482
--- /dev/null
+++ b/cpmpy/tools/rcpsp/parser.py
@@ -0,0 +1,171 @@
+"""
+Parser for the PSPLIB RCPSP format.
+
+
+=================
+List of functions
+=================
+
+.. autosummary::
+    :nosignatures:
+
+    read_rcpsp
+"""
+
+
+import os
+import sys
+import lzma
+import argparse
+import cpmpy as cp
+from io import StringIO
+from typing import Union
+
+
+_std_open = open
+def read_rcpsp(rcpsp: Union[str, os.PathLike], open=open) -> cp.Model:
+    """
+    Parser for PSPLIB RCPSP format. Reads in an instance and returns its matching CPMpy model.
+
+    Arguments: 
+        rcpsp (str or os.PathLike):
+            - A file path to a PSPLIB RCPSP file
+            - OR a string containing the RCPSP content directly
+        open: (callable):
+            If rcpsp is the path to a file, a callable to "open" that file (default=python standard library's 'open').
+
+    Returns:
+        cp.Model: The CPMpy model of the PSPLIB RCPSP instance.
+    """
+    # If rcpsp is a path to a file -> open file
+    if isinstance(rcpsp, (str, os.PathLike)) and os.path.exists(rcpsp):
+        if open is not None:
+            f = open(rcpsp)
+        else:
+            f = _std_open(rcpsp, "rt")
+    # If rcpsp is a string containing a model -> wrap it in an in-memory text stream
+    else:
+        f = StringIO(rcpsp)
+
+    with f:  # always release the handle, also when parsing raises
+        table, capacities = _parse_rcpsp(f)
+    model, _ = _model_rcpsp(job_data=table, capacities=capacities)
+    return model
+
+def _skip_to(f, header):
+    """Advance `f` just past the first line starting with `header`; raise ValueError if EOF is hit first."""
+    for line in iter(f.readline, ""): # readline() returns "" at EOF, which ends the iteration
+        if line.startswith(header):
+            return
+    raise ValueError(f"Malformed RCPSP instance: section '{header}' not found")
+
+def _parse_rcpsp(f):
+    """Parse an open PSPLIB RCPSP text stream into (job DataFrame indexed by jobnr, {resource name: capacity})."""
+    data = dict()
+
+    _skip_to(f, "PRECEDENCE RELATIONS:")
+    f.readline() # skip column header line
+    line = f.readline() # first data row of the precedence table
+    while line and not line.startswith("*****"): # `line and`: stop at EOF instead of spinning forever
+        jobnr, n_modes, n_succ, *succ = [int(x) for x in line.split()]
+        assert len(succ) == n_succ, "Expected %d successors for job %d, got %d" % (n_succ, jobnr, len(succ))
+        data[jobnr] = dict(num_modes=n_modes, successors=succ)
+        line = f.readline()
+
+    # skip to job info
+    _skip_to(f, "REQUESTS/DURATIONS:")
+    _j, _m, _d, *_r = f.readline().split() # column header: job, mode, duration, then (label, number) pairs per resource
+    resource_names = [f"{_r[i]}{_r[i+1]}" for i in range(0,len(_r),2)]
+    line = f.readline() # first line of table
+    if line.startswith("----") or line.startswith("*****"): # intermediate line in table...
+        line = f.readline() # skip
+
+    while line and not line.startswith("*****"):
+        jobnr, mode, duration, *resources = [int(x) for x in line.split()]
+        assert len(resources) == len(resource_names), "Expected %d resources for job %d, got %d" % (len(resource_names), jobnr, len(resources))
+        data[jobnr].update(dict(mode=mode, duration=duration))
+        data[jobnr].update({name : req for name, req in zip(resource_names, resources)})
+        line = f.readline()
+
+    # read resource availabilities
+    _skip_to(f, "RESOURCEAVAILABILITIES:")
+    f.readline() # skip header
+    # split() also discards the trailing newline / trailing blanks that split(" ") kept as a non-numeric token
+    capacities = [int(x) for x in f.readline().split()]
+
+    import pandas as pd
+    df =pd.DataFrame([dict(jobnr=k ,**info) for k, info in data.items()], 
+                        columns=["jobnr", "mode", "duration", "successors", *resource_names])
+    df.set_index("jobnr", inplace=True)
+
+    return df, dict(zip(resource_names, capacities))
+
+def _model_rcpsp(job_data, capacities):
+
+    model = cp.Model()
+
+    horizon = job_data.duration.sum() # worst case, all jobs sequential on a machine
+    makespan = cp.intvar(0, horizon, name="makespan")
+
+    start = cp.intvar(0, horizon, name="start", shape=len(job_data))
+    end = cp.intvar(0, horizon, name="end", shape=len(job_data))
+
+    # ensure capacity is not exceeded
+    for rescource, capa in capacities.items():
+        model += cp.Cumulative(
+            start = start,
+            duration = job_data['duration'].tolist(),
+            end = end,
+            demand = job_data[rescource].tolist(),
+            capacity = capa
+        )
+
+    # enforce precedences
+    for idx, (jobnr, info) in enumerate(job_data.iterrows()):
+        for succ in info['successors']:
+            model += end[idx] <= start[succ-1] # job ids start at idx 1
+
+    model += end <= makespan
+    model.minimize(makespan)
+
+    return model, (start, end, makespan)
+
+
+def main():
+    """Command-line entry point: read a PSPLIB RCPSP instance, solve it and print the status and objective."""
+    parser = argparse.ArgumentParser(description="Parse and solve a PSPLIB RCPSP model using CPMpy")
+    parser.add_argument("model", help="Path to a PSPLIB RCPSP file (or raw RCPSP string if --string is given)")
+    parser.add_argument("-s", "--solver", default=None, help="Solver name to use (default: CPMpy's default)")
+    parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw RCPSP string instead of a file path")
+    parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)")
+    args = parser.parse_args()
+
+    # Build the CPMpy model
+    try:
+        # with --string the argument is the instance text itself; otherwise it is a path (expand '~')
+        source = args.model if args.string else os.path.expanduser(args.model)
+        model = read_rcpsp(source)
+    except Exception as e:
+        sys.stderr.write(f"Error reading model: {e}\n")
+        sys.exit(1)
+
+    # Solve the model
+    try:
+        if args.solver:
+            result = model.solve(solver=args.solver, time_limit=args.time_limit)
+        else:
+            result = model.solve(time_limit=args.time_limit)
+    except Exception as e:
+        sys.stderr.write(f"Error solving model: {e}\n")
+        sys.exit(1)
+
+    # Print results
+    print("Status:", model.status())
+    if result: # solve() returns a bool; False (never None) means no solution was found
+        if model.has_objective():
+            print("Objective:", model.objective_value())
+    else:
+        print("No solution found.")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From af36c877c9d61eae1b595aa1650a4d42caebf9a9 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 24 Oct 2025 15:26:11 +0200
Subject: [PATCH 34/46] opb fix intermediate solutions

---
 cpmpy/tools/benchmark/opb.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py
index b92fcb257..5c0c222cd 100644
--- a/cpmpy/tools/benchmark/opb.py
+++ b/cpmpy/tools/benchmark/opb.py
@@ -154,17 +154,17 @@ def parse_output_line(self, line, result):
                 result['solution'] = solution
             else:
                 result['solution'] = result['solution'] + ' ' + str(solution)
+        elif line.startswith('c Solution'):
+            parts = line.split(', time = ')
+            # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines
+            self.sol_time = float(parts[-1].replace('s', '').rstrip())
         elif line.startswith('o '):
             obj = int(line[2:].strip())
             if result['intermediate'] is None:
                 result['intermediate'] = []
-            result['intermediate'] += [(sol_time, obj)]
+            result['intermediate'] += [(self.sol_time, obj)]
             result['objective_value'] = obj
             obj = None
-        elif line.startswith('c Solution'):
-            parts = line.split(', time = ')
-            # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines
-            sol_time = float(parts[-1].replace('s', '').rstrip())
         elif line.startswith('c took '):
             # Parse timing information
             parts = line.split(' seconds to ')

From a834387f7900d7d0289267d57800e2f63bb3824c Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 24 Oct 2025 15:33:43 +0200
Subject: [PATCH 35/46] update docstrings

---
 cpmpy/tools/dataset/problem/psplib.py | 35 ++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/cpmpy/tools/dataset/problem/psplib.py b/cpmpy/tools/dataset/problem/psplib.py
index b1cbf70f6..89f0e93c7 100644
--- a/cpmpy/tools/dataset/problem/psplib.py
+++ b/cpmpy/tools/dataset/problem/psplib.py
@@ -1,3 +1,8 @@
+"""
+PSPlib Dataset
+
+https://www.om-db.wi.tum.de/psplib/getdata_sm.html
+"""
 import os
 import pathlib
 from typing import Tuple, Any
@@ -10,18 +15,25 @@ class PSPLibDataset(object):  # torch.utils.data.Dataset compatible
     """
     PSPlib Dataset in a PyTorch compatible format.
     
-    Arguments:
-        root (str): Root directory containing the psplib instances (if 'download', instances will be downloaded to this location)
-        variant (str): scheduling variant (only 'rcpsp' is supported for now)
-        family (str): family name (e.g. j30, j60, etc...)
-        transform (callable, optional): Optional transform to be applied on the instance data
-        target_transform (callable, optional): Optional transform to be applied on the file path
-        download (bool): If True, downloads the dataset from the internet and puts it in `root` directory
+    More information on PSPlib can be found here: https://www.om-db.wi.tum.de/psplib/main.html
     """
     
     def __init__(self, root: str = ".", variant: str = "rcpsp", family: str = "j30", transform=None, target_transform=None, download: bool = False):
         """
-        Initialize the PSPLib Dataset.
+        Constructor for a dataset object for PSPlib.
+
+        Arguments:
+            root (str): Root directory containing the psplib instances (if 'download', instances will be downloaded to this location)
+            variant (str): scheduling variant (only 'rcpsp' is supported for now)
+            family (str): family name (e.g. j30, j60, etc...)
+            transform (callable, optional): Optional transform to be applied on the instance data
+            target_transform (callable, optional): Optional transform to be applied on the file path
+            download (bool): If True, downloads the dataset from the internet and puts it in `root` directory
+
+
+        Raises:
+            ValueError: If the dataset directory does not exist and `download=False`,
+                or if the requested variant/family combination is not available.
         """
         
         self.root = pathlib.Path(root)
@@ -116,4 +128,9 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]:
         if self.target_transform:
             metadata = self.target_transform(metadata)
             
-        return filename, metadata
\ No newline at end of file
+        return filename, metadata
+    
+if __name__ == "__main__":
+    dataset = PSPLibDataset(variant="rcpsp", family="j30", download=True)
+    print("Dataset size:", len(dataset))
+    print("Instance 0:", dataset[0])
\ No newline at end of file

From 8805cad7fab38bc74d0d7b05698ab406469e8706 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 24 Oct 2025 15:49:29 +0200
Subject: [PATCH 36/46] Fix more docstring

---
 cpmpy/tools/benchmark/psplib.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/cpmpy/tools/benchmark/psplib.py b/cpmpy/tools/benchmark/psplib.py
index 4fab0c99c..26046cf84 100644
--- a/cpmpy/tools/benchmark/psplib.py
+++ b/cpmpy/tools/benchmark/psplib.py
@@ -9,7 +9,7 @@
 This script can be run directly to benchmark solvers on PSPLIB datasets.
 
 Usage:
-    python psplib.py --year 2024 --track exact-weighted --solver ortools
+    python psplib.py --year 2024 --variant rcpsp --family j30
 
 Arguments:
     --variant       Problem variant (e.g., rcpsp).
@@ -30,8 +30,7 @@
 .. autosummary::
     :nosignatures:
 
-    MSEExitStatus
-    MSEBenchmark
+    PSPLIBBenchmark
 
 =================
 List of functions
@@ -40,7 +39,7 @@
 .. autosummary::
     :nosignatures:
 
-    solution_mse
+    solution_psplib
 """
 
 import warnings

From ce6b6bcc51604e24014c3ec4a9e482e46606463e Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 24 Oct 2025 15:50:01 +0200
Subject: [PATCH 37/46] Add JSPLib dataset and benchmark

---
 cpmpy/tools/benchmark/jsplib.py       | 209 ++++++++++++++++++++++++
 cpmpy/tools/dataset/problem/jsplib.py | 218 ++++++++++++++++++++++++++
 cpmpy/tools/jsplib/__init__.py        |  20 +++
 cpmpy/tools/jsplib/parser.py          | 148 +++++++++++++++++
 4 files changed, 595 insertions(+)
 create mode 100644 cpmpy/tools/benchmark/jsplib.py
 create mode 100644 cpmpy/tools/dataset/problem/jsplib.py
 create mode 100644 cpmpy/tools/jsplib/__init__.py
 create mode 100644 cpmpy/tools/jsplib/parser.py

diff --git a/cpmpy/tools/benchmark/jsplib.py b/cpmpy/tools/benchmark/jsplib.py
new file mode 100644
index 000000000..30c99da79
--- /dev/null
+++ b/cpmpy/tools/benchmark/jsplib.py
@@ -0,0 +1,209 @@
+"""
+JSPLib as a CPMpy benchmark
+
+This module provides a benchmarking framework for running CPMpy on JSPLib 
+instances.
+
+Command-line Interface
+----------------------
+This script can be run directly to benchmark solvers on JSPLib datasets.
+
+Usage:
+    python jsplib.py --solver ortools
+
+Arguments:
+    --solver        Solver name (e.g., ortools, exact, choco, ...).
+    --workers       Number of parallel workers to use.
+    --time-limit    Time limit in seconds per instance.
+    --mem-limit     Memory limit in MB per instance.
+    --cores         Number of cores to assign to a single instance.
+    --output-dir    Output directory for CSV files.
+    --verbose       Show solver output if set.
+    --intermediate  Report intermediate solutions if supported.
+
+===============
+List of classes
+===============
+
+.. autosummary::
+    :nosignatures:
+
+    MSEExitStatus
+    MSEBenchmark
+
+=================
+List of functions
+=================
+
+.. autosummary::
+    :nosignatures:
+
+    solution_mse
+"""
+
+import warnings
+import argparse
+from enum import Enum
+from pathlib import Path
+from datetime import datetime
+
+# CPMpy
+from cpmpy.tools.benchmark.runner import benchmark_runner
+from cpmpy.tools.benchmark._base import Benchmark, ExitStatus
+from cpmpy.tools.jsplib import read_jsplib
+from cpmpy.solvers.solver_interface import ExitStatus as CPMStatus
+
+
+def solution_psplib(model):
+    """
+    Convert a CPMpy model solution into the solution string format.
+
+    Arguments:
+        model (cp.solvers.SolverInterface): The solver-specific model for which to print its solution
+
+    Returns:
+        str: formatted solution string.
+    """
+    variables = {var.name: var.value() for var in model.user_vars if var.name[:2] not in ["IV", "BV", "B#"]} # dirty workaround for all missed aux vars in user vars TODO fix with Ignace
+    return str(variables)
+
+class JSPLibBenchmark(Benchmark):
+
+    """
+    PSPLIB as a CPMpy benchmark.
+    """
+
+    def __init__(self):
+        super().__init__(reader=read_jsplib)
+    
+    def print_comment(self, comment:str):
+        print('c' + chr(32) + comment.rstrip('\n'), end="\r\n", flush=True)
+
+    def print_status(self, status: ExitStatus) -> None:
+        print('s' + chr(32) + status.value, end="\n", flush=True)
+
+    def print_value(self, value: str) -> None:
+        print('v' + chr(32) + value, end="\n", flush=True)
+
+    def print_objective(self, objective: int) -> None:
+        print('o' + chr(32) + str(objective), end="\n", flush=True)
+    
+    def print_intermediate(self, objective:int):
+        self.print_objective(objective)
+
+    def print_result(self, s):
+        if s.status().exitstatus == CPMStatus.OPTIMAL:
+            self.print_value(solution_psplib(s))
+            self.print_status(ExitStatus.optimal)
+        elif s.status().exitstatus == CPMStatus.FEASIBLE:
+            self.print_value(solution_psplib(s))
+            self.print_status(ExitStatus.sat)
+        elif s.status().exitstatus == CPMStatus.UNSATISFIABLE:
+            self.print_status(ExitStatus.unsat)
+        else:
+            self.print_comment("Solver did not find any solution within the time/memory limit")
+            self.print_status(ExitStatus.unknown)
+
+    def handle_memory_error(self, mem_limit):
+        super().handle_memory_error(mem_limit)
+        self.print_status(ExitStatus.unknown)
+
+    def handle_not_implemented(self, e):
+        super().handle_not_implemented(e)
+        self.print_status(ExitStatus.unsupported)
+
+    def handle_exception(self, e):
+        super().handle_exception(e)
+        self.print_status(ExitStatus.unknown)
+
+    
+    def handle_sigterm(self):
+        """
+        Handles a SIGTERM. Gives us 1 second to finish the current job before we get killed.
+        """
+        # Report that we haven't found a solution in time
+        self.print_status(ExitStatus.unknown)
+        self.print_comment("SIGTERM raised.")
+        return 0
+        
+    def handle_rlimit_cpu(self):
+        """
+        Handles a SIGXCPU.
+        """
+        # Report that we haven't found a solution in time
+        self.print_status(ExitStatus.unknown)
+        self.print_comment("SIGXCPU raised.")
+        return 0
+
+    def parse_output_line(self, line, result):
+        if line.startswith('s '):
+            result['status'] = line[2:].strip()
+        elif line.startswith('v '):
+            # only record first line, contains 'type' and 'cost'
+            solution = line.split("\n")[0][2:].strip()
+            if solution not in result:
+                result['solution'] = solution
+            else:
+                result['solution'] = result['solution'] + ' ' + str(solution)
+        elif line.startswith('c Solution'):
+            parts = line.split(', time = ')
+            # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines
+            self.sol_time = float(parts[-1].replace('s', '').rstrip())
+        elif line.startswith('o '):
+            obj = int(line[2:].strip())
+            if result['intermediate'] is None:
+                result['intermediate'] = []
+            result['intermediate'] += [(self.sol_time, obj)]
+            result['objective_value'] = obj
+            obj = None
+        elif line.startswith('c took '):
+            # Parse timing information
+            parts = line.split(' seconds to ')
+            if len(parts) == 2:
+                time_val = float(parts[0].replace('c took ', ''))
+                action = parts[1].strip()
+                if action.startswith('parse'):
+                    result['time_parse'] = time_val
+                elif action.startswith('convert'):
+                    result['time_model'] = time_val
+                elif action.startswith('post'):
+                    result['time_post'] = time_val
+                elif action.startswith('solve'):
+                    result['time_solve'] = time_val
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description='Benchmark solvers on JSPLib instances')
+    parser.add_argument('--solver', type=str, required=True, help='Solver name (e.g., ortools, exact, choco, ...)')
+    parser.add_argument('--workers', type=int, default=4, help='Number of parallel workers')
+    parser.add_argument('--time-limit', type=int, default=300, help='Time limit in seconds per instance')
+    parser.add_argument('--mem-limit', type=int, default=8192, help='Memory limit in MB per instance')
+    parser.add_argument('--cores', type=int, default=1, help='Number of cores to assign tp a single instance')
+    parser.add_argument('--output-dir', type=str, default='results', help='Output directory for CSV files')
+    parser.add_argument('--verbose', action='store_true', help='Show solver output')
+    parser.add_argument('--intermediate', action='store_true', help='Report on intermediate solutions')
+    # parser.add_argument('--checker-path', type=str, default=None,
+    #                 help='Path to the XCSP3 solution checker JAR file')
+    args = parser.parse_args()
+
+    if not args.verbose:
+        warnings.filterwarnings("ignore")
+    
+    # Load benchmark instances (as a dataset)
+    from cpmpy.tools.dataset.problem.jsplib import JSPLibDataset
+    dataset = JSPLibDataset(download=True)
+    
+    # Create output directory
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    
+    # Get current timestamp in a filename-safe format
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    
+    # Define output file path with timestamp
+    output_file = str(output_dir / "jsplib" / f"psplib_{args.solver}_{timestamp}.csv")
+
+    # Run the benchmark
+    instance_runner = JSPLibBenchmark()
+    output_file = benchmark_runner(dataset=dataset, instance_runner=instance_runner, output_file=output_file, **vars(args))
+    print(f"Results added to {output_file}")
diff --git a/cpmpy/tools/dataset/problem/jsplib.py b/cpmpy/tools/dataset/problem/jsplib.py
new file mode 100644
index 000000000..54cba2890
--- /dev/null
+++ b/cpmpy/tools/dataset/problem/jsplib.py
@@ -0,0 +1,218 @@
+"""
+PyTorch-style Dataset for Jobshop instances from JSPLib
+
+Simply create a dataset instance and start iterating over its contents:
+The `metadata` contains useful information about the current problem instance.
+
+https://github.com/tamy0612/JSPLIB
+"""
+import os
+import json
+import pathlib
+from os.path import join
+from typing import Tuple, Any
+from urllib.request import urlretrieve
+from urllib.error import HTTPError, URLError
+import zipfile
+import numpy as np
+
+from matplotlib import pyplot as plt
+
+import cpmpy as cp
+
+class JSPLibDataset(object):  # torch.utils.data.Dataset compatible
+
+    """
+    JSP Dataset in a PyTorch compatible format.
+    
+    More information on JSPLib can be found here: https://github.com/tamy0612/JSPLIB
+    """
+    
+    def __init__(self, root: str = ".", transform=None, target_transform=None, download: bool = False):
+        """
+        Initialize the PSPLib Dataset.
+
+        Arguments:
+            root (str): Root directory containing the jsp instances (if 'download', instances will be downloaded to this location)
+            transform (callable, optional): Optional transform to be applied on the instance data
+            target_transform (callable, optional): Optional transform to be applied on the file path
+            download (bool): If True, downloads the dataset from the internet and puts it in `root` directory
+        """
+        
+        self.root = pathlib.Path(root)
+        self.instance_dir = pathlib.Path(join(self.root, "jsplib"))
+        self.metadata_file = "instances.json"
+        self.transform = transform
+        self.target_transform = target_transform
+
+        # Create root directory if it doesn't exist
+        self.root.mkdir(parents=True, exist_ok=True)
+
+        print(self.instance_dir, self.instance_dir.exists(), self.instance_dir.is_dir())
+        if not self.instance_dir.exists():
+            if not download:
+                raise ValueError(f"Dataset not found in local file system. Please set download=True to download the dataset.")
+            else:
+                url = f"https://github.com/tamy0612/JSPLIB/archive/refs/heads/master.zip" # download full repo...
+                url_path = url
+                zip_path = pathlib.Path(join(root,"jsplib-master.zip"))
+
+                print(f"Downloading JSPLib instances..")
+
+                try:
+                    urlretrieve(url_path, str(zip_path))
+                except (HTTPError, URLError) as e:
+                    raise ValueError(f"No dataset available on {url}. Error: {str(e)}")
+                
+                # make directory and extract files
+                with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+                    self.instance_dir.mkdir(parents=True, exist_ok=True)
+
+                    # Extract files
+                    for file_info in zip_ref.infolist():
+                        if file_info.filename.startswith("JSPLIB-master/instances/") and file_info.file_size > 0:
+                            filename = pathlib.Path(file_info.filename).name
+                            with zip_ref.open(file_info) as source, open(self.instance_dir / filename, 'wb') as target:
+                                target.write(source.read())
+                    # extract metadata file
+                    with zip_ref.open("JSPLIB-master/instances.json") as source, open(self.instance_dir / self.metadata_file, 'wb') as target:
+                        target.write(source.read())
+                                 # Clean up the zip file
+                zip_path.unlink()
+
+        
+    def __len__(self) -> int:
+        """Return the number of instance files (the metadata file in the same folder is not an instance)."""
+        return sum(1 for p in self.instance_dir.glob("*") if p.name != self.metadata_file)
+    
+    def __getitem__(self, index: int|str) -> Tuple[Any, Any]:
+        """
+        Get a single JSPLib instance filename and metadata.
+
+        Args:
+            index (int or str): Index or name of the instance to retrieve
+
+        Returns:
+            Tuple[Any, Any]: A tuple containing:
+                - The filename of the instance (or the result of `transform` applied to it)
+                - The instance's entry from the metadata file plus its 'path' (empty dict if it has no entry)
+        """
+        # Get all instance files (everything except the metadata file), sorted for deterministic behavior # TODO: use natsort instead?
+        files = sorted(p for p in self.instance_dir.glob("*") if p.name != self.metadata_file)
+
+        if isinstance(index, int) and not 0 <= index < len(files):
+            raise IndexError("Index out of range")
+        if isinstance(index, int):
+            file_path = files[index]
+        elif isinstance(index, str):
+            for file_path in files:
+                if file_path.stem == index:
+                    break
+            else:
+                raise IndexError(f"Instance {index} not found in dataset")
+
+        filename = str(file_path)
+        if self.transform:
+            # does not need to remain a filename...
+            filename = self.transform(filename)
+
+        with open(self.instance_dir / self.metadata_file, "r") as f:
+            for entry in json.load(f):
+                if entry["name"] == file_path.stem:
+                    metadata = entry
+                    metadata['path'] = str(file_path)
+                    break
+            else:
+                metadata = dict()
+
+        if self.target_transform:
+            metadata = self.target_transform(metadata)
+
+        return filename, metadata
+    
+    def open(self, instance: os.PathLike) -> callable:
+        return open(instance, "r")  # a method body does not see class-level names, so this `open` is the module-level/builtin one; opens `instance` in text read mode
+
+
+def parse_jsp(filename: str):
+    """
+    Parse a JSPLib instance file.
+
+    Leading lines starting with '#' are skipped; the next line holds `n_jobs n_tasks`, and the
+    remaining numbers are read as one row per job of alternating (machine, duration) pairs.
+
+    Returns two integer matrices of shape (n_jobs, n_tasks):
+        - task to machines indicating on which machine to run which task
+        - task durations: indicating the duration of each task
+    """
+    with open(filename, "r") as f:
+        line = f.readline()
+        while line.startswith("#"):
+            line = f.readline()
+        # split on any run of whitespace, so tabs or repeated spaces in the size line are accepted
+        n_jobs, n_tasks = map(int, line.split())
+        matrix = np.fromstring(f.read(), sep=" ", dtype=int).reshape((n_jobs, n_tasks*2))
+
+    # even columns hold the machine of each task, odd columns its duration (copied so the results own their data)
+    task_to_machines = matrix[:, 0::2].copy()
+    task_durations = matrix[:, 1::2].copy()
+    return task_to_machines, task_durations
+
+def jobshop_model(task_to_machines, task_durations):
+    """Build the CPMpy jobshop model minimising the makespan; returns `(model, (start, makespan))`."""
+
+    task_to_machines = np.array(task_to_machines)
+    dur = np.array(task_durations)
+
+    # validate and size on the converted arrays, so plain nested lists are accepted as well
+    assert task_to_machines.shape == dur.shape
+    n_jobs, n_tasks = task_to_machines.shape
+    horizon = dur.sum() # extremely bad upperbound... TODO
+
+    start = cp.intvar(0, horizon, name="start", shape=(n_jobs,n_tasks))
+    end = cp.intvar(0, horizon, name="end", shape=(n_jobs,n_tasks))
+    makespan = cp.intvar(0, horizon, name="makespan")
+
+    model = cp.Model()
+    model += start + dur == end
+    model += end[:,:-1] <= start[:,1:] # precedences
+
+    for machine in set(task_to_machines.flat):
+        model += cp.NoOverlap(start[task_to_machines == machine],
+                              dur[task_to_machines == machine],
+                              end[task_to_machines == machine])
+
+    model += end <= makespan
+    model.minimize(makespan)
+    return model, (start, makespan)
+
+
+if __name__ == "__main__":
+
+    dataset = JSPLibDataset(root=".", download=True, transform=parse_jsp)
+    print("Dataset size:", len(dataset))
+    print("Instance 0:")
+    (machines, dur), metadata = dataset[0]
+    print("Machines:", machines)
+    print("Durations:", dur)
+    print("Metadata:", metadata)
+
+    print("Solving", metadata['name'])
+    model, (start, makespan) = jobshop_model(task_to_machines=machines, task_durations=dur)
+    assert model.solve(time_limit=10)
+
+    import pandas as pd
+    import plotly.express as px
+    import plotly.io as pio
+    pio.renderers.default = "browser" # ensure plotly opens figure in browser
+    # the matrices are flattened row-major: one row per job, one column per task position within the job
+    df = pd.DataFrame({"Start": start.value().flat, "Duration": dur.flat, "Machine": machines.flat})
+    df["Job"] = np.repeat(np.arange(machines.shape[0]), machines.shape[1])
+    df["Task"] = np.tile(np.arange(machines.shape[1]), machines.shape[0])
+    df["Name"] = "T" + df["Job"].astype(str) + "-" + df["Task"].astype(str)
+    print(df)
+    gantt_fig = px.bar(df, orientation='h',
+                       base="Start", x="Duration", y="Machine", color="Job", text="Name",
+                       title=f"Jobshop instance {metadata['name']}, makespan: {makespan.value()}, status: {model.status()}"
+                       )
+    gantt_fig.show()
\ No newline at end of file
diff --git a/cpmpy/tools/jsplib/__init__.py b/cpmpy/tools/jsplib/__init__.py
new file mode 100644
index 000000000..6ebdec377
--- /dev/null
+++ b/cpmpy/tools/jsplib/__init__.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+##
+## __init__.py
+##
+"""
+Set of utilities for working with JSPLib-formatted CP models.
+
+
+==================
+List of submodules
+==================
+
+.. autosummary::
+    :nosignatures:
+
+    parser
+"""
+
+from .parser import read_jsplib
diff --git a/cpmpy/tools/jsplib/parser.py b/cpmpy/tools/jsplib/parser.py
new file mode 100644
index 000000000..11c820faa
--- /dev/null
+++ b/cpmpy/tools/jsplib/parser.py
@@ -0,0 +1,148 @@
+"""
+Parser for the JSPLib format.
+
+
+=================
+List of functions
+=================
+
+.. autosummary::
+    :nosignatures:
+
+    read_jsplib
+"""
+
+
+import os
+import sys
+import lzma
+import argparse
+import cpmpy as cp
+import numpy as np
+from io import StringIO
+from typing import Union
+
+
+_std_open = open
+def read_jsplib(jsp: Union[str, os.PathLike], open=open) -> cp.Model:
+    """
+    Parser for JSPLib format. Reads in an instance and returns its matching CPMpy model.
+
+    Arguments: 
+        jsp (str or os.PathLike):
+            - A file path to a JSPlib file
+            - OR a string containing the JSPLib content directly
+        open: (callable):
+            If jsp is the path to a file, a callable to "open" that file (default=python standard library's 'open').
+
+    Returns:
+        cp.Model: The CPMpy model of the JSPLib instance.
+    """
+    # If jsp is a path to an existing file -> open it
+    if isinstance(jsp, (str, os.PathLike)) and os.path.exists(jsp):
+        # `open` shadows the builtin inside this function, hence the `_std_open` alias for the fallback
+        f = open(jsp) if open is not None else _std_open(jsp, "rt")
+    # Otherwise jsp is a string containing the instance -> wrap it in an in-memory text stream
+    else:
+        f = StringIO(jsp)
+
+    try:
+        task_to_machines, task_durations = _parse_jsplib(f)
+    finally:
+        f.close()  # the parser reads the stream eagerly, so the handle can be released right away
+    model, _ = _model_jsplib(task_to_machines=task_to_machines, task_durations=task_durations)
+    return model
+
+
+def _parse_jsplib(f):
+    """
+    Parse a JSPLib instance from an open text stream.
+
+    Leading lines starting with '#' are skipped; the next line holds `n_jobs n_tasks`, and the
+    remaining numbers are read as one row per job of alternating (machine, duration) pairs.
+
+    Returns two integer matrices of shape (n_jobs, n_tasks):
+        - task to machines indicating on which machine to run which task
+        - task durations: indicating the duration of each task
+    """
+    line = f.readline()
+    while line.startswith("#"):
+        line = f.readline()
+    # split on any run of whitespace, so tabs or repeated spaces in the size line are accepted
+    n_jobs, n_tasks = map(int, line.split())
+    matrix = np.fromstring(f.read(), sep=" ", dtype=int).reshape((n_jobs, n_tasks*2))
+
+    # even columns hold the machine of each task, odd columns its duration (copied so the results own their data)
+    task_to_machines = matrix[:, 0::2].copy()
+    task_durations = matrix[:, 1::2].copy()
+    return task_to_machines, task_durations
+
+
+
+def _model_jsplib(task_to_machines, task_durations):
+    """Build the CPMpy jobshop model minimising the makespan; returns `(model, (start, makespan))`."""
+    task_to_machines = np.array(task_to_machines)
+    dur = np.array(task_durations)
+
+    # validate and size on the converted arrays, so plain nested lists are accepted as well
+    assert task_to_machines.shape == dur.shape
+    n_jobs, n_tasks = task_to_machines.shape
+    horizon = dur.sum() # extremely bad upperbound... TODO
+
+    start = cp.intvar(0, horizon, name="start", shape=(n_jobs,n_tasks))
+    end = cp.intvar(0, horizon, name="end", shape=(n_jobs,n_tasks))
+    makespan = cp.intvar(0, horizon, name="makespan")
+
+    model = cp.Model()
+    model += start + dur == end
+    model += end[:,:-1] <= start[:,1:] # precedences
+
+    for machine in set(task_to_machines.flat):
+        model += cp.NoOverlap(start[task_to_machines == machine],
+                              dur[task_to_machines == machine],
+                              end[task_to_machines == machine])
+
+    model += end <= makespan
+    model.minimize(makespan)
+    return model, (start, makespan)
+
+
+
+def main():
+    """Command-line entry point: read a JSPLib instance, solve it and print the status and objective."""
+    parser = argparse.ArgumentParser(description="Parse and solve a JSPLib model using CPMpy")
+    parser.add_argument("model", help="Path to a JSPLib file (or raw JSPLib string if --string is given)")
+    parser.add_argument("-s", "--solver", default=None, help="Solver name to use (default: CPMpy's default)")
+    parser.add_argument("--string", action="store_true", help="Interpret the first argument (model) as a raw JSPLib string instead of a file path")
+    parser.add_argument("-t", "--time-limit", type=int, default=None, help="Time limit for the solver in seconds (default: no limit)")
+    args = parser.parse_args()
+    # Build the CPMpy model
+    try:
+        if args.string:
+            model = read_jsplib(args.model)
+        else:
+            model = read_jsplib(os.path.expanduser(args.model))
+    except Exception as e:
+        sys.stderr.write(f"Error reading model: {e}\n")
+        sys.exit(1)
+
+    # Solve the model
+    try:
+        if args.solver:
+            result = model.solve(solver=args.solver, time_limit=args.time_limit)
+        else:
+            result = model.solve(time_limit=args.time_limit)
+    except Exception as e:
+        sys.stderr.write(f"Error solving model: {e}\n")
+        sys.exit(1)
+
+    # Print results; solve() reports False (not None) when no solution was found, so test truthiness
+    print("Status:", model.status())
+    if result:
+        if model.has_objective():
+            print("Objective:", model.objective_value())
+    else:
+        print("No solution found.")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 9098299f5ecc2f986e622abb1fb3b3135595b571 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Fri, 24 Oct 2025 16:04:42 +0200
Subject: [PATCH 38/46] Add bounds for all jsplib instances

---
 cpmpy/tools/dataset/problem/jsplib.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cpmpy/tools/dataset/problem/jsplib.py b/cpmpy/tools/dataset/problem/jsplib.py
index 54cba2890..d110c48db 100644
--- a/cpmpy/tools/dataset/problem/jsplib.py
+++ b/cpmpy/tools/dataset/problem/jsplib.py
@@ -120,6 +120,9 @@ def __getitem__(self, index: int|str) -> Tuple[Any, Any]:
             for entry in json.load(f):
                 if entry["name"] == file_path.stem:
                     metadata = entry
+                    if "bounds" not in metadata: 
+                        metadata["bounds"] = {"upper": metadata["optimum"], "lower": metadata["optimum"]}
+                    del metadata['path']
                     metadata['path'] = str(file_path)
                     break
             else:

From 658967d3951e2dc0e7219c755a6b7fc994271399 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Sat, 25 Oct 2025 10:48:47 +0200
Subject: [PATCH 39/46] Fix choco args

---
 cpmpy/tools/benchmark/_base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py
index 11d17ed42..3f9257d49 100644
--- a/cpmpy/tools/benchmark/_base.py
+++ b/cpmpy/tools/benchmark/_base.py
@@ -34,6 +34,7 @@
 import signal
 import sys
 import time
+import math
 import random
 import psutil
 import warnings
@@ -191,7 +192,7 @@ def exact_arguments(
 
         return res, None
 
-    def choco_arguments(): 
+    def choco_arguments(self): 
         # Documentation: https://github.com/chocoteam/pychoco/blob/master/pychoco/solver.py
         return {}, None
 

From 38db2906c84fe70ee0c979b0defedfcae94b4c0e Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Sat, 25 Oct 2025 18:16:40 +0200
Subject: [PATCH 40/46] Fixes

---
 cpmpy/tools/benchmark/jsplib.py |  6 +++++-
 cpmpy/tools/benchmark/psplib.py |  6 +++++-
 cpmpy/tools/benchmark/xcsp3.py  | 10 +++++-----
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/cpmpy/tools/benchmark/jsplib.py b/cpmpy/tools/benchmark/jsplib.py
index 30c99da79..e9dacb7ce 100644
--- a/cpmpy/tools/benchmark/jsplib.py
+++ b/cpmpy/tools/benchmark/jsplib.py
@@ -74,6 +74,7 @@ class JSPLibBenchmark(Benchmark):
     """
 
     def __init__(self):
+        self.sol_time = None
         super().__init__(reader=read_jsplib)
     
     def print_comment(self, comment:str):
@@ -93,9 +94,11 @@ def print_intermediate(self, objective:int):
 
     def print_result(self, s):
         if s.status().exitstatus == CPMStatus.OPTIMAL:
+            self.print_objective(s.objective_value())
             self.print_value(solution_psplib(s))
             self.print_status(ExitStatus.optimal)
         elif s.status().exitstatus == CPMStatus.FEASIBLE:
+            self.print_objective(s.objective_value())
             self.print_value(solution_psplib(s))
             self.print_status(ExitStatus.sat)
         elif s.status().exitstatus == CPMStatus.UNSATISFIABLE:
@@ -153,7 +156,8 @@ def parse_output_line(self, line, result):
             obj = int(line[2:].strip())
             if result['intermediate'] is None:
                 result['intermediate'] = []
-            result['intermediate'] += [(self.sol_time, obj)]
+            if self.sol_time is not None:
+                result['intermediate'] += [(self.sol_time, obj)]
             result['objective_value'] = obj
             obj = None
         elif line.startswith('c took '):
diff --git a/cpmpy/tools/benchmark/psplib.py b/cpmpy/tools/benchmark/psplib.py
index 26046cf84..0f1a1639f 100644
--- a/cpmpy/tools/benchmark/psplib.py
+++ b/cpmpy/tools/benchmark/psplib.py
@@ -75,6 +75,7 @@ class PSPLIBBenchmark(Benchmark):
     """
 
     def __init__(self):
+        self.sol_time = None
         super().__init__(reader=read_rcpsp) # TODO: reader should depend on problem variant
     
     def print_comment(self, comment:str):
@@ -94,9 +95,11 @@ def print_intermediate(self, objective:int):
 
     def print_result(self, s):
         if s.status().exitstatus == CPMStatus.OPTIMAL:
+            self.print_objective(s.objective_value())
             self.print_value(solution_psplib(s))
             self.print_status(ExitStatus.optimal)
         elif s.status().exitstatus == CPMStatus.FEASIBLE:
+            self.print_objective(s.objective_value())
             self.print_value(solution_psplib(s))
             self.print_status(ExitStatus.sat)
         elif s.status().exitstatus == CPMStatus.UNSATISFIABLE:
@@ -154,7 +157,8 @@ def parse_output_line(self, line, result):
             obj = int(line[2:].strip())
             if result['intermediate'] is None:
                 result['intermediate'] = []
-            result['intermediate'] += [(self.sol_time, obj)]
+            if self.sol_time is not None:
+                result['intermediate'] += [(self.sol_time, obj)]
             result['objective_value'] = obj
             obj = None
         elif line.startswith('c took '):
diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py
index 47d0289e4..176d42d18 100644
--- a/cpmpy/tools/benchmark/xcsp3.py
+++ b/cpmpy/tools/benchmark/xcsp3.py
@@ -182,17 +182,17 @@ def parse_output_line(self, line, result):
             complete_solution = line
             if "cost" in solution:
                 result['objective_value'] = solution.split('cost="')[-1][:-2]
+        elif line.startswith('c Solution'):
+            parts = line.split(', time = ')
+            # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines
+            self._sol_time = float(parts[-1].replace('s', '').rstrip())
         elif line.startswith('o '):
             obj = int(line[2:].strip())
             if result['intermediate'] is None:
                 result['intermediate'] = []
-            result['intermediate'] += [(sol_time, obj)]
+            result['intermediate'] += [(self._sol_time, obj)]
             result['objective_value'] = obj
             obj = None
-        elif line.startswith('c Solution'):
-            parts = line.split(', time = ')
-            # Get solution time from comment for intermediate solution -> used for annotating 'o ...' lines
-            sol_time = float(parts[-1].replace('s', '').rstrip())
         elif line.startswith('c took '):
             # Parse timing information
             parts = line.split(' seconds to ')

From 62b605d99109fa9bafd4b16ca92a0bb812963121 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Mon, 3 Nov 2025 17:08:37 +0100
Subject: [PATCH 41/46] correct jsplib output file name

---
 cpmpy/tools/benchmark/jsplib.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpmpy/tools/benchmark/jsplib.py b/cpmpy/tools/benchmark/jsplib.py
index e9dacb7ce..343c2dfdd 100644
--- a/cpmpy/tools/benchmark/jsplib.py
+++ b/cpmpy/tools/benchmark/jsplib.py
@@ -205,7 +205,7 @@ def parse_output_line(self, line, result):
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     
     # Define output file path with timestamp
-    output_file = str(output_dir / "jsplib" / f"psplib_{args.solver}_{timestamp}.csv")
+    output_file = str(output_dir / "jsplib" / f"jsplib_{args.solver}_{timestamp}.csv")
 
     # Run the benchmark
     instance_runner = JSPLibBenchmark()

From ddf69389644bd69c9efa2cc06090a0d399dbef0c Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Mon, 3 Nov 2025 17:09:02 +0100
Subject: [PATCH 42/46] remove matplotlib import

---
 cpmpy/tools/dataset/problem/jsplib.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/cpmpy/tools/dataset/problem/jsplib.py b/cpmpy/tools/dataset/problem/jsplib.py
index d110c48db..17453fe32 100644
--- a/cpmpy/tools/dataset/problem/jsplib.py
+++ b/cpmpy/tools/dataset/problem/jsplib.py
@@ -16,8 +16,6 @@
 import zipfile
 import numpy as np
 
-from matplotlib import pyplot as plt
-
 import cpmpy as cp
 
 class JSPLibDataset(object):  # torch.utils.data.Dataset compatible

From 344aaafd94fef5765b6d7baa22e111ad204cf7c4 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Mon, 3 Nov 2025 17:09:23 +0100
Subject: [PATCH 43/46] xcsp3 track intermediate sol time

---
 cpmpy/tools/benchmark/xcsp3.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cpmpy/tools/benchmark/xcsp3.py b/cpmpy/tools/benchmark/xcsp3.py
index 176d42d18..1bc70ad9b 100644
--- a/cpmpy/tools/benchmark/xcsp3.py
+++ b/cpmpy/tools/benchmark/xcsp3.py
@@ -121,6 +121,7 @@ class XCSP3Benchmark(Benchmark):
     """
 
     def __init__(self):
+        self._sol_time = None
         super().__init__(reader=read_xcsp3, exit_status=XCSP3ExitStatus)
     
     def print_comment(self, comment:str):
@@ -190,7 +191,8 @@ def parse_output_line(self, line, result):
             obj = int(line[2:].strip())
             if result['intermediate'] is None:
                 result['intermediate'] = []
-            result['intermediate'] += [(self._sol_time, obj)]
+            if self._sol_time is not None:
+                result['intermediate'] += [(self._sol_time, obj)]
             result['objective_value'] = obj
             obj = None
         elif line.startswith('c took '):

From 7cd1bb1630345303c42d49a2dc328572a7e6ad6a Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Mon, 3 Nov 2025 17:09:41 +0100
Subject: [PATCH 44/46] opb print intermediate solutions

---
 cpmpy/tools/benchmark/opb.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/cpmpy/tools/benchmark/opb.py b/cpmpy/tools/benchmark/opb.py
index 5c0c222cd..0c571a058 100644
--- a/cpmpy/tools/benchmark/opb.py
+++ b/cpmpy/tools/benchmark/opb.py
@@ -83,6 +83,7 @@ class OPBBenchmark(Benchmark):
     """
 
     def __init__(self):
+        self.sol_time = None
         super().__init__(reader=read_opb, exit_status=OPBExitStatus)
     
     def print_comment(self, comment:str):
@@ -103,9 +104,11 @@ def print_intermediate(self, objective:int):
 
     def print_result(self, s):
         if s.status().exitstatus == CPMStatus.OPTIMAL:
+            self.print_objective(s.objective_value())
             self.print_value(solution_opb(s))
             self.print_status(OPBExitStatus.optimal)
         elif s.status().exitstatus == CPMStatus.FEASIBLE:
+            self.print_objective(s.objective_value())
             self.print_value(solution_opb(s))
             self.print_status(OPBExitStatus.sat)
         elif s.status().exitstatus == CPMStatus.UNSATISFIABLE:
@@ -162,7 +165,8 @@ def parse_output_line(self, line, result):
             obj = int(line[2:].strip())
             if result['intermediate'] is None:
                 result['intermediate'] = []
-            result['intermediate'] += [(self.sol_time, obj)]
+            if self.sol_time is not None:
+                result['intermediate'] += [(self.sol_time, obj)]
             result['objective_value'] = obj
             obj = None
         elif line.startswith('c took '):

From a21a0404c22463fae25e945e1cd4f5c688cfad80 Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Mon, 3 Nov 2025 17:09:54 +0100
Subject: [PATCH 45/46] mse print intermediate solutions

---
 cpmpy/tools/benchmark/mse.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/cpmpy/tools/benchmark/mse.py b/cpmpy/tools/benchmark/mse.py
index 656467bf9..a1936346e 100644
--- a/cpmpy/tools/benchmark/mse.py
+++ b/cpmpy/tools/benchmark/mse.py
@@ -90,6 +90,7 @@ class MSEBenchmark(Benchmark):
     """
 
     def __init__(self):
+        self._sol_time = None
         super().__init__(reader=read_wcnf, exit_status=MSEExitStatus)
     
     def print_comment(self, comment:str):
@@ -109,9 +110,11 @@ def print_intermediate(self, objective:int):
 
     def print_result(self, s):
         if s.status().exitstatus == CPMStatus.OPTIMAL:
+            self.print_objective(s.objective_value())
             self.print_value(solution_mse(s))
             self.print_status(MSEExitStatus.optimal)
         elif s.status().exitstatus == CPMStatus.FEASIBLE:
+            self.print_objective(s.objective_value())
             self.print_value(solution_mse(s))
             self.print_status(MSEExitStatus.sat)
         elif s.status().exitstatus == CPMStatus.UNSATISFIABLE:
@@ -169,7 +172,8 @@ def parse_output_line(self, line, result):
             obj = int(line[2:].strip())
             if result['intermediate'] is None:
                 result['intermediate'] = []
-            result['intermediate'] += [(self._sol_time, obj)]
+            if self._sol_time is not None:
+                result['intermediate'] += [(self._sol_time, obj)]
             result['objective_value'] = obj
             obj = None
         elif line.startswith('c took '):

From eda839c8b9df7df12c238a9e16a4bde6494fa8aa Mon Sep 17 00:00:00 2001
From: ThomSerg <thomas.s2000@hotmail.com>
Date: Mon, 3 Nov 2025 17:13:07 +0100
Subject: [PATCH 46/46] cplex and hexaly solver arguments

---
 cpmpy/tools/benchmark/_base.py | 93 ++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/cpmpy/tools/benchmark/_base.py b/cpmpy/tools/benchmark/_base.py
index 4de8ce816..ce103dcab 100644
--- a/cpmpy/tools/benchmark/_base.py
+++ b/cpmpy/tools/benchmark/_base.py
@@ -338,7 +338,96 @@ def solution_count(self):
             res |= { "solution_callback": CpoSolutionCallback }
 
         return res, None
+
+    def cplex_arguments(
+        self,
+        cores: Optional[int] = None,
+        seed: Optional[int] = None,
+        **kwargs
+    ):
+        """Translate `cores` / `seed` into the "threads" / "randomseed" solver arguments; unset values are left out."""
+        res = {}
+        if cores is not None:
+            res["threads"] = cores
+        if seed is not None:
+            res["randomseed"] = seed
+        return res, None
+    
+    def hexaly_arguments(
+        self,
+        model: cp.Model,
+        cores: Optional[int] = None,
+        seed: Optional[int] = None,
+        intermediate: bool = False,
+        **kwargs
+    ):
+        res = dict()
+        #res |= {"nb_threads": cores}
+        #res |= {"seed": seed}
+
+
+        if intermediate and model.has_objective():
+            # Define custom Hexaly solution callback, then register it
+
+            _self = self
+            class HexSolutionCallback:
     
+                def __init__(self):
+                    self.__start_time = time.time()
+                    self.__solution_count = 0
+          
+
+                def on_solution_callback(self, optimizer, cb_type):
+                    """Print a timestamped comment and the current value of the first objective, then count the call."""
+                    # NOTE(review): nothing here checks that the objective changed since the previous call
+                    current_time = time.time()
+                    obj = optimizer.model.objectives[0]
+                    _self.print_comment('Solution %i, time = %0.4fs' % 
+                                (self.__solution_count, current_time - self.__start_time))
+                    _self.print_intermediate(obj.value)  # report the numeric value, not the objective expression object
+                    self.__solution_count += 1
+
+                def solution_count(self):
+                    return self.__solution_count
+                
+            # Register the callback
+            res |= { "solution_callback": HexSolutionCallback().on_solution_callback }
+
+
+        # def internal_options(solver: "CPM_hexaly"):
+        #     # https://github.com/google/or-tools/blob/1c5daab55dd84bca7149236e4b4fa009e5fd95ca/ortools/flatzinc/cp_model_fz_solver.cc#L1688
+        #     #solver.native_model.get_param().set_seed(seed)
+        #     #solver.native_model.get_param().set_nr_threads(cores)
+
+        #     _self = self
+        #     class CallbackExample:
+        #         def __init__(self):
+        #             self.last_best_value = 0
+        #             self.last_best_running_time = 0
+        #             self.__solution_count = 0
+        #             self.__start_time = time.time()
+
+        #         def my_callback(self, optimizer, cb_type):
+        #             stats = optimizer.statistics
+        #             obj = optimizer.model.objectives[0]
+        #             current_time = time.time()
+        #             #obj = int(self.ObjectiveValue())
+        #             #obj = optimizer.get_objective_bound(0).value
+        #             if obj.value > self.last_best_value:
+        #                 self.last_best_running_time = stats.running_time
+        #                 self.last_best_value = obj.value
+        #                 self.__solution_count += 1
+                  
+        #                 _self.print_comment('Solution %i, time = %0.4fs' % 
+        #                         (self.__solution_count, current_time - self.__start_time))
+        #                 _self.print_intermediate(obj.value)
+
+            # optimizer = solver.native_model
+            # cb = CallbackExample()
+            # from hexaly.optimizer import HxCallbackType
+            # optimizer.add_callback(HxCallbackType.TIME_TICKED, cb.my_callback)
+
+        return res, None
 
     """
     Methods which can, bit most likely shouldn't, be overwritten.
@@ -420,6 +509,10 @@ def solver_arguments(
             return self.gurobi_arguments(model, cores=cores, seed=seed, mem_limit=mem_limit, intermediate=intermediate, opt=opt, **kwargs)
         elif solver == "cpo":
             return self.cpo_arguments(model=model, cores=cores, seed=seed, intermediate=intermediate, **kwargs)
+        elif solver == "hexaly":
+            return self.hexaly_arguments(model, cores=cores, seed=seed, intermediate=intermediate, **kwargs)
+        elif solver == "cplex":
+            return self.cplex_arguments(cores=cores, seed=seed, **kwargs)  # forward the seed too; it was silently dropped
         else:
             self.print_comment(f"setting parameters of {solver} is not (yet) supported")
             return dict(), None