From 315b9d43e705a96e0b1ca7e7c24437c66980dbd2 Mon Sep 17 00:00:00 2001
From: Christian Hernandez
Date: Fri, 10 Apr 2026 21:58:27 -0600
Subject: [PATCH] feat(emcee): implement get_expected_outputs (closes #804)

The Emcee sampler was inheriting the default get_expected_outputs from
the base Sampler class, which returns only a single directory. However,
emcee actually writes two concrete files during checkpointing via its
checkpoint_info property (bilby/core/sampler/emcee.py:256-257):

  - chain.dat (tab-separated chain history)
  - sampler.pickle (dill-pickled EnsembleSampler for resume)

These were not being reported to bilby_pipe, which uses this method to
know which files to transfer via HTCondor. This PR adds an explicit
override for Emcee that reports both files plus the run directory.

Also adds a unit test in test/core/sampler/emcee_test.py following the
same pattern as the existing tests for Dynesty, Nessai, and Bilby_MCMC.
---
 CHANGELOG.md                    |  3 +++
 bilby/core/sampler/emcee.py     | 36 +++++++++++++++++++++++++++++++++
 test/core/sampler/emcee_test.py | 15 ++++++++++++++
 3 files changed, 54 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 16c762f38..da84f8310 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,9 @@ The original MRs are only visible on the [LIGO GitLab repository](https://git.li
 
 ## [Unreleased]
 
+### Fixes
+* Implemented `get_expected_outputs` for the `Emcee` sampler, which now correctly reports the `chain.dat` and `sampler.pickle` files it writes to the run directory (closes #804)
+
 ## [2.7.1]
 
 ### Fixes
diff --git a/bilby/core/sampler/emcee.py b/bilby/core/sampler/emcee.py
index 143da55e1..add8d832b 100644
--- a/bilby/core/sampler/emcee.py
+++ b/bilby/core/sampler/emcee.py
@@ -439,3 +439,39 @@ def _generate_result(self):
         self.result.log_prior_evaluations = log_priors
         self.result.log_evidence = np.nan
         self.result.log_evidence_err = np.nan
+
+    @classmethod
+    def get_expected_outputs(cls, outdir=None, label=None):
+        """Get lists of the expected outputs directories and files.
+
+        These are used by :code:`bilby_pipe` when transferring files via HTCondor.
+        The emcee sampler writes its checkpoint information (the serialised
+        sampler and the tab-separated chain history) inside a per-run
+        subdirectory ``{outdir}/emcee_{label}/``. The files written there are:
+
+        - ``chain.dat``: tab-separated chain history, one row per step per
+          walker, updated incrementally as the sampler runs.
+        - ``sampler.pickle``: a dill-pickled copy of the
+          :class:`emcee.EnsembleSampler` instance, used to resume from the
+          last completed step.
+
+        Parameters
+        ----------
+        outdir : str
+            The output directory.
+        label : str
+            The label for the run.
+
+        Returns
+        -------
+        list
+            List of file names produced by the sampler.
+        list
+            List of directory names produced by the sampler.
+        """
+        run_dir = os.path.join(outdir, f"emcee_{label}")
+        filenames = [
+            os.path.join(run_dir, "chain.dat"),
+            os.path.join(run_dir, "sampler.pickle"),
+        ]
+        return filenames, [run_dir]
diff --git a/test/core/sampler/emcee_test.py b/test/core/sampler/emcee_test.py
index 2861f1f04..169c463d3 100644
--- a/test/core/sampler/emcee_test.py
+++ b/test/core/sampler/emcee_test.py
@@ -1,3 +1,4 @@
+import os
 import unittest
 
 import bilby
@@ -71,5 +72,19 @@ def test_translate_kwargs(self):
         self.assertDictEqual(expected, self.sampler.kwargs)
 
 
+def test_get_expected_outputs():
+    label = "par0"
+    outdir = os.path.join("some", "bilby_pipe", "dir")
+    filenames, directories = bilby.core.sampler.emcee.Emcee.get_expected_outputs(
+        outdir=outdir, label=label
+    )
+    assert len(filenames) == 2
+    assert len(directories) == 1
+    run_dir = os.path.join(outdir, f"emcee_{label}")
+    assert run_dir in directories
+    assert os.path.join(run_dir, "chain.dat") in filenames
+    assert os.path.join(run_dir, "sampler.pickle") in filenames
+
+
 if __name__ == "__main__":
     unittest.main()