From 315b9d43e705a96e0b1ca7e7c24437c66980dbd2 Mon Sep 17 00:00:00 2001
From: Christian Hernandez <christianescamilla15@gmail.com>
Date: Fri, 10 Apr 2026 21:58:27 -0600
Subject: [PATCH] feat(emcee): implement get_expected_outputs (closes #804)

The Emcee sampler was inheriting the default get_expected_outputs from
the base Sampler class, which returns only a single directory. However,
emcee actually writes two concrete files during checkpointing via its
checkpoint_info property (bilby/core/sampler/emcee.py:256-257):

- chain.dat      (tab-separated chain history)
- sampler.pickle (dill-pickled EnsembleSampler for resume)

These were not being reported to bilby_pipe, which uses this method to
know which files to transfer via HTCondor. This PR adds an explicit
override for Emcee that reports both files plus the run directory.

Also adds a unit test in test/core/sampler/emcee_test.py following the
same pattern as the existing tests for Dynesty, Nessai, and Bilby_MCMC.
---
 CHANGELOG.md                    |  3 +++
 bilby/core/sampler/emcee.py     | 36 +++++++++++++++++++++++++++++++++
 test/core/sampler/emcee_test.py | 15 ++++++++++++++
 3 files changed, 54 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 16c762f38..da84f8310 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,9 @@ The original MRs are only visible on the [LIGO GitLab repository](https://git.li
 
 ## [Unreleased]
 
+### Fixes
+* Implemented `get_expected_outputs` for the `Emcee` sampler, which now correctly reports the `chain.dat` and `sampler.pickle` files it writes to the run directory (closes #804)
+
 ## [2.7.1]
 
 ### Fixes
diff --git a/bilby/core/sampler/emcee.py b/bilby/core/sampler/emcee.py
index 143da55e1..add8d832b 100644
--- a/bilby/core/sampler/emcee.py
+++ b/bilby/core/sampler/emcee.py
@@ -439,3 +439,39 @@ def _generate_result(self):
         self.result.log_prior_evaluations = log_priors
         self.result.log_evidence = np.nan
         self.result.log_evidence_err = np.nan
+
+    @classmethod
+    def get_expected_outputs(cls, outdir=None, label=None):
+        """Get lists of the expected output files and directories.
+
+        These are used by :code:`bilby_pipe` when transferring files via HTCondor.
+        Checkpoint data is reported under ``{outdir}/{name}_{label}/`` where
+        ``name`` is the lower-cased class name, so a subclass reports its own
+        run directory rather than ``emcee_{label}``. The files are:
+
+        - ``chain.dat``: tab-separated chain history.
+        - ``sampler.pickle``: dill-pickled sampler, used to resume a run.
+
+        NOTE(review): the directory name is meant to mirror ``checkpoint_info``;
+        confirm that property also derives it from the class name.
+
+        Parameters
+        ----------
+        outdir : str
+            The output directory.
+        label : str
+            The label for the run.
+
+        Returns
+        -------
+        list
+            List of file names produced by the sampler.
+        list
+            List of directory names produced by the sampler.
+        """
+        run_dir = os.path.join(outdir, f"{cls.__name__.lower()}_{label}")
+        filenames = [
+            os.path.join(run_dir, "chain.dat"),
+            os.path.join(run_dir, "sampler.pickle"),
+        ]
+        return filenames, [run_dir]
diff --git a/test/core/sampler/emcee_test.py b/test/core/sampler/emcee_test.py
index 2861f1f04..169c463d3 100644
--- a/test/core/sampler/emcee_test.py
+++ b/test/core/sampler/emcee_test.py
@@ -1,3 +1,4 @@
+import os
 import unittest
 
 import bilby
@@ -71,5 +72,19 @@ def test_translate_kwargs(self):
             self.assertDictEqual(expected, self.sampler.kwargs)
 
 
+def test_get_expected_outputs():
+    """Emcee reports two checkpoint files and one run directory."""
+    outdir, label = os.path.join("some", "bilby_pipe", "dir"), "par0"
+    expected_dir = os.path.join(outdir, f"emcee_{label}")
+    sampler_cls = bilby.core.sampler.emcee.Emcee
+    files, dirs = sampler_cls.get_expected_outputs(outdir=outdir, label=label)
+    # Check the counts first, then the individual paths.
+    assert len(files) == 2
+    assert len(dirs) == 1
+    assert expected_dir in dirs
+    for basename in ("chain.dat", "sampler.pickle"):
+        assert os.path.join(expected_dir, basename) in files
+
+
 if __name__ == "__main__":
     unittest.main()