Add an integration with mymdc

In a nutshell, whenever a variable uses a `mymdc#...` argument annotation, DAMNIT will now look for myMdC credentials in `usr/mymdc-credentials.yml` under the proposal directory and use them to retrieve myMdC fields (currently just the sample and run type).
European-XFEL · Mar 18, 2024 · 43ca406 · 43ca406
1 parent 255ac80
commit 43ca406
Show file tree

Hide file tree

Showing 5 changed files with 144 additions and 9 deletions.
diff --git a/damnit/ctxsupport/ctxrunner.py b/damnit/ctxsupport/ctxrunner.py
@@ -34,6 +34,7 @@
 log = logging.getLogger(__name__)
 
 THUMBNAIL_SIZE = 300 # px
+MYMDC_TIMEOUT = 10 # seconds
 
 
 # More specific Python types beyond what HDF5/NetCDF4 know about, so we can
@@ -45,6 +46,45 @@ class DataType(Enum):
     Timestamp = "timestamp"
 
 
+def get_run_info(run, run_no, proposal):
+    """Fetch the myMdC record for run `run_no` of proposal `proposal`.
+
+    The API token and server URL are read from `usr/mymdc-credentials.yml`
+    under the proposal directory that get_proposal_path() derives from `run`.
+
+    Returns a `(headers, server, run_info)` tuple so that callers can make
+    follow-up requests with the same credentials. Raises requests.HTTPError
+    if the server replies with an error status, and RuntimeError if the reply
+    contains no runs.
+    """
+    # Imported here rather than at module level, so they are only loaded when
+    # this function actually runs.
+    import yaml
+    import requests
+
+    credentials_path = get_proposal_path(run) / "usr/mymdc-credentials.yml"
+    with open(credentials_path) as f:
+        document = yaml.safe_load(f)
+    token = document["token"]
+    server = document["server"]
+
+    headers = {"X-API-key": token}
+    response = requests.get(f"{server}/api/mymdc/proposals/by_number/{proposal}/runs/{run_no}",
+                            headers=headers, timeout=MYMDC_TIMEOUT)
+    # Surface HTTP failures (e.g. a rejected token) as an HTTPError right
+    # away, instead of a confusing KeyError/JSON error while parsing below.
+    response.raise_for_status()
+    run_res = response.json()
+
+    if not run_res["runs"]:
+        raise RuntimeError(f"Couldn't get run information from mymdc for p{proposal}, r{run_no}")
+
+    return headers, server, run_res["runs"][0]
+
+def get_sample(run, run_no, proposal):
+    """Return the name of the sample that myMdC records for a run.
+
+    The run record is looked up with get_run_info(), and its `sample_id` is
+    then resolved to a name with a second request. Raises requests.HTTPError
+    if that second request returns an error status; errors from
+    get_run_info() propagate unchanged.
+    """
+    import requests
+
+    headers, server, run_info = get_run_info(run, run_no, proposal)
+    sample_id = run_info["sample_id"]
+    response = requests.get(f"{server}/api/mymdc/samples/{sample_id}",
+                            headers=headers, timeout=MYMDC_TIMEOUT)
+    # Don't try to parse an error page as a sample record
+    response.raise_for_status()
+    return response.json()["name"]
+
+def get_run_type(run, run_no, proposal):
+    """Return the name of the run type that myMdC records for a run.
+
+    The run record is looked up with get_run_info(), and its `experiment_id`
+    is then resolved to a name through the `experiments` endpoint. Raises
+    requests.HTTPError if that second request returns an error status; errors
+    from get_run_info() propagate unchanged.
+    """
+    import requests
+
+    headers, server, run_info = get_run_info(run, run_no, proposal)
+    experiment_id = run_info["experiment_id"]
+    response = requests.get(f"{server}/api/mymdc/experiments/{experiment_id}",
+                            headers=headers, timeout=MYMDC_TIMEOUT)
+    # Don't try to parse an error page as an experiment record
+    response.raise_for_status()
+    return response.json()["name"]
+
 class ContextFileErrors(RuntimeError):
     def __init__(self, problems):
         self.problems = problems
@@ -107,6 +147,13 @@ def check(self):
                 f"These Variables have duplicate titles between them: {', '.join(bad_variables)}"
             )
 
+        # Check that all mymdc dependencies are valid
+        for name, var in self.vars.items():
+            mymdc_args = var.arg_dependencies("mymdc#")
+            for arg_name, annotation in mymdc_args.items():
+                if annotation not in ["sample", "run_type"]:
+                    problems.append(f"Argument '{arg_name}' of variable '{name}' has an invalid MyMdC dependency: '{annotation}'")
+
         if problems:
             raise ContextFileErrors(problems)
 
@@ -221,6 +268,14 @@ def execute(self, run_data, run_number, proposal, input_vars) -> 'Results':
                         elif param.default is inspect.Parameter.empty:
                             missing_input.append(inp_name)
 
+                    # Mymdc fields
+                    elif annotation.startswith("mymdc#"):
+                        mymdc_field = annotation.removeprefix("mymdc#")
+                        if mymdc_field == "sample":
+                            kwargs[arg_name] = get_sample(run_data, run_number, proposal)
+                        elif mymdc_field == "run_type":
+                            kwargs[arg_name] = get_run_type(run_data, run_number, proposal)
+
                     elif annotation == "meta#run_number":
                         kwargs[arg_name] = run_number
                     elif annotation == "meta#proposal":
@@ -342,7 +397,7 @@ def get_proposal_path(xd_run):
     files = [f.filename for f in xd_run.files]
     p = Path(files[0])
 
-    return Path(*p.parts[:7])
+    return Path(*p.parts[:-3])
 
 
 def add_to_h5_file(path) -> h5py.File:

diff --git a/damnit/gui/editor.py b/damnit/gui/editor.py
@@ -99,12 +99,13 @@ def test_context(self, db, db_dir):
         out_buffer = StringIO()
         reporter = Reporter(out_buffer, out_buffer)
         pyflakes_check(self.text(), "<ctx>", reporter)
-        # Disgusting hack to avoid getting warnings for "var#foo" and "meta#foo"
-        # type annotations. This needs some tweaking to avoid missing real
-        # errors.
+        # Disgusting hack to avoid getting warnings for "var#foo", "meta#foo",
+        # and "mymdc#foo" type annotations. This needs some tweaking to avoid
+        # missing real errors.
         pyflakes_output = "\n".join([line for line in out_buffer.getvalue().split("\n")
                                      if not line.endswith("undefined name 'var'") \
-                                     and not line.endswith("undefined name 'meta'")])
+                                     and not line.endswith("undefined name 'meta'") \
+                                     and not line.endswith("undefined name 'mymdc'")])
 
         if len(pyflakes_output) > 0:
             return ContextTestResult.WARNING, pyflakes_output

diff --git a/docs/backend.md b/docs/backend.md
@@ -84,6 +84,12 @@ arguments if they have the right _annotations_:
 - `meta#proposal_dir`: The root
   [Path](https://docs.python.org/3/library/pathlib.html) to the current
   proposal.
+- `mymdc#sample`: The name of the sample recorded in myMdC for the run.
+- `mymdc#run_type`: The name of the run type recorded in myMdC for the run.
+
+!!! warning
+    The myMdC integration requires a special token to work properly. Please
+    contact the DA group if you would like to use this for your experiment.
 
 You can also use annotations to express a dependency between `Variable`'s using
 the `var#<name>` annotation:
@@ -196,6 +202,8 @@ The environment *must* have these dependencies installed for DAMNIT to work:
 
 - `extra_data`
 - `scipy`
+- `pyyaml`
+- `requests`
 
 ## Managing the backend
 The backend is a process running under [Supervisor](http://supervisord.org/). In

diff --git a/pyproject.toml b/pyproject.toml
@@ -20,9 +20,11 @@ dependencies = [
     "numpy",
     "pandas<2",
     "xarray",
+    "requests",
     "scipy",
     "supervisor",
     "termcolor",
+    "pyyaml"
 ]
 
 [project.optional-dependencies]
@@ -34,7 +36,6 @@ gui = [
     "PyQt5",
     "pyflakes",  # for checking context file in editor
     "QScintilla==2.13",
-    "requests",
     "tabulate",  # used in pandas to make markdown tables (for Zulip)
 ]
 test = [

diff --git a/tests/test_backend.py b/tests/test_backend.py
@@ -6,10 +6,12 @@
 import textwrap
 import subprocess
 import configparser
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import h5py
+import yaml
 import pytest
+import requests
 import numpy as np
 import xarray as xr
 
@@ -141,6 +143,31 @@ def bar(run, foo: "var#foo"):
     # on a proc variable.
     assert var_promotion_ctx.vars["bar"].data == RunData.PROC
 
+    # Test depending on mymdc fields
+    good_mymdc_code = """
+    from damnit.context import Variable
+
+    @Variable(title="foo")
+    def foo(run, sample: "mymdc#sample", run_type: "mymdc#run_type"):
+        return 42
+    """
+    # This should not raise an exception
+    mkcontext(good_mymdc_code).check()
+
+    bad_mymdc_code = """
+    from damnit.context import Variable
+
+    @Variable(title="foo")
+    def foo(run, quux: "mymdc#quux"):
+        return 42
+    """
+    ctx = mkcontext(bad_mymdc_code)
+
+    # This should raise an exception because it's using an unsupported mymdc
+    # field.
+    with pytest.raises(ContextFileErrors):
+        ctx.check()
+
 def run_ctx_helper(context, run, run_number, proposal, caplog, input_vars=None):
     # Track all error messages during creation. This is necessary because a
     # variable that throws an error will be logged by Results, the exception
@@ -318,6 +345,51 @@ def dataset(run):
     with h5py.File(results_hdf5_path) as f:
         assert f[".reduced/dataset"].asstr()[()].startswith("Dataset")
 
+    # Test getting mymdc fields
+    mymdc_code = """
+    from damnit_ctx import Variable
+
+    @Variable(title="Sample")
+    def sample(run, x: "mymdc#sample"):
+        return x
+
+    @Variable(title="Run type")
+    def run_type(run, x: "mymdc#run_type"):
+        return x
+    """
+    mymdc_ctx = mkcontext(mymdc_code)
+
+    # Create some mock credentials and set the mock_run files to appear to be
+    # under `tmp_path`.
+    (tmp_path / "usr").mkdir()
+    with open(tmp_path / "usr/mymdc-credentials.yml", "w") as f:
+        yaml.dump({
+            "token": "foo",
+            "server": "https://out.xfel.eu/metadata"
+        }, f)
+    mock_run.files = [MagicMock(filename=tmp_path / "raw/r0001/RAW-R0004-DA03-S00000.h5")]
+
+    # Helper function to mock requests.get() for different endpoints
+    def mock_get(url, headers, timeout):
+        assert headers["X-API-key"] == "foo"
+
+        if "proposals/by_number" in url:
+            result = dict(runs=[dict(sample_id=1, experiment_id=1)])
+        elif "samples" in url:
+            result = dict(name="mithril")
+        elif "experiments" in url:
+            result = dict(name="alchemy")
+
+        response = MagicMock()
+        response.json.return_value = result
+        return response
+
+    # Execute the context file and check the results
+    with patch.object(requests, "get", side_effect=mock_get):
+        results = results_create(mymdc_ctx)
+    assert results.data["sample"] == "mithril"
+    assert results.data["run_type"] == "alchemy"
+
 def test_results_bad_obj(mock_run, tmp_path):
     # Test returning an object we can't save in HDF5
     bad_obj_code = """
@@ -333,8 +405,6 @@ def bad(run):
     """
     bad_obj_ctx = mkcontext(bad_obj_code)
     results = bad_obj_ctx.execute(mock_run, 1000, 123, {})
-    print(f"{results.data=}")
-    print(f"{results.reduced=}")
     results_hdf5_path = tmp_path / 'results.h5'
     results.save_hdf5(results_hdf5_path)
     with h5py.File(results_hdf5_path) as f: