Skip to content

Commit

Permalink
Add an integration with mymdc
Browse files Browse the repository at this point in the history
In a nutshell, DAMNIT will now look for mymdc credentials in
`usr/mymdc-credentials.yml` whenever a `mymdc#...` argument is used in a
variable and use them to retrieve mymdc fields (currently just the sample and
run type).
  • Loading branch information
JamesWrigley committed Mar 18, 2024
1 parent 255ac80 commit 43ca406
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 9 deletions.
57 changes: 56 additions & 1 deletion damnit/ctxsupport/ctxrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
log = logging.getLogger(__name__)

THUMBNAIL_SIZE = 300 # px
MYMDC_TIMEOUT = 10 # seconds


# More specific Python types beyond what HDF5/NetCDF4 know about, so we can
Expand All @@ -45,6 +46,45 @@ class DataType(Enum):
Timestamp = "timestamp"


def get_run_info(run, run_no, proposal):
    """Fetch the myMdC record of a run using the proposal's stored credentials.

    The credentials are read from ``usr/mymdc-credentials.yml`` below the
    directory returned by ``get_proposal_path(run)``. That file must provide
    a ``token`` and a ``server`` entry.

    Args:
        run: Run object, passed to ``get_proposal_path()`` to locate the
            proposal directory.
        run_no: Run number, interpolated into the request URL.
        proposal: Proposal number, interpolated into the request URL.

    Returns:
        A ``(headers, server, run_info)`` tuple: the request headers carrying
        the API token, the server base URL from the credentials file, and the
        first entry of the ``runs`` list in the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the response contains no runs.
    """
    # Function-scope imports, so yaml/requests are only loaded on first call.
    import yaml
    import requests

    proposal_path = get_proposal_path(run)

    with open(proposal_path / "usr/mymdc-credentials.yml") as f:
        document = yaml.safe_load(f)
    token = document["token"]
    server = document["server"]

    headers = {"X-API-key": token}
    response = requests.get(f"{server}/api/mymdc/proposals/by_number/{proposal}/runs/{run_no}",
                            headers=headers, timeout=MYMDC_TIMEOUT)
    # Fail with a clear HTTP error (e.g. rejected token) instead of an opaque
    # KeyError / JSON decoding error further down.
    response.raise_for_status()

    runs = response.json()["runs"]
    if not runs:
        raise RuntimeError(f"Couldn't get run information from mymdc for p{proposal}, r{run_no}")

    return headers, server, runs[0]

def get_sample(run, run_no, proposal):
    """Return the name of the sample that myMdC records for a run.

    Args:
        run: Run object, forwarded to ``get_run_info()``.
        run_no: Run number, forwarded to ``get_run_info()``.
        proposal: Proposal number, forwarded to ``get_run_info()``.

    Returns:
        The ``name`` field of the sample referenced by the run's
        ``sample_id``.

    Raises:
        requests.HTTPError: If the sample request returns an error status.
        RuntimeError: Propagated from ``get_run_info()`` when no run is found.
    """
    import requests

    headers, server, run_info = get_run_info(run, run_no, proposal)
    sample_id = run_info["sample_id"]
    response = requests.get(f"{server}/api/mymdc/samples/{sample_id}",
                            headers=headers, timeout=MYMDC_TIMEOUT)
    # Surface HTTP failures directly rather than as a KeyError on "name".
    response.raise_for_status()
    return response.json()["name"]

def get_run_type(run, run_no, proposal):
    """Return the run type that myMdC records for a run.

    The run type is looked up through the ``experiments`` endpoint, using the
    ``experiment_id`` of the run record.

    Args:
        run: Run object, forwarded to ``get_run_info()``.
        run_no: Run number, forwarded to ``get_run_info()``.
        proposal: Proposal number, forwarded to ``get_run_info()``.

    Returns:
        The ``name`` field of the experiment referenced by the run's
        ``experiment_id``.

    Raises:
        requests.HTTPError: If the experiment request returns an error status.
        RuntimeError: Propagated from ``get_run_info()`` when no run is found.
    """
    import requests

    headers, server, run_info = get_run_info(run, run_no, proposal)
    experiment_id = run_info["experiment_id"]
    response = requests.get(f"{server}/api/mymdc/experiments/{experiment_id}",
                            headers=headers, timeout=MYMDC_TIMEOUT)
    # Surface HTTP failures directly rather than as a KeyError on "name".
    response.raise_for_status()
    return response.json()["name"]

class ContextFileErrors(RuntimeError):
def __init__(self, problems):
self.problems = problems
Expand Down Expand Up @@ -107,6 +147,13 @@ def check(self):
f"These Variables have duplicate titles between them: {', '.join(bad_variables)}"
)

# Check that all mymdc dependencies are valid
for name, var in self.vars.items():
mymdc_args = var.arg_dependencies("mymdc#")
for arg_name, annotation in mymdc_args.items():
if annotation not in ["sample", "run_type"]:
problems.append(f"Argument '{arg_name}' of variable '{name}' has an invalid MyMdC dependency: '{annotation}'")

if problems:
raise ContextFileErrors(problems)

Expand Down Expand Up @@ -221,6 +268,14 @@ def execute(self, run_data, run_number, proposal, input_vars) -> 'Results':
elif param.default is inspect.Parameter.empty:
missing_input.append(inp_name)

# Mymdc fields
elif annotation.startswith("mymdc#"):
mymdc_field = annotation.removeprefix("mymdc#")
if mymdc_field == "sample":
kwargs[arg_name] = get_sample(run_data, run_number, proposal)
elif mymdc_field == "run_type":
kwargs[arg_name] = get_run_type(run_data, run_number, proposal)

elif annotation == "meta#run_number":
kwargs[arg_name] = run_number
elif annotation == "meta#proposal":
Expand Down Expand Up @@ -342,7 +397,7 @@ def get_proposal_path(xd_run):
files = [f.filename for f in xd_run.files]
p = Path(files[0])

return Path(*p.parts[:7])
return Path(*p.parts[:-3])


def add_to_h5_file(path) -> h5py.File:
Expand Down
9 changes: 5 additions & 4 deletions damnit/gui/editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,13 @@ def test_context(self, db, db_dir):
out_buffer = StringIO()
reporter = Reporter(out_buffer, out_buffer)
pyflakes_check(self.text(), "<ctx>", reporter)
# Disgusting hack to avoid getting warnings for "var#foo" and "meta#foo"
# type annotations. This needs some tweaking to avoid missing real
# errors.
# Disgusting hack to avoid getting warnings for "var#foo", "meta#foo",
# and "mymdc#foo" type annotations. This needs some tweaking to avoid
# missing real errors.
pyflakes_output = "\n".join([line for line in out_buffer.getvalue().split("\n")
if not line.endswith("undefined name 'var'") \
and not line.endswith("undefined name 'meta'")])
and not line.endswith("undefined name 'meta'") \
and not line.endswith("undefined name 'mymdc'")])

if len(pyflakes_output) > 0:
return ContextTestResult.WARNING, pyflakes_output
Expand Down
8 changes: 8 additions & 0 deletions docs/backend.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ arguments if they have the right _annotations_:
- `meta#proposal_dir`: The root
[Path](https://docs.python.org/3/library/pathlib.html) to the current
proposal.
- `mymdc#sample`: The sample from myMdC.
- `mymdc#run_type`: The run type from myMdC.

!!! warning
The myMdC integration requires a special token to work properly. Please
contact the DA group if you would like to use this for your experiment.

You can also use annotations to express a dependency between `Variable`s using
the `var#<name>` annotation:
Expand Down Expand Up @@ -196,6 +202,8 @@ The environment *must* have these dependencies installed for DAMNIT to work:

- `extra_data`
- `scipy`
- `pyyaml`
- `requests`

## Managing the backend
The backend is a process running under [Supervisor](http://supervisord.org/). In
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ dependencies = [
"numpy",
"pandas<2",
"xarray",
"requests",
"scipy",
"supervisor",
"termcolor",
"pyyaml"
]

[project.optional-dependencies]
Expand All @@ -34,7 +36,6 @@ gui = [
"PyQt5",
"pyflakes", # for checking context file in editor
"QScintilla==2.13",
"requests",
"tabulate", # used in pandas to make markdown tables (for Zulip)
]
test = [
Expand Down
76 changes: 73 additions & 3 deletions tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
import textwrap
import subprocess
import configparser
from unittest.mock import patch
from unittest.mock import MagicMock, patch

import h5py
import yaml
import pytest
import requests
import numpy as np
import xarray as xr

Expand Down Expand Up @@ -141,6 +143,31 @@ def bar(run, foo: "var#foo"):
# on a proc variable.
assert var_promotion_ctx.vars["bar"].data == RunData.PROC

# Test depending on mymdc fields
good_mymdc_code = """
from damnit.context import Variable
@Variable(title="foo")
def foo(run, sample: "mymdc#sample", run_type: "mymdc#run_type"):
return 42
"""
# This should not raise an exception
mkcontext(good_mymdc_code).check()

bad_mymdc_code = """
from damnit.context import Variable
@Variable(title="foo")
def foo(run, quux: "mymdc#quux"):
return 42
"""
ctx = mkcontext(bad_mymdc_code)

# This should raise an exception because it's using an unsupported mymdc
# field.
with pytest.raises(ContextFileErrors):
ctx.check()

def run_ctx_helper(context, run, run_number, proposal, caplog, input_vars=None):
# Track all error messages during creation. This is necessary because a
# variable that throws an error will be logged by Results, the exception
Expand Down Expand Up @@ -318,6 +345,51 @@ def dataset(run):
with h5py.File(results_hdf5_path) as f:
assert f[".reduced/dataset"].asstr()[()].startswith("Dataset")

# Test getting mymdc fields
mymdc_code = """
from damnit_ctx import Variable
@Variable(title="Sample")
def sample(run, x: "mymdc#sample"):
return x
@Variable(title="Run type")
def run_type(run, x: "mymdc#run_type"):
return x
"""
mymdc_ctx = mkcontext(mymdc_code)

# Create some mock credentials and set the mock_run files to appear to be
# under `tmp_path`.
(tmp_path / "usr").mkdir()
with open(tmp_path / "usr/mymdc-credentials.yml", "w") as f:
yaml.dump({
"token": "foo",
"server": "https://out.xfel.eu/metadata"
}, f)
mock_run.files = [MagicMock(filename=tmp_path / "raw/r0001/RAW-R0004-DA03-S00000.h5")]

# Helper function to mock requests.get() for different endpoints
def mock_get(url, headers, timeout):
assert headers["X-API-key"] == "foo"

if "proposals/by_number" in url:
result = dict(runs=[dict(sample_id=1, experiment_id=1)])
elif "samples" in url:
result = dict(name="mithril")
elif "experiments" in url:
result = dict(name="alchemy")

response = MagicMock()
response.json.return_value = result
return response

# Execute the context file and check the results
with patch.object(requests, "get", side_effect=mock_get):
results = results_create(mymdc_ctx)
assert results.data["sample"] == "mithril"
assert results.data["run_type"] == "alchemy"

def test_results_bad_obj(mock_run, tmp_path):
# Test returning an object we can't save in HDF5
bad_obj_code = """
Expand All @@ -333,8 +405,6 @@ def bad(run):
"""
bad_obj_ctx = mkcontext(bad_obj_code)
results = bad_obj_ctx.execute(mock_run, 1000, 123, {})
print(f"{results.data=}")
print(f"{results.reduced=}")
results_hdf5_path = tmp_path / 'results.h5'
results.save_hdf5(results_hdf5_path)
with h5py.File(results_hdf5_path) as f:
Expand Down

0 comments on commit 43ca406

Please sign in to comment.