Skip to content

Commit 6f4a317

Browse files
authored
feat: load login infos from configuration file (#4)
* feat: load login infos from configuration file * feat: added config load tests and code cleaning
1 parent 6822ec3 commit 6f4a317

File tree

7 files changed

+840
-27
lines changed

7 files changed

+840
-27
lines changed

README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,39 @@
11
# DataSHIELD Interface Python
22

33
This DataSHIELD Client Interface is a Python port of the original DataSHIELD Client Interface written in R ([DSI](https://github.com/datashield/DSI)). The provided interface can be implemented for accessing a data repository supporting the DataSHIELD infrastructure: controlled R commands executed on the server side guarantee that only non-disclosive information is returned to the client side.
4+
5+
## Configuration
6+
7+
The search path for the DataSHIELD configuration file is the following:
8+
9+
1. User general location: `~/.config/datashield/config.yaml`
10+
2. Current project specific location: `./.datashield/config.yaml`
11+
12+
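The configuration files are read in the order listed above and merged: a server entry in a later file replaces the entry with the same `name` from an earlier file, and entries that only exist in an earlier file are kept (server names must therefore be unique).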
13+
14+
The format of the DataSHIELD configuration file is:
15+
16+
```yaml
17+
servers:
18+
- name: server1
19+
url: https://opal-demo.obiba.org
20+
user: dsuser
21+
password: P@ssw0rd
22+
- name: server2
23+
url: https://opal.example.org
24+
token: your-access-token-here
25+
profile: default
26+
- name: server3
27+
url: https://study.example.org/opal
28+
user: dsuser
29+
password: P@ssw0rd
30+
profile: custom
31+
driver: datashield_opal.OpalDriver
32+
```
33+
34+
Each server entry in the list accepts the following fields (`name`, `url` and one form of authentication are required):
35+
- `name`: Unique identifier for the server
36+
- `url`: The server URL
37+
- Authentication: Either `user` and `password`, or `token` (recommended)
38+
- `profile`: DataSHIELD profile name (optional, defaults to "default")
39+
- `driver`: Connection driver class name (optional, defaults to "datashield_opal.OpalDriver")

datashield/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from datashield.interface import (
22
DSConnection as DSConnection,
3+
DSConfig as DSConfig,
34
DSLoginInfo as DSLoginInfo,
45
DSDriver as DSDriver,
56
DSError as DSError,

datashield/api.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44

55
import logging
6-
from datashield.interface import DSLoginInfo, DSConnection, DSDriver, DSError
6+
from datashield.interface import DSConfig, DSLoginInfo, DSConnection, DSDriver, DSError
77
import time
88

99

@@ -12,8 +12,24 @@ class DSLoginBuilder:
1212
Helper class to formalize DataSHIELD login arguments for a set of servers.
1313
"""
1414

15-
def __init__(self, names: list[str] | None = None):
    """Create a builder, optionally pre-filled with login information from the configuration files.

    :param names: The list of server names to load from the configuration files, if any. When ``None``
        or empty, no login information is loaded and the builder starts empty.
    """
    self.items: list[DSLoginInfo] = []
    if not names:
        return

    # DSConfig.load() returns the merged content of the configuration files;
    # only the servers that were explicitly requested are kept, in configuration order
    requested = set(names)
    found = [x for x in DSConfig.load().servers if x.name in requested]
    self.items.extend(found)

    # report every requested name that could not be resolved: this covers a partial match
    # as well as the case where no server is configured at all
    found_names = {x.name for x in found}
    missing = [x for x in names if x not in found_names]
    if missing:
        logging.warning(f"No matching server names found in configuration for: {', '.join(missing)}")
1733

1834
def add(
1935
self,
@@ -46,7 +62,9 @@ def add(
4662
raise ValueError(f"Server name must be unique: {name}")
4763
if user is None and token is None:
4864
raise ValueError("Either user or token must be provided")
49-
self.items.append(DSLoginInfo(name, url, user, password, token, profile, driver))
65+
self.items.append(
66+
DSLoginInfo(name=name, url=url, user=user, password=password, token=token, profile=profile, driver=driver)
67+
)
5068
return self
5169

5270
def remove(self, name: str):
@@ -109,7 +127,7 @@ def open(self, restore: str = None, failSafe: bool = False) -> None:
109127
raise e
110128
if self.has_errors():
111129
for name in self.errors:
112-
print(f"Connection to {name} has failed")
130+
logging.error(f"Connection to {name} has failed")
113131

114132
def close(self, save: str = None) -> None:
115133
"""

datashield/interface.py

Lines changed: 86 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,97 @@
33
"""
44

55
import importlib
6+
import logging
7+
import os
8+
import yaml
9+
from pydantic import BaseModel, Field, model_validator
610

11+
# Default configuration file paths to look for DataSHIELD login information, in increasing order of precedence (entries from later files override entries from earlier ones)
12+
CONFIG_FILES = ["~/.config/datashield/config.yaml", "./.datashield/config.yaml"]
713

8-
class DSLoginInfo(BaseModel):
    """
    Helper class with DataSHIELD login details.

    Either ``user`` or ``token`` must be provided, otherwise validation fails.
    Unknown fields are rejected.
    """

    # unique name identifying the server
    name: str
    # server URL
    url: str
    # user/password credentials, alternative to the token
    user: str | None = None
    password: str | None = None
    # access token, alternative to the user/password credentials
    token: str | None = None
    # DataSHIELD profile name
    profile: str = "default"
    # fully qualified name of the connection driver class
    driver: str = "datashield_opal.OpalDriver"

    model_config = {"extra": "forbid"}

    @model_validator(mode="before")
    @classmethod
    def apply_defaults(cls, data):
        """
        Treat an explicit ``None`` for ``profile`` or ``driver`` as "not provided".

        The field defaults then apply instead of a type validation failure, for instance
        when a YAML entry has an empty ``profile:`` key or when a caller forwards ``None``.

        :param data: The raw input data, before field validation
        :return: The input data without the ``None`` valued ``profile`` and ``driver`` entries
        """
        if isinstance(data, dict):
            return {k: v for k, v in data.items() if not (k in ("profile", "driver") and v is None)}
        return data

    @model_validator(mode="after")
    def validate_credentials(self) -> "DSLoginInfo":
        """
        Ensure that some form of authentication is provided.

        :return: The validated login information
        :raises ValueError: If neither ``user`` nor ``token`` is defined
        """
        if self.user is None and self.token is None:
            raise ValueError("Either user or token must be provided")
        return self
35+
36+
37+
class DSConfig(BaseModel):
    """
    Helper class with DataSHIELD configuration details.

    Unknown top-level fields are rejected.
    """

    # login details of the configured servers
    servers: list[DSLoginInfo] = Field(default_factory=list)

    model_config = {"extra": "forbid"}

    @classmethod
    def load(cls) -> "DSConfig":
        """
        Load the DataSHIELD configuration from the default configuration files.

        All existing and readable configuration files listed in ``CONFIG_FILES`` are
        processed in order. Servers are identified by their ``name`` field: if the same
        name appears several times (in the same file or in different files), the last
        definition read replaces the previous one. Servers that are only present in
        earlier files are kept.

        Missing or unreadable files are silently ignored. A file that cannot be parsed
        or validated is reported in the log and skipped, the other files still being loaded.

        :return: The DataSHIELD configuration object, without servers if nothing could be loaded
        """
        servers_by_name: dict[str, DSLoginInfo] = {}
        for config_file in CONFIG_FILES:
            path = os.path.expanduser(config_file)
            # configuration files are optional: silently ignore missing or unreadable ones
            if not os.path.exists(path) or not os.access(path, os.R_OK):
                continue
            try:
                config = cls.load_from_file(path)
            except Exception as err:
                # best-effort loading: report the kind of failure and move on to the next file.
                # The detailed message may echo file content (possibly credentials), so it is
                # only exposed at debug level.
                logging.error(f"Failed to load login information from {config_file}: {type(err).__name__}")
                logging.debug("Configuration load failure details", exc_info=True)
                continue
            # later definitions replace earlier ones with the same name
            for server in config.servers:
                servers_by_name[server.name] = server
        return cls(servers=list(servers_by_name.values()))

    @classmethod
    def load_from_file(cls, file: str) -> "DSConfig":
        """
        Load the DataSHIELD configuration from a YAML file. The file must contain a list of servers
        with their login details. An empty file results in a configuration without servers.

        :param file: The path to the YAML file containing the DataSHIELD configuration
        :return: The DataSHIELD configuration object
        """
        # explicit encoding so that parsing does not depend on the platform default
        with open(file, encoding="utf-8") as f:
            config_data = yaml.safe_load(f)

        # an empty YAML document is parsed as None
        return cls.model_validate(config_data if config_data is not None else {})
3197

3298

3399
class DSResult:
@@ -409,7 +475,7 @@ def new_connection(cls, args: DSLoginInfo, restore: str = None) -> DSConnection:
409475
raise NotImplementedError("DSConnection function not available")
410476

411477
@classmethod
412-
def load_class(cls, name: str) -> any:
478+
def load_class(cls, name: str) -> type["DSDriver"]:
413479
"""
414480
Load a class from its fully qualified name (dot separated).
415481

pyproject.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "datashield"
3-
version = "0.2.0"
3+
version = "0.3.0"
44
description = "DataSHIELD Client Interface in Python."
55
authors = [
66
{name = "Yannick Marcon", email = "yannick.marcon@obiba.org"}
@@ -22,7 +22,10 @@ classifiers = [
2222
"Programming Language :: Python :: 3.12",
2323
"Programming Language :: Python :: 3.13",
2424
]
25-
dependencies = []
25+
dependencies = [
26+
"pydantic>=2.0",
27+
"PyYAML>=6.0",
28+
]
2629

2730
[project.optional-dependencies]
2831
test = [

0 commit comments

Comments
 (0)