Skip to content

Commit 258f1d7

Browse files
tomolopolis and Tom Searle committed
feat(medcat-trainer): improve client api, not importing empty projects (#158)
* feat(medcat-trainer): improve client api, not importing empty projects --------- Co-authored-by: Tom Searle <[email protected]>
1 parent 957d551 commit 258f1d7

File tree

7 files changed

+535
-150
lines changed

7 files changed

+535
-150
lines changed

medcat-trainer/client/mctclient.py

Lines changed: 41 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -65,8 +65,6 @@ class MCTConceptDB(MCTObj):
6565

6666
def __post_init__(self):
6767
if self.name is not None:
68-
if not self.name[0].islower():
69-
raise ValueError("Name must start with a lowercase letter")
7068
if not self.name.replace('_', '').replace('-', '').isalnum():
7169
raise ValueError("Name must contain only alphanumeric characters and underscores")
7270

@@ -90,26 +88,24 @@ def __str__(self):
9088

9189

9290
@dataclass
93-
class MCTModelPack(MCTObj):
94-
"""A model pack in the MedCATTrainer instance.
91+
class MCTMetaTask(MCTObj):
92+
"""A meta task in the MedCATTrainer instance.
9593
9694
Attributes:
97-
name (str): The name of the model pack.
98-
model_pack_zip (str): The path to the model pack zip file, should be a <modelpack_name>.zip file.
95+
name (str): The name of the meta task.
9996
"""
10097
name: str=None
101-
model_pack_zip: str=None
10298

10399
def __str__(self):
104-
return f'{self.id} : {self.name} \t {self. model_pack_zip}'
100+
return f'{self.id} : {self.name}'
105101

106102

107103
@dataclass
108-
class MCTMetaTask(MCTObj):
109-
"""A meta task in the MedCATTrainer instance.
104+
class MCTRelTask(MCTObj):
105+
"""A relation extraction task in the MedCATTrainer instance.
110106
111107
Attributes:
112-
name (str): The name of the meta task.
108+
name (str): The name of the relation extraction task.
113109
"""
114110
name: str=None
115111

@@ -118,16 +114,22 @@ def __str__(self):
118114

119115

120116
@dataclass
121-
class MCTRelTask(MCTObj):
122-
"""A relation extraction task in the MedCATTrainer instance.
117+
class MCTModelPack(MCTObj):
118+
"""A model pack in the MedCATTrainer instance.
123119
124120
Attributes:
125-
name (str): The name of the relation extraction task.
121+
name (str): The name of the model pack.
122+
model_pack_zip (str): The path to the model pack zip file, should be a <modelpack_name>.zip file.
126123
"""
127124
name: str=None
125+
model_pack_zip: str=None
126+
concept_db: MCTConceptDB=None
127+
vocab: MCTVocab=None
128+
meta_cats: List[MCTMetaTask]=None
128129

129130
def __str__(self):
130-
return f'{self.id} : {self.name}'
131+
return f'{self.id} : {self.name} \t {self. model_pack_zip}'
132+
131133

132134

133135
@dataclass
@@ -520,7 +522,11 @@ def get_model_packs(self) -> List[MCTModelPack]:
520522
List[MCTModelPack]: A list of all MedCAT model packs in the MedCATTrainer instance
521523
"""
522524
resp = json.loads(requests.get(f'{self.server}/api/modelpacks/', headers=self.headers).text)['results']
523-
mct_model_packs = [MCTModelPack(id=mp['id'], name=mp['name'], model_pack_zip=mp['model_pack']) for mp in resp]
525+
mct_model_packs = [MCTModelPack(id=mp['id'], name=mp['name'], model_pack_zip=mp['model_pack'],
526+
concept_db=MCTConceptDB(id=mp['concept_db']),
527+
vocab=MCTVocab(id=mp['vocab']),
528+
meta_cats=[MCTMetaTask(id=mt) for mt in mp['meta_cats']])
529+
for mp in resp]
524530
return mct_model_packs
525531

526532
def get_meta_tasks(self) -> List[MCTMetaTask]:
@@ -595,24 +601,41 @@ def get_project_annos(self, projects: List[MCTProject]):
595601
def upload_projects_export(self, projects: Dict[str, Any],
596602
cdb: Union[MCTConceptDB, str]=None,
597603
vocab: Union[MCTVocab, str]=None,
598-
modelpack: Union[MCTModelPack, str]=None):
604+
modelpack: Union[MCTModelPack, str]=None,
605+
import_project_name_suffix: str=' IMPORTED',
606+
cdb_search_filter: Union[MCTConceptDB, str]=None,
607+
members: Union[List[MCTUser], List[str]]=None,
608+
set_validated_docs: bool=False):
599609
"""Upload Trainer export as a list of projects to a MedCATTrainer instance.
600610
601611
Args:
602612
projects (List[MCTProject]): A list of projects to upload
603613
cdb (Union[MCTConceptDB, str]): The concept database to be used in the project - CDB name or the MCTCDB Object
604614
vocab (Union[MCTVocab, str]): The vocabulary to be used in the project - Vocab name or the MCTVocab Object
605615
modelpack (Union[MCTModelPack, str]): The model pack to be used in the project - ModelPack name or the MCTModelPack Object
616+
import_project_name_suffix (str): The suffix to be added to the project name
617+
cdb_search_filter (Union[MCTConceptDB, str]): The concept database to be used in the project - CDB name or the MCTCDB Object
618+
members (Union[List[MCTUser], List[str]]): The annotators for the project - List of MCTUser objects or list of user names
619+
set_validated_docs (bool): Whether to set the validated documents, e.g. their annotation submit status.
606620
"""
607621
if isinstance(cdb, str):
608622
cdb = [c for c in self.get_concept_dbs() if c.name == cdb].pop()
609623
if isinstance(vocab, str):
610624
vocab = [v for v in self.get_vocabs() if v.name == vocab].pop()
611625
if isinstance(modelpack, str):
612626
modelpack = [m for m in self.get_model_packs() if m.name == modelpack].pop()
627+
if isinstance(cdb_search_filter, str):
628+
cdb_search_filter = [c for c in self.get_concept_dbs() if c.name == cdb_search_filter].pop()
629+
if members and all(isinstance(m, str) for m in members):
630+
members = [m for m in self.get_users() if m.username in members]
613631

614632
payload = {
615-
'exported_projects': projects
633+
'exported_projects': projects,
634+
'project_name_suffix': import_project_name_suffix,
635+
'cdb_search_filter': cdb_search_filter.id if cdb_search_filter else None,
636+
'members': [m.id for m in members] if members else None,
637+
'import_project_name_suffix': import_project_name_suffix,
638+
'set_validated_docs': set_validated_docs,
616639
}
617640

618641
if cdb and vocab:

0 commit comments

Comments
 (0)