@@ -182,6 +182,98 @@ def _get_seperator_symbol_from_file_path(file):
182
182
return None
183
183
184
184
185
def _is_check_valid_extension_slot(slot_name, meta):
    """
    Tell whether ``slot_name`` is declared as an extension slot in ``meta``.

    A declaration is an entry of ``meta["extension_definitions"]`` that has a
    ``"property"`` key and whose ``"slot_name"`` value equals ``slot_name``.

    Args:
        slot_name: The metadata key to look up.
        meta (dict): Metadata that may contain an ``"extension_definitions"`` list.

    Returns:
        bool: True if a matching declaration exists, False otherwise
        (including when ``"extension_definitions"`` is absent).
    """
    for definition in meta.get("extension_definitions", []):
        # Entries without a "property" key never count as a declaration.
        if "property" not in definition:
            continue
        if definition.get("slot_name") == slot_name:
            return True
    return False
191
+
192
+
193
def _is_irregular_metadata(metadata_list: List[Dict]):
    """
    Report whether any metadata dictionary uses an unknown, undeclared key.

    A key is irregular when it is not one of the mapping set slots of the SSSOM
    schema object and is not declared as an extension slot in the same
    dictionary. Every irregular key triggers a logging warning.

    Args:
        metadata_list (List[Dict]): The metadata dictionaries to inspect.

    Returns:
        bool: True if at least one irregular key was found, False otherwise.
        Note that True signals a problem; the result is not a validity flag.
    """
    found_irregular = False
    for metadata in metadata_list:
        for key in metadata:
            # Standard mapping set slots are always fine.
            if key in _get_sssom_schema_object().mapping_set_slots:
                continue
            # So are non-standard slots that the same dictionary declares.
            if _is_check_valid_extension_slot(key, metadata):
                continue
            logging.warning(
                f"Metadata key '{ key } ' is not a standard SSSOM mapping set metadata field. See "
                f"https://mapping-commons.github.io/sssom/spec-model/#non-standard-slots on how to "
                f"specify additional, non-standard fields in a SSSOM file."
            )
            found_irregular = True
    return found_irregular
206
+
207
+
208
def _check_redefined_builtin_prefixes(sssom_metadata, meta, prefix_map):
    """
    Warn when a provided prefix map conflicts with the built-in prefix map.

    Two kinds of conflict are reported, each with a logging warning:

    * a built-in prefix is bound to a URI expansion other than the built-in one;
    * a built-in URI expansion is bound to a prefix other than the built-in one.

    Args:
        sssom_metadata (dict): Metadata dictionary; its CURIE_MAP entry is checked.
        meta (dict): Second metadata dictionary; its CURIE_MAP entry is checked.
        prefix_map: Prefix map passed through ``ensure_converter`` (without defaults).

    Returns:
        bool: True if no conflict was found in any of the three sources,
        False otherwise.
    """
    # There are three ways in which prefixes can be communicated, so we will check all of them.
    # This is a bit overly draconian, as in the end, only the highest priority one gets picked.
    # But since this only constitutes a (logging) warning, it is worth reporting.
    builtin_converter = _get_built_in_prefix_map()
    sssom_metadata_converter = _get_converter_pop_replace_curie_map(sssom_metadata)
    meta_converter = _get_converter_pop_replace_curie_map(meta)
    prefix_map_converter = ensure_converter(prefix_map, use_defaults=False)
    is_valid_prefixes = True

    for converter in [sssom_metadata_converter, meta_converter, prefix_map_converter]:
        # URI -> prefix view of the converter under inspection. It must come from
        # `converter`, not from the built-in map: inverting the built-in map only
        # ever yields the built-in prefix again, so the second check could never fire.
        # Built once per converter instead of once per built-in prefix.
        # NOTE during refactor replace the following line by https://github.com/biopragmatics/curies/pull/136
        reverse_bimap = {value: key for key, value in converter.bimap.items()}
        for builtin_prefix, builtin_uri in builtin_converter.bimap.items():
            if builtin_prefix in converter.bimap:
                if builtin_uri != converter.bimap[builtin_prefix]:
                    logging.warning(
                        f"A built-in prefix ({ builtin_prefix } ) was provided, "
                        f"but the provided URI expansion ({ converter .bimap [builtin_prefix ]} ) does not correspond "
                        f"to the required URI expansion: { builtin_uri } . The prefix will be ignored."
                    )
                    is_valid_prefixes = False
            if builtin_uri in reverse_bimap:
                if builtin_prefix != reverse_bimap[builtin_uri]:
                    logging.warning(
                        f"A built-in URI namespace ({ builtin_uri } ) was used in (one of) the provided prefix map(s), "
                        f"but the provided prefix ({ reverse_bimap [builtin_uri ]} ) does not correspond to the "
                        f"standard prefix: { builtin_prefix } . The prefix will be ignored."
                    )
                    is_valid_prefixes = False
    return is_valid_prefixes
240
+
241
+
242
def _fail_in_strict_parsing_mode(is_valid_built_in_prefixes, is_valid_metadata):
    """
    Raise an error summarising the validation checks that did not pass.

    Args:
        is_valid_built_in_prefixes (bool): Falsy when built-in prefixes were redefined.
        is_valid_metadata (bool): Falsy when the metadata check did not pass.

    Raises:
        ValueError: If at least one of the two flags is falsy. The message
            contains one newline-terminated line per failed check.
    """
    problems = []
    if not is_valid_built_in_prefixes:
        problems.append(
            "STRONG WARNING: The prefix map provided contains built-in prefixes that were redefined.\n"
        )
    if not is_valid_metadata:
        problems.append(
            "STRONG WARNING: The metadata provided contains non-standard and undefined metadata.\n"
        )

    # Nothing to report means strict mode is satisfied; return silently.
    if problems:
        raise ValueError("".join(problems))
253
+
254
+
255
def _get_converter_pop_replace_curie_map(sssom_metadata):
    """
    Build a Converter from the CURIE_MAP entry of sssom_metadata.

    The metadata dictionary is only read, never modified: a CURIE_MAP entry
    that is present (even an empty one) stays in place and keeps its position
    in the dictionary.

    Args:
        sssom_metadata (dict): The metadata dictionary.

    Returns:
        Converter: A Converter object created from the CURIE_MAP, or from an
        empty prefix map when the dictionary has no CURIE_MAP entry.
    """
    # A plain lookup replaces the former pop-and-reinsert, which dropped an
    # empty CURIE_MAP entry and moved a non-empty one to the end of the dict.
    curie_map = sssom_metadata.get(CURIE_MAP, {})
    return Converter.from_prefix_map(curie_map)
275
+
276
+
185
277
def parse_sssom_table (
186
278
file_path : Union [str , Path , TextIO ],
187
279
prefix_map : ConverterHint = None ,
@@ -197,6 +289,12 @@ def parse_sssom_table(
197
289
if meta is None :
198
290
meta = {}
199
291
292
+ is_valid_built_in_prefixes = _check_redefined_builtin_prefixes (sssom_metadata , meta , prefix_map )
293
+ is_valid_metadata = _is_irregular_metadata ([sssom_metadata , meta ])
294
+
295
+ if kwargs .get ("strict" ):
296
+ _fail_in_strict_parsing_mode (is_valid_built_in_prefixes , is_valid_metadata )
297
+
200
298
# The priority order for combining prefix maps are:
201
299
# 1. Built-in prefix map
202
300
# 2. Internal prefix map inside the document
0 commit comments