@@ -130,7 +130,7 @@ def upload_raw_dataset(
130130 file_name = scicat_metadata .get ("/measurement/sample/file_name" )
131131 description = build_search_terms (file_name )
132132 appended_keywords = description .split ()
133-
133+ logger . info ( f"email: { scicat_metadata . get ( '/measurement/sample/experimenter/email' ) } " )
134134 dataset = RawDataset (
135135 owner = scicat_metadata .get ("/measurement/sample/experiment/pi" ) or "Unknown" ,
136136 contactEmail = clean_email (scicat_metadata .get ("/measurement/sample/experimenter/email" ))
@@ -266,14 +266,58 @@ def _get_data_sample(file, sample_size=10):
266266 return data_sample
267267
268268
def clean_email(email: object) -> str:
    """
    Normalize a raw email value extracted from metadata.

    All whitespace (leading, trailing and internal), commas and apostrophes
    are removed from the input. The fallback ``UNKNOWN_EMAIL`` is returned
    instead when:
      - the input is not a string,
      - nothing is left after the cleanup,
      - the cleaned value equals "NONE" (case-insensitive), or
      - the cleaned value does not contain an "@" symbol.

    Parameters
    ----------
    email : object
        The raw email value extracted from metadata. Any type is accepted;
        non-string values yield the fallback.

    Returns
    -------
    str
        The cleaned email address, or ``UNKNOWN_EMAIL`` if the input is
        unusable.

    Example
    -------
    >>> clean_email(" user@example.com ")
    'user@example.com'
    >>> clean_email("'user @example.com',")
    'user@example.com'
    >>> clean_email("garbage") == UNKNOWN_EMAIL
    True
    >>> clean_email(None) == UNKNOWN_EMAIL
    True
    """
    # Anything that is not a string (None, numbers, arrays, ...) cannot be an
    # email address.
    if not isinstance(email, str):
        logger.info("Input email is not a string. Returning %s", UNKNOWN_EMAIL)
        return UNKNOWN_EMAIL

    # str.split() with no argument splits on every whitespace run, so joining
    # the pieces drops surrounding *and* embedded whitespace in one pass.
    # Commas and apostrophes are stripped as well, as the earlier version of
    # this function did.
    cleaned = "".join(email.split()).replace(",", "").replace("'", "")

    # The beamline sometimes writes the literal string "None"; that value, an
    # empty result, or anything without an "@" falls back to the default.
    if not cleaned or cleaned.upper() == "NONE" or "@" not in cleaned:
        logger.info("Invalid email address. Returning %s", UNKNOWN_EMAIL)
        return UNKNOWN_EMAIL

    return cleaned
277321
278322
279323scicat_metadata_keys = [
@@ -351,16 +395,32 @@ def clean_email(email: str):
351395]
352396
353397
def test_ingest_raw_tomo(
    file_path: str = "examples/tomo_scan_no_email.h5",
    ingestor_module: str = "orchestration.flows.bl832.ingest_tomo832",
) -> bool:
    """
    Run a manual smoke test that ingests one file through ``ingest_dataset``.

    Parameters
    ----------
    file_path : str, optional
        Path of the file handed to ``ingest_dataset``. Defaults to the
        example scan used by the original hard-coded version.
    ingestor_module : str, optional
        Dotted module name handed to ``ingest_dataset`` as the ingestor.
        Defaults to the bl832 tomography ingestor module.

    Returns
    -------
    bool
        True if ``ingest_dataset`` returned without raising, False if it
        raised any ``Exception`` (the error is printed, not re-raised).
    """
    # Imported inside the function, as in the original; an import failure
    # therefore propagates to the caller instead of being reported as a
    # failed ingest.
    from orchestration.flows.scicat.ingest import ingest_dataset

    print(f"Ingesting {file_path} with {ingestor_module}")
    try:
        ingest_dataset(file_path, ingestor_module)
    except Exception as e:
        # Deliberately broad: this helper reports any ingest failure as a
        # boolean result rather than crashing the caller.
        print(f"SciCat ingest failed with {e}")
        return False
    return True
409+
410+
if __name__ == "__main__":
    # Running this module as a script performs the ingest smoke test.
    # The process exit status mirrors the result (0 on success, 1 on failure)
    # so shells and CI jobs can detect a failed ingest.
    raise SystemExit(0 if test_ingest_raw_tomo() else 1)
0 commit comments