|
29 | 29 | import tempfile
|
30 | 30 | import time
|
31 | 31 | import yaml
|
| 32 | +import zipfile |
32 | 33 |
|
33 | 34 | from astropy.io import fits
|
34 | 35 | import boto3
|
@@ -267,12 +268,33 @@ def load_raw_to_temp(temp_dir, ref_dict, pool=None):
|
267 | 268 | -------
|
268 | 269 | result : `dict`
|
269 | 270 | A dictionary with:
|
270 |
| - - "mode": "fits-serial" or "fits-parallel" |
| 271 | + - "mode": "zip", "fits-serial", or "fits-parallel" |
271 | 272 | - "async_result": `multiprocessing.AsyncResult` if multiprocessing is used, else `None`.
|
| 273 | +
|
| 274 | + Notes |
| 275 | + ----- |
| 276 | + A data butler repo can store raw data in two formats: one fits file |
| 277 | + for each detector, or one zip file for each exposure containing all |
| 278 | + detector fits files. This function assumes that either all refs point |
| 279 | + to the same zip file, or each ref is its own fits file. For zip, |
| 280 | + all files are extracted to the temporary folder. Either way, the temporary |
| 281 | + folder will be loaded with detector-level fits files. |
272 | 282 | """
|
273 | 283 | if not ref_dict:
|
274 | 284 | raise ValueError("ref_dict is empty")
|
275 | 285 |
|
| 286 | + uri = next(iter(ref_dict.values())).primaryURI |
| 287 | + # Determine whether the detector data is stored as a zip file |
| 288 | + if uri.fragment and (uri.getExtension() == ".zip"): |
| 289 | + _log.info(f"Extracting zip file {uri.basename()}") |
| 290 | + with uri.open("rb") as fd: |
| 291 | + with zipfile.ZipFile(fd) as zf: |
| 292 | + zf.extractall(temp_dir) |
| 293 | + return { |
| 294 | + "mode": "zip", |
| 295 | + "async_result": None, |
| 296 | + } |
| 297 | + |
276 | 298 | if pool is None:
|
277 | 299 | _log.warning("Multiprocessing pool is not provided; fallback to serial.")
|
278 | 300 | for r in ref_dict:
|
@@ -312,9 +334,14 @@ def upload_images(pool, temp_dir, group_id, ref_dict):
|
312 | 334 | # Non-blocking assignment lets us upload during the next exposure.
|
313 | 335 | # Can't time these tasks directly, but the blocking equivalent took
|
314 | 336 | # 12-20 s depending on tuning, or less than a single exposure.
|
| 337 | + args = [] |
| 338 | + for ref in ref_dict: |
| 339 | + uri = ref_dict[ref].primaryURI |
| 340 | + filename = uri.fragment.partition("=")[-1] if uri.fragment else uri.basename() |
| 341 | + args.append((temp_dir, group_id, ref, filename)) |
315 | 342 | pool.starmap_async(
|
316 | 343 | _upload_one_image,
|
317 |
| - [(temp_dir, group_id, r, ref_dict[r].primaryURI.basename()) for r in ref_dict], |
| 344 | + args, |
318 | 345 | error_callback=_log.exception,
|
319 | 346 | chunksize=5 # Works well across a broad range of # processes
|
320 | 347 | )
|
|
0 commit comments