diff --git a/.github/workflows/bank-compress-workflow.yml b/.github/workflows/bank-compress-workflow.yml index 186a2f90512..e3aa7f1fdcd 100644 --- a/.github/workflows/bank-compress-workflow.yml +++ b/.github/workflows/bank-compress-workflow.yml @@ -34,7 +34,7 @@ jobs: - run: sudo apt-get -o Acquire::Retries=3 install *fftw3* intel-mkl* - name: Install pycbc run: | - python -m pip install --upgrade pip setuptools + python -m pip install --upgrade pip "setuptools<82.0.0" pip install GitPython # This shouldn't really be needed! pip install -r requirements.txt pip install . diff --git a/.github/workflows/basic-tests.yml b/.github/workflows/basic-tests.yml index 955336eec22..81e3d6ddaa6 100644 --- a/.github/workflows/basic-tests.yml +++ b/.github/workflows/basic-tests.yml @@ -25,7 +25,7 @@ jobs: run: | sudo apt-get -o Acquire::Retries=3 update sudo apt-get -o Acquire::Retries=3 install *fftw3* mpi intel-mkl* graphviz - pip install tox pip setuptools --upgrade + pip install tox pip "setuptools<82.0.0" --upgrade - name: Cache LAL auxiliary data files id: cache-lal-aux-data uses: actions/cache@v4 @@ -41,6 +41,23 @@ jobs: --remote-name https://zenodo.org/records/14999310/files/SEOBNRv4ROM_v2.0.hdf5 \ --remote-name https://zenodo.org/records/14999310/files/SEOBNRv4ROM_v3.0.hdf5 popd + - name: Cache example GW data + id: cache-example-gw-data + uses: actions/cache@v4 + with: + key: example-gw-data + path: | + docs/_include/*_TDI_v2.gwf + docs/_include/*_GWOSC_4KHZ_R1-1126257415-4096.gwf + docs/_include/*_LOSC_CLN_4_V1-1187007040-2048.gwf + examples/inference/lisa_smbhb_ldc/*_psd.txt + examples/inference/lisa_smbhb_ldc/*_TDI_v2.gwf + examples/inference/lisa_smbhb_ldc/MBHB_params_v2_LISA_frame.pkl + examples/inference/margtime/*.gwf + examples/inference/multisignal/*.gwf + examples/inference/relative/*.gwf + examples/inference/relmarg/*.gwf + examples/inference/single/*.gwf - name: run pycbc test suite run: | export LAL_DATA_PATH=$HOME/lal_aux_data diff --git 
a/.github/workflows/inference-workflow.yml b/.github/workflows/inference-workflow.yml index 6c947a1bd2c..bc852f8c1cc 100644 --- a/.github/workflows/inference-workflow.yml +++ b/.github/workflows/inference-workflow.yml @@ -29,7 +29,7 @@ jobs: - run: sudo apt-get -o Acquire::Retries=3 install *fftw3* intel-mkl* - name: Install pycbc run: | - python -m pip install --upgrade pip setuptools + python -m pip install --upgrade pip "setuptools<82.0.0" pip install GitPython # This shouldn't really be needed! pip install -r requirements.txt pip install . diff --git a/.github/workflows/mac-test.yml b/.github/workflows/mac-test.yml index 8062a81fe6b..74db489bf7a 100644 --- a/.github/workflows/mac-test.yml +++ b/.github/workflows/mac-test.yml @@ -50,7 +50,7 @@ jobs: run: | conda install \ pip \ - setuptools \ + "setuptools<82.0.0" \ tox - name: Run basic pycbc test suite diff --git a/.github/workflows/search-workflow.yml b/.github/workflows/search-workflow.yml index 14006ce0f79..f4f9d8c7fc0 100644 --- a/.github/workflows/search-workflow.yml +++ b/.github/workflows/search-workflow.yml @@ -34,7 +34,7 @@ jobs: - run: sudo apt-get -o Acquire::Retries=3 install *fftw3* intel-mkl* - name: Install pycbc run: | - python -m pip install --upgrade pip setuptools + python -m pip install --upgrade pip "setuptools<82.0.0" pip install GitPython # This shouldn't really be needed! pip install -r requirements.txt pip install . diff --git a/.github/workflows/tmpltbank-workflow.yml b/.github/workflows/tmpltbank-workflow.yml index 1baaae03254..e82d0f5d98e 100644 --- a/.github/workflows/tmpltbank-workflow.yml +++ b/.github/workflows/tmpltbank-workflow.yml @@ -33,7 +33,7 @@ jobs: - run: sudo apt-get -o Acquire::Retries=3 install *fftw3* intel-mkl* - name: Install pycbc run: | - python -m pip install --upgrade pip setuptools + python -m pip install --upgrade pip "setuptools<82.0.0" pip install GitPython # This shouldn't really be needed! 
pip install -r requirements.txt pip install sbank diff --git a/.github/workflows/tut-test.yml b/.github/workflows/tut-test.yml index 4dfdb418d2c..d7b4bd1fbc2 100644 --- a/.github/workflows/tut-test.yml +++ b/.github/workflows/tut-test.yml @@ -24,7 +24,7 @@ jobs: run: | sudo apt-get -o Acquire::Retries=3 update sudo apt-get -o Acquire::Retries=3 install *fftw3* mpi intel-mkl* - pip install tox pip setuptools notebook --upgrade + pip install tox pip "setuptools<82.0.0" notebook --upgrade pip install . - name: retrieving pycbc tutorials run: | diff --git a/.github/workflows/workflow-tests.yml b/.github/workflows/workflow-tests.yml index 78a6484e5ae..a2afc33f1d4 100644 --- a/.github/workflows/workflow-tests.yml +++ b/.github/workflows/workflow-tests.yml @@ -38,7 +38,7 @@ jobs: - run: sudo apt-get -o Acquire::Retries=3 install *fftw3* intel-mkl* - name: Install pycbc run: | - python -m pip install --upgrade pip setuptools + python -m pip install --upgrade pip "setuptools<82.0.0" pip install GitPython # This shouldn't really be needed! pip install -r requirements.txt pip install . 
diff --git a/Dockerfile b/Dockerfile index bc2887fd1fa..30c1b162a13 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ ADD docker/etc/cvmfs/config-osg.opensciencegrid.org.conf /etc/cvmfs/config-osg.o # Set up extra repositories RUN <= opts.newsnr_threshold if not (trial_cut & sbv_cut).any(): continue - time_veto_max_snr_uncut[slide_id][j] =\ + time_veto_max_snr_aftercuts[slide_id][j] =\ max(trig_data[keys[1]][slide_id][trial_cut & sbv_cut]) # This is the data that will be plotted full_time_veto_max_snr = ppu.sort_stat(time_veto_max_snr) -full_time_veto_max_snr_uncut = ppu.sort_stat(time_veto_max_snr_uncut) +full_time_veto_max_snr_aftercuts = ppu.sort_stat(time_veto_max_snr_aftercuts) _, _, full_time_veto_max_bestnr = \ ppu.max_median_stat(slide_dict, time_veto_max_bestnr, trig_data[keys[2]], total_trials) @@ -151,8 +151,8 @@ _, _, full_time_veto_max_bestnr = \ # returned a no-event (i.e., BestNR = 0) if full_time_veto_max_bestnr[0] != 0.: full_time_veto_max_snr = np.concatenate(([0.], full_time_veto_max_snr)) - full_time_veto_max_snr_uncut = \ - np.concatenate(([0.], full_time_veto_max_snr_uncut)) + full_time_veto_max_snr_aftercuts = \ + np.concatenate(([0.], full_time_veto_max_snr_aftercuts)) full_time_veto_max_bestnr = \ np.concatenate(([0.], full_time_veto_max_bestnr)) @@ -164,10 +164,10 @@ logging.info("SNR and bestNR maxima calculated.") # ========= x_label_dict = {"bestnr": "BestNR", "snr": "SNR", - "snruncut": "SNR after signal based vetoes"} + "snraftercuts": "SNR after signal based vetoes"} data_dict = {"bestnr": full_time_veto_max_bestnr, "snr": full_time_veto_max_snr, - "snruncut": full_time_veto_max_snr_uncut} + "snraftercuts": full_time_veto_max_snr_aftercuts} fig = plt.figure() ax = fig.gca() ax.grid(True) diff --git a/examples/inference/lisa_smbhb_ldc/get.sh b/examples/inference/lisa_smbhb_ldc/get.sh index 4d7962c408b..39d6c835d60 100644 --- a/examples/inference/lisa_smbhb_ldc/get.sh +++ b/examples/inference/lisa_smbhb_ldc/get.sh @@ -1,16 +1,24 
@@ set -e +download_if_absent() { + local URL="$1" + local FILENAME=$(basename "$URL") + if [ ! -f "$FILENAME" ]; then + echo "Downloading $FILENAME" + curl -O -L --show-error --silent "$URL" + else + echo "File $FILENAME already exists, download skipped" + fi +} + for channel in A E T do strain_file=${channel}_TDI_v2.gwf - test -f ${strain_file} && continue - curl -LO --show-error --silent https://zenodo.org/record/7497853/files/${strain_file} + download_if_absent https://zenodo.org/record/7497853/files/${strain_file} psd_file=${channel}_psd.txt - test -f ${psd_file} && continue - curl -LO --show-error --silent https://zenodo.org/record/7497853/files/${psd_file} + download_if_absent https://zenodo.org/record/7497853/files/${psd_file} done params_file=MBHB_params_v2_LISA_frame.pkl -test -f ${params_file} && continue -curl -LO --show-error --silent https://zenodo.org/record/7497853/files/${params_file} +download_if_absent https://zenodo.org/record/7497853/files/${params_file} diff --git a/examples/inference/margtime/get.sh b/examples/inference/margtime/get.sh index 54bc8f0abf1..9cae2222af3 100644 --- a/examples/inference/margtime/get.sh +++ b/examples/inference/margtime/get.sh @@ -5,5 +5,5 @@ do file=${ifo}_GWOSC_4KHZ_R1-1126257415-4096.gwf test -f ${file} && continue curl -O -L --show-error --silent \ - https://www.gwosc.org/eventapi/html/GWTC-1-confident/GW150914/v3/${ifo}_GWOSC_4KHZ_R1-1126257415-4096.gwf + https://www.gwosc.org/eventapi/html/GWTC-1-confident/GW150914/v3/${file} done diff --git a/examples/inference/multisignal/get.sh b/examples/inference/multisignal/get.sh index 965fa399430..68a227340ee 100644 --- a/examples/inference/multisignal/get.sh +++ b/examples/inference/multisignal/get.sh @@ -4,5 +4,5 @@ for ifo in H-H1 L-L1 V-V1 do file=${ifo}_LOSC_CLN_4_V1-1187007040-2048.gwf test -f ${file} && continue - curl -O --show-error --silent https://dcc.ligo.org/public/0146/P1700349/001/${file} + curl -O -L --show-error --silent 
https://dcc.ligo.org/public/0146/P1700349/001/${file} done diff --git a/examples/inference/relative/get.sh b/examples/inference/relative/get.sh index 965fa399430..68a227340ee 100644 --- a/examples/inference/relative/get.sh +++ b/examples/inference/relative/get.sh @@ -4,5 +4,5 @@ for ifo in H-H1 L-L1 V-V1 do file=${ifo}_LOSC_CLN_4_V1-1187007040-2048.gwf test -f ${file} && continue - curl -O --show-error --silent https://dcc.ligo.org/public/0146/P1700349/001/${file} + curl -O -L --show-error --silent https://dcc.ligo.org/public/0146/P1700349/001/${file} done diff --git a/examples/inference/relmarg/get.sh b/examples/inference/relmarg/get.sh index 4d43e2a8287..4a24113e3cd 100644 --- a/examples/inference/relmarg/get.sh +++ b/examples/inference/relmarg/get.sh @@ -2,5 +2,5 @@ for ifo in H-H1 L-L1 V-V1 do file=${ifo}_LOSC_CLN_4_V1-1187007040-2048.gwf test -f ${file} && continue - curl -O --silent https://dcc.ligo.org/public/0146/P1700349/001/${file} + curl -O -L --show-error --silent https://dcc.ligo.org/public/0146/P1700349/001/${file} done diff --git a/examples/inference/single/get.sh b/examples/inference/single/get.sh index 965fa399430..68a227340ee 100644 --- a/examples/inference/single/get.sh +++ b/examples/inference/single/get.sh @@ -4,5 +4,5 @@ for ifo in H-H1 L-L1 V-V1 do file=${ifo}_LOSC_CLN_4_V1-1187007040-2048.gwf test -f ${file} && continue - curl -O --show-error --silent https://dcc.ligo.org/public/0146/P1700349/001/${file} + curl -O -L --show-error --silent https://dcc.ligo.org/public/0146/P1700349/001/${file} done diff --git a/pycbc/results/pygrb_postprocessing_utils.py b/pycbc/results/pygrb_postprocessing_utils.py index 01e367a9a1f..1428af3c5a7 100644 --- a/pycbc/results/pygrb_postprocessing_utils.py +++ b/pycbc/results/pygrb_postprocessing_utils.py @@ -172,24 +172,10 @@ def pygrb_add_bestnr_cut_opt(parser): "Default 0: all events are considered.") -# ============================================================================= -# Wrapper to pick triggers 
with a given slide_id -# ============================================================================= -# Underscore starts name of functions not called outside this file -def _slide_filter(trig_file, data, slide_id=None): - """ - This function adds the capability to select triggers with specific - slide_ids during the postprocessing stage of PyGRB. - """ - if slide_id is None: - return data - mask = numpy.where(trig_file['network/slide_id'][:] == slide_id)[0] - return data[mask] - - # ============================================================================= # Wrapper to read segments files # ============================================================================= +# An underscore at the name start flags a function called only in this file def _read_seg_files(seg_files): """Read segments txt files""" @@ -333,18 +319,40 @@ def load_data(input_file, ifos, rw_snr_threshold=None, data_tag=None, # Do not assume that IFO and network datasets are sorted the same way: # find where each surviving network/event_id is placed in the IFO/event_id ifo_ids_above_thresh_locations = {} + # 1. Determine once the IDs to be found + target_ids = net_ids[above_thresh] for ifo in ifos: - ifo_ids_above_thresh_locations[ifo] = \ - numpy.array([numpy.where(ifo_ids[ifo] == net_id)[0][0] - for net_id in net_ids[above_thresh]]) - - # Apply the cut on all the data by removing points with reweighted SNR = 0 + input_ids = ifo_ids[ifo] + # 2. Do not assume ifo_ids are sorted and produce a sorting tracker + sorter = numpy.argsort(input_ids) + # 3. Find out where the target_ids are in the sorted ifo_ids[ifo] + insert_indices = numpy.searchsorted( + input_ids, + target_ids, + sorter=sorter) + # 4. Safety checks: + # a. searchsorted returns len(arr) when the element is not in the arr + # b. ensure that all target_ids were recovered in ifo_ids[ifo] + assert (insert_indices < len(input_ids)).all() + assert (input_ids[sorter[insert_indices]] == target_ids).all() + # 5. 
Extract valid original indices + ifo_ids_above_thresh_locations[ifo] = sorter[insert_indices] + + # Removing points with reweighted SNR below threshold and select + # a specific slide, if the user asked for this trigs_dict = {} with HFile(input_file, "r") as trigs: + # Prepare a default mask that will preserve all points after + # the ones below threshold have already been removed + mask = numpy.full(sum(above_thresh), True) + # When necessary and possible, update the mask so it selects a given + # slide id + if slide_id is not None and 'network/slide_id' in trigs.keys(): + mask = numpy.where(trigs['network/slide_id'][above_thresh] == + slide_id)[0] for (path, dset) in _dataset_iterator(trigs): - # The dataset contains search information or missed injections - # information, not properties of triggers or found injections: - # just copy it + # The dataset contains search or missed injections information, + # not properties of triggers or found injections: just copy it if 'search' in path or 'missed' in path or 'gating' in path: trigs_dict[path] = dset[:] # The dataset is trig/inj info at an IFO: @@ -361,9 +369,14 @@ def load_data(input_file, ifos, rw_snr_threshold=None, data_tag=None, trigs_dict[path] = dset[above_thresh] if 'network/slide_id' in trigs.keys(): - if trigs_dict[path].size == trigs['network/slide_id'][:].size: - trigs_dict[path] = _slide_filter(trigs, trigs_dict[path], - slide_id=slide_id) + # The slide selection is applied to datasets that contain + # properties of surviving triggers. These datasets are + # identified knowing that each trigger has a slide id, so + # they must have as many entries as the 'network/slide_id' + # dataset once triggers below threshold are removed from it. 
+ if trigs_dict[path].size == \ + trigs['network/slide_id'][above_thresh].size: + trigs_dict[path] = trigs_dict[path][mask] return trigs_dict diff --git a/pycbc/results/table_utils.py b/pycbc/results/table_utils.py index bfc5b1c2389..e74ebde3a56 100644 --- a/pycbc/results/table_utils.py +++ b/pycbc/results/table_utils.py @@ -59,7 +59,7 @@ """) def html_table(columns, names, page_size=None, format_strings=None): - """ Return an html table of this data + """ Return an HTML table of this data. Parameters ---------- @@ -69,14 +69,27 @@ def html_table(columns, names, page_size=None, format_strings=None): page_size : {int, None}, optional The number of items to show on each page of the table format_strings : {lists of strings, None}, optional - The ICU format string for this column, None for no formatting. All - columns must have a format string if provided. + The ICU format string for this column, None for no formatting. + All columns must have a format string if provided. Returns ------- html_table : str A str containing the html code to display a table of this data """ + if len(columns) != len(names): + raise ValueError( + 'I need the same number of columns and names, ' + f'got {len(columns)} and {len(names)} instead' + ) + if format_strings is not None and len(format_strings) != len(columns): + raise ValueError( + 'I need the same number of columns and format strings, ' + f'got {len(columns)} and {len(format_strings)} instead' + ) + if len({len(column) for column in columns}) != 1: + raise ValueError('All columns must have the same length') + if page_size is None: page = 'disable' else: @@ -205,4 +218,3 @@ def static_table(data, titles=None, columns_max=None, row_labels=None): n_rows=n_rows, row_labels=row_labels, ) - diff --git a/pycbc/workflow/minifollowups.py b/pycbc/workflow/minifollowups.py index 82363280601..d9c5506168c 100644 --- a/pycbc/workflow/minifollowups.py +++ b/pycbc/workflow/minifollowups.py @@ -793,7 +793,7 @@ def make_qscan_plot(workflow, ifo, trig_time, 
out_dir, injection_file=None, data_segments=None, time_window=100, tags=None): """ Generate a make_qscan node and add it to workflow. - This function generates a single node of the singles_timefreq executable + This function generates a single node of the plot_qscan executable and adds it to the current workflow. Parent/child relationships are set by the input/output files automatically. @@ -812,7 +812,7 @@ def make_qscan_plot(workflow, ifo, trig_time, out_dir, injection_file=None, plot. data_segments: igwn_segments.segmentlist (optional, default=None) The list of segments for which data exists and can be read in. If given - the start/end times given to singles_timefreq will be adjusted if + the start/end times given to plot_qscan will be adjusted if [trig_time - time_window, trig_time + time_window] does not completely lie within a valid data segment. A ValueError will be raised if the trig_time is not within a valid segment, or if it is not possible to @@ -820,7 +820,7 @@ def make_qscan_plot(workflow, ifo, trig_time, out_dir, injection_file=None, trigger. This **must** be coalesced. time_window: int (optional, default=None) The amount of data (not including padding) that will be read in by the - singles_timefreq job. The default value of 100s should be fine for most + plot_qscan job. The default value of 100s should be fine for most cases. tags: list (optional, default=None) List of tags to add to the created nodes, which determine file naming. 
diff --git a/pycbc/workflow/pegasus_sites.py b/pycbc/workflow/pegasus_sites.py index 1fec6aa05fe..d72e85e26eb 100644 --- a/pycbc/workflow/pegasus_sites.py +++ b/pycbc/workflow/pegasus_sites.py @@ -241,7 +241,8 @@ def add_osg_site(sitecat, cp): value=r"\"0,1,2,4,5,7,8,9,10,11,12,13,16,17,24,27,35,36,40\"") site.add_profiles(Namespace.CONDOR, key="Requirements", value="(HAS_SINGULARITY =?= TRUE) && " - "(IS_GLIDEIN =?= True)") + "(IS_GLIDEIN =?= True) && " + "(HAS_CVMFS_singularity_opensciencegrid_org =?= True)") cvmfs_loc = '"/cvmfs/singularity.opensciencegrid.org/pycbc/pycbc-el8:v' cvmfs_loc += sing_version + '"' site.add_profiles(Namespace.CONDOR, key="My.SingularityImage", diff --git a/pyproject.toml b/pyproject.toml index 1a513328dcd..ffce2f4d9c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=70.0.0", +requires = ["setuptools>=70.0.0,<82.0.0", "wheel", "cython>=0.29.21", "numpy>=2.0.0,!=2.2.2", diff --git a/requirements.txt b/requirements.txt index 95f66006229..665cc42a818 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ # requirements for most basic library use astropy>=2.0.3,!=4.2.1,!=4.0.5 Mako>=1.0.1 -scipy>=0.16.0 +scipy>=0.16.0,<1.17.0 matplotlib>=2.0.0 -numpy>=1.16.0,!=1.19.0,!=2.2.2 +numpy>=1.16.0,!=1.19.0,!=2.2.2,<2 pillow h5py>=3.0.0,!=3.7.0 jinja2 diff --git a/setup.py b/setup.py index ff9d10fad62..7e1794a04b5 100755 --- a/setup.py +++ b/setup.py @@ -32,8 +32,8 @@ setup_requires = ['numpy>=1.16.0'] install_requires = setup_requires + [ 'cython>=0.29', - 'numpy>=1.16.0,!=1.19.0,!=2.2.2', - 'scipy>=0.16.0', + 'numpy>=1.16.0,!=1.19.0,!=2.2.2,<2', + 'scipy>=0.16.0,<1.17.0', 'astropy>=2.0.3,!=4.2.1,!=4.0.5', 'matplotlib>=1.5.1', 'mpld3>=0.3', @@ -43,7 +43,7 @@ 'Mako>=1.0.1', 'beautifulsoup4>=4.6.0', 'tqdm', - 'setuptools', + 'setuptools<82.0.0', 'gwdatafind', 'pegasus-wms.api == 5.0.9', 'pegasus-wms.common == 5.0.9', @@ -96,7 +96,7 @@ def __getattr__(self, attr): vinfo = 
_version_helper.generate_git_version_info() except: vinfo = vdummy() - vinfo.version = '2.8.5' + vinfo.version = '2.8.6' vinfo.release = True version_script = f"""# coding: utf-8