From 489905f613b39f6a546217867be3f3d7841294a8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 16:21:35 +0100 Subject: [PATCH 1/9] Bump pypa/gh-action-pypi-publish in the github-actions group (#4699) Bumps the github-actions group with 1 update: [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish). Updates `pypa/gh-action-pypi-publish` from 1.9.0 to 1.10.0 - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.9.0...v1.10.0) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-minor dependency-group: github-actions ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/deploy.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 9ce7d4af839..caf07bdfae2 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -142,7 +142,7 @@ jobs: mv dist/MDAnalysisTests-* testsuite/dist - name: upload_source_and_wheels - uses: pypa/gh-action-pypi-publish@v1.9.0 + uses: pypa/gh-action-pypi-publish@v1.10.0 with: skip_existing: true repository_url: https://test.pypi.org/legacy/ @@ -171,7 +171,7 @@ jobs: mv dist/MDAnalysisTests-* testsuite/dist - name: upload_tests - uses: pypa/gh-action-pypi-publish@v1.9.0 + uses: pypa/gh-action-pypi-publish@v1.10.0 with: packages_dir: testsuite/dist skip_existing: true @@ -201,7 +201,7 @@ jobs: mv dist/MDAnalysisTests-* testsuite/dist - name: upload_source_and_wheels - uses: pypa/gh-action-pypi-publish@v1.9.0 + uses: pypa/gh-action-pypi-publish@v1.10.0 upload_pypi_mdanalysistests: if: | @@ -227,7 +227,7 @@ jobs: mv dist/MDAnalysisTests-* testsuite/dist - name: upload_tests - uses: pypa/gh-action-pypi-publish@v1.9.0 + uses: pypa/gh-action-pypi-publish@v1.10.0 with: packages_dir: testsuite/dist From 277b99f712d5bd1b75ae25fb0fff9752a58f8ce4 Mon Sep 17 00:00:00 2001 From: MattTDavies <128810112+MattTDavies@users.noreply.github.com> Date: Sat, 7 Sep 2024 02:57:38 +0100 Subject: [PATCH 2/9] Further fixes to GroupBase high dimensional indexing: Frankenatom fix (#4692) * Moved index dimension check to initialisation, rather than indexing to more robustly catch Frankenatoms * Added improper atomgroup initialisation test * pep8 fixes * Added CHANGELOG info --- package/CHANGELOG | 4 ++-- package/MDAnalysis/core/groups.py | 13 +++++++------ testsuite/MDAnalysisTests/core/test_atom.py | 5 +++++ testsuite/MDAnalysisTests/core/test_atomgroup.py | 6 ++++++ 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index ede4ca29d8d..f2264922088 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -17,12 +17,12 @@ The rules for this file: ??/??/?? IAlibay, HeetVekariya, marinegor, lilyminium, RMeli, ljwoods2, aditya292002, pstaerk, PicoCentauri, BFedder, tyler.je.reddy, SampurnaM, leonwehrhan, kainszs, orionarcher, - yuxuanzhuang, PythonFZ, laksh-krishna-sharma, orbeckst + yuxuanzhuang, PythonFZ, laksh-krishna-sharma, orbeckst, MattTDavies * 2.8.0 Fixes - * Catch higher dimensional indexing in GroupBase (Issue #4647) + * Catch higher dimensional indexing in GroupBase & ComponentBase (Issue #4647) * Do not raise an Error reading H5MD files with datasets like `observables//` (part of Issue #4598, PR #4615) * Fix failure in double-serialization of TextIOPicklable file reader. diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index e8304d3b042..91b2c779304 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -570,7 +570,10 @@ def __init__(self, *args): raise TypeError(errmsg) from None # indices for the objects I hold - self._ix = np.asarray(ix, dtype=np.intp) + ix = np.asarray(ix, dtype=np.intp) + if ix.ndim > 1: + raise IndexError('Group index must be 1d') + self._ix = ix self._u = u self._cache = dict() @@ -598,11 +601,6 @@ def __getitem__(self, item): # important for boolean slicing item = np.array(item) - if isinstance(item, np.ndarray) and item.ndim > 1: - # disallow high dimensional indexing. - # this doesnt stop the underlying issue - raise IndexError('Group index must be 1d') - # We specify _derived_class instead of self.__class__ to allow # subclasses, such as UpdatingAtomGroup, to control the class # resulting from slicing. @@ -4252,6 +4250,9 @@ class ComponentBase(_MutableBase): def __init__(self, ix, u): # index of component + if not isinstance(ix, numbers.Integral): + raise IndexError('Component can only be indexed by a single integer') + self._ix = ix self._u = u diff --git a/testsuite/MDAnalysisTests/core/test_atom.py b/testsuite/MDAnalysisTests/core/test_atom.py index 35d5d67477a..d63d0574f06 100644 --- a/testsuite/MDAnalysisTests/core/test_atom.py +++ b/testsuite/MDAnalysisTests/core/test_atom.py @@ -121,6 +121,11 @@ def test_atom_pickle(self, universe, ix): atm_in = pickle.loads(pickle.dumps(atm_out)) assert atm_in == atm_out + def test_improper_initialisation(self, universe): + with pytest.raises(IndexError): + indices = [0, 1] + mda.core.groups.Atom(indices, universe) + class TestAtomNoForceNoVel(object): @staticmethod diff --git a/testsuite/MDAnalysisTests/core/test_atomgroup.py b/testsuite/MDAnalysisTests/core/test_atomgroup.py index 1497456e2da..4456362c498 100644 --- a/testsuite/MDAnalysisTests/core/test_atomgroup.py +++ b/testsuite/MDAnalysisTests/core/test_atomgroup.py @@ -1237,6 +1237,12 @@ def test_bad_make(self): with pytest.raises(TypeError): mda.core.groups.AtomGroup(['these', 'are', 'not', 'atoms']) + def test_invalid_index_initialisation(self, universe): + indices = [[1, 2, 3], + [4, 5, 6]] + with pytest.raises(IndexError): + mda.core.groups.AtomGroup(indices, universe) + def test_n_atoms(self, ag): assert ag.n_atoms == 3341 From d11a1ef77628fd3bfe608cf7b9d8371e76ea2bec Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Sat, 7 Sep 2024 17:20:52 +0100 Subject: [PATCH 3/9] [CI] MDAnalysis self-dependency build fix (#4502) * Fix MDAnalysis cron CI * Fix deployment workflow python and utility versions * Make sure we install mdakit dependencies using no-deps * Fix nightly wheel tests. --- .github/actions/build-src/action.yaml | 15 ++++ .github/actions/setup-deps/action.yaml | 9 --- .github/workflows/deploy.yaml | 10 +-- .github/workflows/gh-ci-cron.yaml | 97 +++++++++++++++----------- .github/workflows/gh-ci.yaml | 3 + 5 files changed, 80 insertions(+), 54 deletions(-) diff --git a/.github/actions/build-src/action.yaml b/.github/actions/build-src/action.yaml index 8abab0f13e9..95f4eb95401 100644 --- a/.github/actions/build-src/action.yaml +++ b/.github/actions/build-src/action.yaml @@ -66,6 +66,15 @@ runs: micromamba info micromamba list + - name: mda_deps + shell: bash -l {0} + run: | + # Install mdakit deps that depend on MDA + python -m pip install --no-deps \ + waterdynamics \ + pathsimanalysis \ + mdahole2 + - name: build_mda_main shell: bash -l {0} run: | @@ -84,6 +93,12 @@ runs: fi python -m pip install ${BUILD_FLAGS} -v -e ./testsuite + - name: post_build_env_check + shell: bash -l {0} + run: | + pip list + micromamba list + - name: build_docs if: ${{ inputs.build-docs == 'true' }} shell: bash -l {0} diff --git a/.github/actions/setup-deps/action.yaml b/.github/actions/setup-deps/action.yaml index cceae40f99c..97112b09159 100644 --- a/.github/actions/setup-deps/action.yaml +++ b/.github/actions/setup-deps/action.yaml @@ -31,8 +31,6 @@ inputs: default: 'hypothesis' matplotlib: default: 'matplotlib-base' - mdahole2: - default: 'mdahole2-base' mda_xdrlib: default: 'mda-xdrlib' mmtf-python: @@ -41,8 +39,6 @@ inputs: default: 'numpy' packaging: default: 'packaging' - pathsimanalysis: - default: 'pathsimanalysis' pip: default: 'pip' pytest: @@ -53,8 +49,6 @@ inputs: default: 'threadpoolctl' tqdm: default: 'tqdm>=4.43.0' - waterdynamics: - default: 'waterdynamics' # conda-installed optional dependencies biopython: default: 'biopython>=1.80' @@ -120,18 +114,15 @@ runs: ${{ inputs.griddataformats }} ${{ inputs.hypothesis }} ${{ inputs.matplotlib }} - ${{ inputs.mdahole2 }} ${{ inputs.mda_xdrlib }} ${{ inputs.mmtf-python }} ${{ inputs.numpy }} ${{ inputs.packaging }} - ${{ inputs.pathsimanalysis }} ${{ inputs.pip }} ${{ inputs.pytest }} ${{ inputs.scipy }} ${{ inputs.threadpoolctl }} ${{ inputs.tqdm }} - ${{ inputs.waterdynamics }} CONDA_OPT_DEPS: | ${{ inputs.biopython }} ${{ inputs.chemfiles-python }} diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index caf07bdfae2..377575ef8c1 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -38,10 +38,10 @@ jobs: matrix: buildplat: - [ubuntu-22.04, manylinux_x86_64, x86_64] - - [macos-11, macosx_*, x86_64] + - [macos-12, macosx_*, x86_64] - [windows-2019, win_amd64, AMD64] - [macos-14, macosx_*, arm64] - python: ["cp39", "cp310", "cp311", "cp312"] + python: ["cp310", "cp311", "cp312"] defaults: run: working-directory: ./package @@ -51,7 +51,7 @@ jobs: fetch-depth: 0 - name: Build wheels - uses: pypa/cibuildwheel@v2.16.5 + uses: pypa/cibuildwheel@v2.20.0 with: package-dir: package env: @@ -142,7 +142,7 @@ jobs: mv dist/MDAnalysisTests-* testsuite/dist - name: upload_source_and_wheels - uses: pypa/gh-action-pypi-publish@v1.10.0 + uses: pypa/gh-action-pypi-publish@v1.10.1 with: skip_existing: true repository_url: https://test.pypi.org/legacy/ @@ -171,7 +171,7 @@ jobs: mv dist/MDAnalysisTests-* testsuite/dist - name: upload_tests - uses: pypa/gh-action-pypi-publish@v1.10.0 + uses: pypa/gh-action-pypi-publish@v1.10.1 with: packages_dir: testsuite/dist skip_existing: true diff --git a/.github/workflows/gh-ci-cron.yaml b/.github/workflows/gh-ci-cron.yaml index 72c7e365385..7d2ecc6f6ae 100644 --- a/.github/workflows/gh-ci-cron.yaml +++ b/.github/workflows/gh-ci-cron.yaml @@ -4,6 +4,11 @@ on: # 3 am Tuesdays and Fridays - cron: "0 3 * * 2,5" workflow_dispatch: + # Uncomment when you need to test on a PR + pull_request: + branches: + - develop + concurrency: # Probably overly cautious group naming. @@ -21,6 +26,7 @@ env: MPLBACKEND: agg jobs: + # a pip only, minimal deps install w/ scipy & numpy nightly upstream wheels numpy_and_scipy_dev: if: "github.repository == 'MDAnalysis/mdanalysis'" runs-on: ubuntu-latest @@ -34,45 +40,51 @@ jobs: with: os-type: "ubuntu" - - name: setup_micromamba - uses: mamba-org/setup-micromamba@v1 - with: - environment-name: mda - create-args: >- - python=3.11 - pip - # using jaime's shim to avoid pulling down the cudatoolkit - condarc: | - channels: - - jaimergp/label/unsupported-cudatoolkit-shim - - conda-forge - - bioconda - - - name: install_deps - uses: ./.github/actions/setup-deps + - uses: actions/setup-python@v4 with: - micromamba: true - full-deps: true + python-version: ${{ matrix.python-version }} - # overwrite installs by picking up nightly wheels + # minimally install nightly wheels & core deps - name: nightly_wheels run: | - pip install --pre -U -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple scipy numpy networkx matplotlib pandas + # Nightlies: add in networkx and matplotlib because we can + python -m pip install --pre -U --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \ + scipy \ + numpy \ + networkx \ + matplotlib \ + # Base deps + python -m pip install \ + "cython>=0.28" \ + packaging \ + "setuptools>69.4" \ + wheel \ + "griddataformats>=0.4.0" \ + "mmtf-python>=1.0" \ + "joblib>=0.12" \ + "tqdm>=4.43.0" \ + threadpoolctl \ + fasteners \ + mda-xdrlib \ + pytest \ + pytest-xdist \ + pytest-timeout + # deps that depend on MDA + python -m pip install --no-deps \ + waterdynamics \ + pathsimanalysis \ + mdahole2 + + - name: pre_install_list_deps + run: python -m pip list - - name: list_deps + - name: build_srcs run: | - micromamba list - pip list + python -m pip install --no-build-isolation -v -e ./package + python -m pip install --no-build-isolation -v -e ./testsuite - # Intentionally going with setup.py builds so we can build with latest - - name: build_srcs - uses: ./.github/actions/build-src - with: - build-tests: true - build-docs: false - # We don't use build isolation because we want to ensure that we - # test building with brand new versions of NumPy here. - isolation: false + - name: post_install_list_deps + run: python -m pip list - name: run_tests run: | @@ -136,7 +148,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-20.04, macos-11] + os: [ubuntu-20.04, macos-12] steps: - uses: actions/checkout@v4 @@ -151,7 +163,7 @@ jobs: with: environment-name: mda create-args: >- - python=3.9 + python=3.10 pip condarc: | channels: @@ -210,6 +222,9 @@ jobs: run: | pip install pytest-xdist pytest-timeout + - name: check env + run: pip list + - name: run_tests run: | pytest --timeout=200 -n auto testsuite/MDAnalysisTests --disable-pytest-warnings --durations=50 @@ -218,12 +233,14 @@ jobs: conda-latest-release: # A set of runner to check that the latest conda release works as expected if: "github.repository == 'MDAnalysis/mdanalysis'" - runs-on: ${{ matrix.os }}-latest + runs-on: ${{ matrix.os }} timeout-minutes: 60 strategy: fail-fast: false matrix: - os: [ubuntu, macos] + # Stick to macos-13 because some of our + # optional depss don't support arm64 (i.e. macos-14) + os: [ubuntu-latest, macos-13] python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 @@ -247,16 +264,16 @@ jobs: - conda-forge - bioconda + - name: install_mdanalysis + run: | + micromamba install mdanalysis mdanalysistests + - name: install_deps uses: ./.github/actions/setup-deps with: micromamba: true full-deps: true - - name: install_mdanalysis - run: | - micromamba install mdanalysis mdanalysistests - - name: run_tests run: | pytest --timeout=200 -n auto --pyargs MDAnalysisTests diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index 05bda86b3cd..471e3bd20de 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -276,6 +276,9 @@ jobs: python -m pip install mdanalysis-*.tar.gz python -m pip install mdanalysistests-*.tar.gz + - name: check install + run: pip list + - name: run tests working-directory: ./dist run: python -m pytest --timeout=200 -n auto --pyargs MDAnalysisTests From 7618e05176b5f2c3008f375f840396ea8592488a Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Sun, 8 Sep 2024 12:56:14 +0100 Subject: [PATCH 4/9] Update gh-ci-cron.yaml (#4705) --- .github/workflows/gh-ci-cron.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/gh-ci-cron.yaml b/.github/workflows/gh-ci-cron.yaml index 7d2ecc6f6ae..4585fc4de71 100644 --- a/.github/workflows/gh-ci-cron.yaml +++ b/.github/workflows/gh-ci-cron.yaml @@ -5,9 +5,9 @@ on: - cron: "0 3 * * 2,5" workflow_dispatch: # Uncomment when you need to test on a PR - pull_request: - branches: - - develop + # pull_request: + # branches: + # - develop concurrency: From b3208b39aab61be53f8b610f1fef628f83262205 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 9 Sep 2024 20:13:42 +0100 Subject: [PATCH 5/9] Implementation of Parallelization to analysis.gnm (#4700) * Fixes #4672 * Changes made in this Pull Request: - Parallelization of the class GNMAnalysis in analysis.gnm.py - Addition of parallelization tests (including fixtures in analysis/conftest.py) - update of CHANGELOG --------- Co-authored-by: Oliver Beckstein --- package/CHANGELOG | 4 +++- package/MDAnalysis/analysis/gnm.py | 22 ++++++++++++++++++- .../MDAnalysisTests/analysis/conftest.py | 6 +++++ .../MDAnalysisTests/analysis/test_gnm.py | 16 +++++++------- 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index f2264922088..aecde6e6468 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -17,7 +17,8 @@ The rules for this file: ??/??/?? IAlibay, HeetVekariya, marinegor, lilyminium, RMeli, ljwoods2, aditya292002, pstaerk, PicoCentauri, BFedder, tyler.je.reddy, SampurnaM, leonwehrhan, kainszs, orionarcher, - yuxuanzhuang, PythonFZ, laksh-krishna-sharma, orbeckst, MattTDavies + yuxuanzhuang, PythonFZ, laksh-krishna-sharma, orbeckst, MattTDavies, + talagayev * 2.8.0 @@ -55,6 +56,7 @@ Fixes Enhancements * Introduce parallelization API to `AnalysisBase` and to `analysis.rms.RMSD` class (Issue #4158, PR #4304) + * Enables parallelization for analysis.gnm.GNMAnalysis (Issue #4672) * explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680) * Improve error message for `AtomGroup.unwrap()` when bonds are not present.(Issue #4436, PR #4642) * Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP) diff --git a/package/MDAnalysis/analysis/gnm.py b/package/MDAnalysis/analysis/gnm.py index 86a62fa7b9f..510fb887d01 100644 --- a/package/MDAnalysis/analysis/gnm.py +++ b/package/MDAnalysis/analysis/gnm.py @@ -92,7 +92,7 @@ import numpy as np -from .base import AnalysisBase +from .base import AnalysisBase, ResultsGroup from MDAnalysis.analysis.base import Results @@ -245,8 +245,19 @@ class GNMAnalysis(AnalysisBase): Use :class:`~MDAnalysis.analysis.AnalysisBase` as parent class and store results as attributes ``times``, ``eigenvalues`` and ``eigenvectors`` of the ``results`` attribute. + + .. versionchanged:: 2.8.0 + Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` + backends; use the new method :meth:`get_supported_backends` to see all + supported backends. """ + _analysis_algorithm_is_parallelizable = True + + @classmethod + def get_supported_backends(cls): + return ("serial", "multiprocessing", "dask") + def __init__(self, universe, select='protein and name CA', @@ -348,6 +359,15 @@ def _conclude(self): self.results.eigenvalues = np.asarray(self.results.eigenvalues) self.results.eigenvectors = np.asarray(self.results.eigenvectors) + def _get_aggregator(self): + return ResultsGroup( + lookup={ + "eigenvectors": ResultsGroup.ndarray_hstack, + "eigenvalues": ResultsGroup.ndarray_hstack, + "times": ResultsGroup.ndarray_hstack, + } + ) + class closeContactGNMAnalysis(GNMAnalysis): r"""GNMAnalysis only using close contacts. diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index 55bae7e6bd8..75d62284b7b 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -8,6 +8,7 @@ ) from MDAnalysis.analysis.rms import RMSD, RMSF from MDAnalysis.lib.util import is_installed +from MDAnalysis.analysis.gnm import GNMAnalysis def params_for_cls(cls, exclude: list[str] = None): @@ -87,3 +88,8 @@ def client_RMSD(request): @pytest.fixture(scope='module', params=params_for_cls(RMSF)) def client_RMSF(request): return request.param + + +@pytest.fixture(scope='module', params=params_for_cls(GNMAnalysis)) +def client_GNMAnalysis(request): + return request.param diff --git a/testsuite/MDAnalysisTests/analysis/test_gnm.py b/testsuite/MDAnalysisTests/analysis/test_gnm.py index 6521c08eb86..d8a547a5428 100644 --- a/testsuite/MDAnalysisTests/analysis/test_gnm.py +++ b/testsuite/MDAnalysisTests/analysis/test_gnm.py @@ -38,10 +38,10 @@ def universe(): return mda.Universe(GRO, XTC) -def test_gnm(universe, tmpdir): +def test_gnm(universe, tmpdir, client_GNMAnalysis): output = os.path.join(str(tmpdir), 'output.txt') gnm = mda.analysis.gnm.GNMAnalysis(universe, ReportVector=output) - gnm.run() + gnm.run(**client_GNMAnalysis) result = gnm.results assert len(result.times) == 10 assert_almost_equal(gnm.results.times, np.arange(0, 1000, 100), decimal=4) @@ -51,9 +51,9 @@ def test_gnm(universe, tmpdir): 4.2058769e-15, 3.9839431e-15]) -def test_gnm_run_step(universe): +def test_gnm_run_step(universe, client_GNMAnalysis): gnm = mda.analysis.gnm.GNMAnalysis(universe) - gnm.run(step=3) + gnm.run(step=3, **client_GNMAnalysis) result = gnm.results assert len(result.times) == 4 assert_almost_equal(gnm.results.times, np.arange(0, 1200, 300), decimal=4) @@ -88,9 +88,9 @@ def test_gnm_SVD_fail(universe): mda.analysis.gnm.GNMAnalysis(universe).run(stop=1) -def test_closeContactGNMAnalysis(universe): +def test_closeContactGNMAnalysis(universe, client_GNMAnalysis): gnm = mda.analysis.gnm.closeContactGNMAnalysis(universe, weights="size") - gnm.run(stop=2) + gnm.run(stop=2, **client_GNMAnalysis) result = gnm.results assert len(result.times) == 2 assert_almost_equal(gnm.results.times, (0, 100), decimal=4) @@ -114,9 +114,9 @@ def test_closeContactGNMAnalysis(universe): 0.0, 0.0, -2.263157894736841, -0.24333213169614382]) -def test_closeContactGNMAnalysis_weights_None(universe): +def test_closeContactGNMAnalysis_weights_None(universe, client_GNMAnalysis): gnm = mda.analysis.gnm.closeContactGNMAnalysis(universe, weights=None) - gnm.run(stop=2) + gnm.run(stop=2, **client_GNMAnalysis) result = gnm.results assert len(result.times) == 2 assert_almost_equal(gnm.results.times, (0, 100), decimal=4) From 319b6676a533d6609661b0ce22e88df212fc9060 Mon Sep 17 00:00:00 2001 From: Yuxuan Zhuang Date: Mon, 9 Sep 2024 21:04:49 -0700 Subject: [PATCH 6/9] Remove GSD from optional test dependencies (#4707) * Fixed high dimensional GroupBase indexing. * fixed pep8 issues * Removed sanitisation * Fix #4687 -- rdkit values in azure CI (#4688) * Investigate rdkit issue * Update azure-pipelines.yml * fix numpy 2.0 import block * fix imports * mark analysis.pca.PCA as not parallelizable (#4684) - fix #4680 - PCA explicitly marked as not parallelizable (at least not with simple split-apply-combine) - add tests - update CHANGELOG * disable gsd * disable gsd in azure * reduce timeout and set logical * fix azure * restore timeout to 200 --------- Co-authored-by: Matthew Davies <128810112+MattTDavies@users.noreply.github.com> Co-authored-by: Irfan Alibay Co-authored-by: Oliver Beckstein --- .github/workflows/gh-ci.yaml | 7 +++++-- azure-pipelines.yml | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index 471e3bd20de..8a45aec650f 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -85,6 +85,8 @@ jobs: with: micromamba: true full-deps: ${{ matrix.full-deps }} + # disable GSD because it occasionally introduce hanging in testing #4209 + gsd: '' # in most cases will just default to empty, i.e. pick up max version from other deps numpy: ${{ matrix.numpy }} extra-pip-deps: ${{ matrix.extra-pip-deps }} @@ -113,7 +115,7 @@ jobs: PYTEST_FLAGS="${PYTEST_FLAGS} --cov-config=.coveragerc --cov=MDAnalysis --cov-report=xml" fi echo $PYTEST_FLAGS - pytest -n auto --timeout=200 testsuite/MDAnalysisTests $PYTEST_FLAGS + pytest -n logical --timeout=200 testsuite/MDAnalysisTests $PYTEST_FLAGS - name: run_asv if: contains(matrix.name, 'asv_check') @@ -161,6 +163,7 @@ jobs: with: micromamba: true full-deps: true + gsd: '' extra-pip-deps: "docutils sphinx-sitemap sphinxcontrib-bibtex pybtex pybtex-docutils" extra-conda-deps: "mdanalysis-sphinx-theme>=1.3.0" @@ -281,4 +284,4 @@ jobs: - name: run tests working-directory: ./dist - run: python -m pytest --timeout=200 -n auto --pyargs MDAnalysisTests + run: python -m pytest --timeout=200 -n logical --pyargs MDAnalysisTests diff --git a/azure-pipelines.yml b/azure-pipelines.yml index cace2be35ba..250f3ba63b5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -105,7 +105,6 @@ jobs: biopython "chemfiles>=0.10,<0.10.4" duecredit - "gsd>3.0.0" joblib GridDataFormats mmtf-python @@ -114,6 +113,8 @@ jobs: pytng>=0.2.3 rdkit>=2020.03.1 tidynamics>=1.0.0 + # remove from azure to avoid test hanging #4707 + # "gsd>3.0.0" displayName: 'Install additional dependencies for 64-bit tests' condition: and(succeeded(), eq(variables['PYTHON_ARCH'], 'x64')) - script: >- @@ -128,7 +129,7 @@ jobs: displayName: 'Check installed packages' - powershell: | cd testsuite - pytest MDAnalysisTests --disable-pytest-warnings -n auto --timeout=200 -rsx --cov=MDAnalysis + pytest MDAnalysisTests --disable-pytest-warnings -n logical --timeout=200 -rsx --cov=MDAnalysis displayName: 'Run MDAnalysis Test Suite' - script: | curl -s https://codecov.io/bash | bash From 4fafd51de84d5b89be0559a412acefde0040847c Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sat, 14 Sep 2024 02:19:44 +0200 Subject: [PATCH 7/9] Implementation of Parallelization to analysis.bat (#4693) * Fixes #4663 * Changes made in this Pull Request: - parallelization of the class BAT in analysis.bat.py - addition of parallelization tests (including fixtures in analysis/conftest.py) - update docs for analysis class - update of CHANGELOG --- package/CHANGELOG | 1 + package/MDAnalysis/analysis/bat.py | 29 ++++++++++++++++--- .../MDAnalysisTests/analysis/conftest.py | 6 ++++ .../MDAnalysisTests/analysis/test_bat.py | 12 ++++---- 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index aecde6e6468..d799b52e4e0 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -58,6 +58,7 @@ Enhancements (Issue #4158, PR #4304) * Enables parallelization for analysis.gnm.GNMAnalysis (Issue #4672) * explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680) + * enables parallelization for analysis.bat.BAT (Issue #4663) * Improve error message for `AtomGroup.unwrap()` when bonds are not present.(Issue #4436, PR #4642) * Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP) * Added a tqdm progress bar for `MDAnalysis.analysis.pca.PCA.transform()` diff --git a/package/MDAnalysis/analysis/bat.py b/package/MDAnalysis/analysis/bat.py index 0c3e15a32e9..5186cb6c882 100644 --- a/package/MDAnalysis/analysis/bat.py +++ b/package/MDAnalysis/analysis/bat.py @@ -164,7 +164,7 @@ class to calculate dihedral angles for a given set of atoms or residues import copy import MDAnalysis as mda -from .base import AnalysisBase +from .base import AnalysisBase, ResultsGroup from MDAnalysis.lib.distances import calc_bonds, calc_angles, calc_dihedrals from MDAnalysis.lib.mdamath import make_whole @@ -253,10 +253,28 @@ class BAT(AnalysisBase): Bond-Angle-Torsions (BAT) internal coordinates will be computed for the group of atoms and all frame in the trajectory belonging to `ag`. + .. versionchanged:: 2.8.0 + Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` + backends; use the new method :meth:`get_supported_backends` to see all + supported backends. + """ - @due.dcite(Doi("10.1002/jcc.26036"), - description="Bond-Angle-Torsions Coordinate Transformation", - path="MDAnalysis.analysis.bat.BAT") + _analysis_algorithm_is_parallelizable = True + + @classmethod + def get_supported_backends(cls): + return ( + "serial", + "multiprocessing", + "dask", + ) + + @due.dcite( + Doi("10.1002/jcc.26036"), + description="Bond-Angle-Torsions Coordinate Transformation", + path="MDAnalysis.analysis.bat.BAT", + ) + def __init__(self, ag, initial_atom=None, filename=None, **kwargs): r"""Parameters ---------- @@ -558,3 +576,6 @@ def Cartesian(self, bat_frame): def atoms(self): """The atomgroup for which BAT are computed (read-only property)""" return self._ag + + def _get_aggregator(self): + return ResultsGroup(lookup={'bat': ResultsGroup.ndarray_vstack}) diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index 75d62284b7b..f53cdcfaac1 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -7,6 +7,7 @@ OldAPIAnalysis, ) from MDAnalysis.analysis.rms import RMSD, RMSF +from MDAnalysis.analysis.bat import BAT from MDAnalysis.lib.util import is_installed from MDAnalysis.analysis.gnm import GNMAnalysis @@ -93,3 +94,8 @@ def client_RMSF(request): @pytest.fixture(scope='module', params=params_for_cls(GNMAnalysis)) def client_GNMAnalysis(request): return request.param + + +@pytest.fixture(scope='module', params=params_for_cls(BAT)) +def client_BAT(request): + return request.param \ No newline at end of file diff --git a/testsuite/MDAnalysisTests/analysis/test_bat.py b/testsuite/MDAnalysisTests/analysis/test_bat.py index c80353baff6..5fb2603df62 100644 --- a/testsuite/MDAnalysisTests/analysis/test_bat.py +++ b/testsuite/MDAnalysisTests/analysis/test_bat.py @@ -41,16 +41,16 @@ def selected_residues(self): return ag @pytest.fixture() - def bat(self, selected_residues): + def bat(self, selected_residues, client_BAT): R = BAT(selected_residues) - R.run() + R.run(**client_BAT) return R.results.bat @pytest.fixture - def bat_npz(self, tmpdir, selected_residues): + def bat_npz(self, tmpdir, selected_residues, client_BAT): filename = str(tmpdir / 'test_bat_IO.npy') R = BAT(selected_residues) - R.run() + R.run(**client_BAT) R.save(filename) return filename @@ -73,8 +73,8 @@ def test_bat_coordinates(self, bat): atol=1.5e-5, err_msg="error: BAT coordinates should match test values") - def test_bat_coordinates_single_frame(self, selected_residues): - bat = BAT(selected_residues).run(start=1, stop=2).results.bat + def test_bat_coordinates_single_frame(self, selected_residues, client_BAT): + bat = BAT(selected_residues).run(start=1, stop=2, **client_BAT).results.bat test_bat = [np.load(BATArray)[1]] assert_allclose( bat, From a2e3c3916be7f2a9103b86df198bdeade4d26af3 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Thu, 19 Sep 2024 22:16:39 +0200 Subject: [PATCH 8/9] Simple formatting commit --- package/MDAnalysis/analysis/base.py | 130 ++++++++++++++++------------ 1 file changed, 76 insertions(+), 54 deletions(-) diff --git a/package/MDAnalysis/analysis/base.py b/package/MDAnalysis/analysis/base.py index 47a7eccd137..0ae0c977cec 100644 --- a/package/MDAnalysis/analysis/base.py +++ b/package/MDAnalysis/analysis/base.py @@ -122,7 +122,7 @@ class MyAnalysis(AnalysisBase): def get_supported_backends(cls): return ('serial', 'multiprocessing', 'dask',) - + def _get_aggregator(self): return ResultsGroup(lookup={'timeseries': ResultsGroup.ndarray_vstack}) @@ -147,6 +147,7 @@ def _get_aggregator(self): tools if only the single-frame analysis function needs to be written. """ + import inspect import itertools import logging @@ -288,10 +289,10 @@ def get_supported_backends(cls): """ return ("serial",) - # class authors: override _analysis_algorithm_is_parallelizable - # in derived classes and only set to True if you have confirmed - # that your algorithm works reliably when parallelized with - # the split-apply-combine approach (see docs) + # class authors: override _analysis_algorithm_is_parallelizable + # in derived classes and only set to True if you have confirmed + # that your algorithm works reliably when parallelized with + # the split-apply-combine approach (see docs) _analysis_algorithm_is_parallelizable = False @property @@ -301,13 +302,13 @@ def parallelizable(self): :meth:`_single_frame` to multiple workers and then combine them with a proper :meth:`_conclude` call. If set to ``False``, no backends except for ``serial`` are supported. - + .. note:: If you want to check parallelizability of the whole class, without explicitly creating an instance of the class, see :attr:`_analysis_algorithm_is_parallelizable`. Note that you setting it to other value will break things if the algorithm behind the analysis is not trivially parallelizable. - + Returns ------- @@ -417,11 +418,11 @@ def _setup_frames(self, trajectory, start=None, stop=None, step=None, frames=Non .. versionchanged:: 1.0.0 Added .frames and .times arrays as attributes - + .. versionchanged:: 2.2.0 Added ability to iterate through trajectory by passing a list of frame indices in the `frames` keyword argument - + .. versionchanged:: 2.8.0 Split function into two: :meth:`_define_run_frames` and :meth:`_prepare_sliced_trajectory`: first one defines the limits @@ -450,8 +451,8 @@ def _single_frame(self): def _prepare(self): """ - Set things up before the analysis loop begins. - + Set things up before the analysis loop begins. + Notes ----- ``self.results`` is initialized already in :meth:`self.__init__` with an @@ -525,9 +526,12 @@ def _compute(self, indexed_frames: np.ndarray, return self def _setup_computation_groups( - self, n_parts: int, - start: int = None, stop: int = None, step: int = None, - frames: Union[slice, np.ndarray] = None + self, + n_parts: int, + start: int = None, + stop: int = None, + step: int = None, + frames: Union[slice, np.ndarray] = None, ) -> list[np.ndarray]: """ Splits the trajectory frames, defined by ``start/stop/step`` or @@ -581,11 +585,11 @@ def _setup_computation_groups( return np.array_split(enumerated_frames, n_parts) def _configure_backend( - self, - backend: Union[str, BackendBase], - n_workers: int, - unsupported_backend: bool = False - ) -> BackendBase: + self, + backend: Union[str, BackendBase], + n_workers: int, + unsupported_backend: bool = False, + ) -> BackendBase: """Matches a passed backend string value with class attributes :attr:`parallelizable` and :meth:`get_supported_backends()` to check if downstream calculations can be performed. @@ -647,20 +651,28 @@ def _configure_backend( raise ValueError(f"Can not parallelize class {self.__class__}") # make sure user enabled 'unsupported_backend=True' for custom classes - if (not unsupported_backend and self.parallelizable - and backend_class not in supported_backend_classes): - raise ValueError(( - f"Must specify 'unsupported_backend=True'" - f"if you want to use a custom {backend_class=} for {self.__class__}" - )) + if ( + not unsupported_backend + and self.parallelizable + and backend_class not in supported_backend_classes + ): + raise ValueError( + ( + f"Must specify 'unsupported_backend=True'" + f"if you want to use a custom {backend_class=} for {self.__class__}" + ) + ) # check for the presence of parallelizable transformations if backend_class is not BackendSerial and any( - not t.parallelizable - for t in self._trajectory.transformations): - raise ValueError(( - "Trajectory should not have " - "associated unparallelizable transformations")) + not t.parallelizable for t in self._trajectory.transformations + ): + raise ValueError( + ( + "Trajectory should not have " + "associated unparallelizable transformations" + ) + ) # conclude mapping from string to backend class if it's a builtin backend if isinstance(backend, str): @@ -670,21 +682,25 @@ def _configure_backend( if ( isinstance(backend, BackendBase) and n_workers is not None - and hasattr(backend, 'n_workers') + and hasattr(backend, "n_workers") and backend.n_workers != n_workers ): - raise ValueError(( - f"n_workers specified twice: in {backend.n_workers=}" - f"and in run({n_workers=}). Remove it from run()" - )) + raise ValueError( + ( + f"n_workers specified twice: in {backend.n_workers=}" + f"and in run({n_workers=}). Remove it from run()" + ) + ) # or pass along an instance of the class itself # after ensuring it has apply method if not isinstance(backend, BackendBase) or not hasattr(backend, "apply"): - raise ValueError(( - f"{backend=} is invalid: should have 'apply' method " - "and be instance of MDAnalysis.analysis.backends.BackendBase" - )) + raise ValueError( + ( + f"{backend=} is invalid: should have 'apply' method " + "and be instance of MDAnalysis.analysis.backends.BackendBase" + ) + ) return backend def run( @@ -767,9 +783,9 @@ def run( backend = "serial" if backend is None else backend progressbar_kwargs = {} if progressbar_kwargs is None else progressbar_kwargs - if ((progressbar_kwargs or verbose) and - not (backend == "serial" or - isinstance(backend, BackendSerial))): + if (progressbar_kwargs or verbose) and not ( + backend == "serial" or isinstance(backend, BackendSerial) + ): raise ValueError("Can not display progressbar with non-serial backend") # if number of workers not specified, try getting the number from @@ -789,19 +805,22 @@ def run( executor = self._configure_backend( backend=backend, n_workers=n_workers, - unsupported_backend=unsupported_backend) + unsupported_backend=unsupported_backend, + ) if ( hasattr(executor, "n_workers") and n_parts < executor.n_workers ): # using executor's value here for non-default executors - warnings.warn(( - f"Analysis not making use of all workers: " - f"{executor.n_workers=} is greater than {n_parts=}")) + warnings.warn( + ( + f"Analysis not making use of all workers: " + f"{executor.n_workers=} is greater than {n_parts=}" + ) + ) # start preparing the run worker_func = partial( - self._compute, - progressbar_kwargs=progressbar_kwargs, - verbose=verbose) + self._compute, progressbar_kwargs=progressbar_kwargs, verbose=verbose + ) self._setup_frames( trajectory=self._trajectory, start=start, stop=stop, step=step, frames=frames) @@ -813,7 +832,8 @@ def run( # we need `AnalysisBase` classes # since they hold `frames`, `times` and `results` attributes remote_objects: list["AnalysisBase"] = executor.apply( - worker_func, computation_groups) + worker_func, computation_groups + ) self.frames = np.hstack([obj.frames for obj in remote_objects]) self.times = np.hstack([obj.times for obj in remote_objects]) @@ -902,8 +922,9 @@ def get_supported_backends(cls): return ("serial", "multiprocessing", "dask") def __init__(self, function, trajectory=None, *args, **kwargs): - if (trajectory is not None) and (not isinstance( - trajectory, coordinates.base.ProtoReader)): + if (trajectory is not None) and ( + not isinstance(trajectory, coordinates.base.ProtoReader) + ): args = (trajectory,) + args trajectory = None @@ -1036,8 +1057,9 @@ def _filter_baseanalysis_kwargs(function, kwargs): n_base_defaults = len(base_argspec.defaults) base_kwargs = { name: val - for name, val in zip(base_argspec.args[-n_base_defaults:], - base_argspec.defaults) + for name, val in zip( + base_argspec.args[-n_base_defaults:], base_argspec.defaults + ) } try: From 69cf6d3448fa3083f2ec7db05f93dce710182fb9 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Thu, 19 Sep 2024 22:16:52 +0200 Subject: [PATCH 9/9] A commit for review --- package/MDAnalysis/analysis/base.py | 30 +++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/package/MDAnalysis/analysis/base.py b/package/MDAnalysis/analysis/base.py index 0ae0c977cec..e8b7fc074a8 100644 --- a/package/MDAnalysis/analysis/base.py +++ b/package/MDAnalysis/analysis/base.py @@ -326,9 +326,9 @@ def __init__(self, trajectory, verbose=False, **kwargs): self._verbose = verbose self.results = Results() - def _define_run_frames(self, trajectory, - start=None, stop=None, step=None, frames=None - ) -> Union[slice, np.ndarray]: + def _define_run_frames( + self, trajectory, start=None, stop=None, step=None, frames=None + ) -> Union[slice, np.ndarray]: """Defines limits for the whole run, as passed by self.run() arguments Parameters @@ -478,9 +478,9 @@ def _conclude(self): """ pass # pylint: disable=unnecessary-pass - def _compute(self, indexed_frames: np.ndarray, - verbose: bool = None, - *, progressbar_kwargs={}) -> "AnalysisBase": + def _compute( + self, indexed_frames: np.ndarray, verbose: bool = None, *, progressbar_kwargs={} + ) -> "AnalysisBase": """Perform the calculation on a balanced slice of frames that have been setup prior to that using _setup_computation_groups() @@ -513,10 +513,9 @@ def _compute(self, indexed_frames: np.ndarray, if len(frames) == 0: # if `frames` were empty in `run` or `stop=0` return self - for idx, ts in enumerate(ProgressBar( - self._sliced_trajectory, - verbose=verbose, - **progressbar_kwargs)): + for idx, ts in enumerate( + ProgressBar(self._sliced_trajectory, verbose=verbose, **progressbar_kwargs) + ): self._frame_index = idx # accessed later by subclasses self._ts = ts self.frames[idx] = ts.frame @@ -637,13 +636,12 @@ def _configure_backend( builtin_backends = { "serial": BackendSerial, "multiprocessing": BackendMultiprocessing, - "dask": BackendDask + "dask": BackendDask, } backend_class = builtin_backends.get(backend, backend) supported_backend_classes = [ - builtin_backends.get(b) - for b in self.get_supported_backends() + builtin_backends.get(b) for b in self.get_supported_backends() ] # check for serial-only classes @@ -823,7 +821,11 @@ def run( ) self._setup_frames( trajectory=self._trajectory, - start=start, stop=stop, step=step, frames=frames) + start=start, + stop=stop, + step=step, + frames=frames, + ) computation_groups = self._setup_computation_groups( start=start, stop=stop, step=step, frames=frames, n_parts=n_parts )