diff --git a/.github/actions/create-submit-user/action.yml b/.github/actions/create-submit-user/action.yml new file mode 100644 index 00000000..23470daf --- /dev/null +++ b/.github/actions/create-submit-user/action.yml @@ -0,0 +1,15 @@ +name: 'Create Submit User' +description: 'Create submituser account with sudo privileges' +runs: + using: "composite" + steps: + - name: Create submit user + shell: bash + run: | + id -u submituser >/dev/null 2>&1 || useradd -m -s /bin/bash submituser + { + echo 'Defaults:submituser !requiretty' + echo 'submituser ALL=(ALL) NOPASSWD:ALL' + } > /etc/sudoers.d/submituser + chmod 0440 /etc/sudoers.d/submituser + chown -R submituser:submituser "$GITHUB_WORKSPACE" diff --git a/.github/actions/run-asimov-command/action.yml b/.github/actions/run-asimov-command/action.yml new file mode 100644 index 00000000..5bac17fe --- /dev/null +++ b/.github/actions/run-asimov-command/action.yml @@ -0,0 +1,20 @@ +name: 'Run Asimov Command' +description: 'Execute asimov command as submituser' +inputs: + script: + description: 'Full script to run inside the asimov env (include the asimov CLI if needed)' + required: true + + +runs: + using: "composite" + steps: + - name: Run asimov command + shell: bash -el {0} + run: | + su - submituser <&2; exit 1; } + cat *.err || true + cat *.out || true + cat *.log || true + + # Normalize patterns into one-per-line (split on spaces and newlines) + PATTERN_LINES=$(printf '%s\n' "$PATTERNS_INPUT" | tr ' \t' '\n') + + # Pre-count patterns (ignore empty/comment lines) + PATTERN_COUNT=0 + while IFS= read -r pattern; do + [[ -z "${pattern//[[:space:]]/}" ]] && continue + [[ "$pattern" =~ ^[[:space:]]*# ]] && continue + PATTERN_COUNT=$((PATTERN_COUNT + 1)) + done <<< "$PATTERN_LINES" + + ELAPSED=0 + echo "Waiting for files in $DIRECTORY; patterns: $PATTERN_COUNT; timeout: $TIMEOUT; interval: $INTERVAL" + + while [ "$ELAPSED" -lt "$TIMEOUT" ]; do + # Show condor queue as submituser if requested + if [ "$SHOW_Q" = "true" ]; 
then + su - submituser -s /bin/bash -c 'condor_q' || true + su - submituser -s /bin/bash -c 'condor_q -hold' || true + fi + + FOUND_COUNT=0 + while IFS= read -r pattern; do + [[ -z "${pattern//[[:space:]]/}" ]] && continue + [[ "$pattern" =~ ^[[:space:]]*# ]] && continue + COUNT=0 + for f in "$DIRECTORY"/$pattern; do + [ -e "$f" ] && COUNT=$((COUNT + 1)) + done + [ "$COUNT" -gt 0 ] && FOUND_COUNT=$((FOUND_COUNT + 1)) + done <<< "$PATTERN_LINES" + + if [ "$REQUIRE_ALL" = "true" ]; then + if [ "$FOUND_COUNT" -eq "$PATTERN_COUNT" ] && [ "$PATTERN_COUNT" -gt 0 ]; then + echo "All file patterns found!" + ls -lh "$DIRECTORY"/ || true + exit 0 + fi + else + if [ "$FOUND_COUNT" -gt 0 ]; then + echo "Files found! ($FOUND_COUNT/$PATTERN_COUNT)" + ls -lh "$DIRECTORY"/ || true + exit 0 + fi + fi + + echo "Elapsed: ${ELAPSED}s / ${TIMEOUT}s - Files not yet present ($FOUND_COUNT/$PATTERN_COUNT patterns matched), waiting..." + sleep "$INTERVAL" + ELAPSED=$((ELAPSED + INTERVAL)) + done + + echo "Timeout waiting for files in $DIRECTORY" + ls -R "$DIRECTORY"/ || true + exit 1 diff --git a/.github/workflows/cbcflow-tests.yml b/.github/workflows/cbcflow-tests.yml new file mode 100644 index 00000000..51f07193 --- /dev/null +++ b/.github/workflows/cbcflow-tests.yml @@ -0,0 +1,53 @@ +name: CBCFlow Integration Tests + +# These tests check the integration between asimov and cbcflow. +# They are allowed to fail (continue-on-error: true) because: +# - cbcflow is an optional external dependency +# - some failures reveal known issues to be fixed incrementally +# +# Run on the same branches as the main test suite so failures are visible. 
+ +on: + push: + branches: [ master, v*-release, v*-preview ] + tags: + - 'v[0-9]+.[0-9]+.[0-9]+' + - 'v[0-9]+.[0-9]+.[0-9]+-alpha.[0-9]+' + - 'v[0-9]+.[0-9]+.[0-9]+-beta.[0-9]+' + - 'v[0-9]+.[0-9]+.[0-9]+-rc.[0-9]+' + pull_request: + branches: [ master, v*-release, v*-preview ] + +jobs: + cbcflow-integration: + name: CBCFlow integration (Python 3.11) + runs-on: ubuntu-latest + continue-on-error: true + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install asimov + run: | + python -m pip install --upgrade pip + pip install . + pip install ".[bilby]" + + - name: Install cbcflow + run: pip install cbcflow + + - name: Set up git identity + run: | + git config --global user.email "test@asimov.test" + git config --global user.name "Asimov Test" + git config --global init.defaultBranch main + + - name: Run CBCFlow integration tests + run: python -m unittest tests.test_cbcflow_integration -v diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..8bd8c0fe --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,77 @@ +name: Build and Deploy Documentation + +on: + push: + branches: + - master + - main + pull_request: + branches: + - master + - main + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: "pages" + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Needed for sphinx-multiversion to see all branches/tags + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[docs]" + + - name: Build documentation with sphinx-multiversion + run: | + cd docs + make multi + + - name: Create index redirect to master + run: | + mkdir -p docs/build/html + cat 
> docs/build/html/index.html << 'EOF' + + + + + Asimov Documentation + + + +

Click here to go to the documentation.

+ + + EOF + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: docs/build/html + + deploy: + if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main') + needs: build + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/htcondor-tests.yml b/.github/workflows/htcondor-tests.yml new file mode 100644 index 00000000..b51b76ca --- /dev/null +++ b/.github/workflows/htcondor-tests.yml @@ -0,0 +1,290 @@ +name: Tests with HTCondor +on: [push, pull_request] + +jobs: + tests: + name: "HTCondor Testing" + runs-on: ubuntu-latest + container: + image: htcondor/mini:latest + options: --privileged + defaults: + run: + shell: bash -el {0} + steps: + + - uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - uses: ./.github/actions/setup-htcondor + + - name: Install base tools + run: | + yum install -y sudo git + + - name: Create submit user + uses: ./.github/actions/create-submit-user + + - name: Ensure submituser can access workspace + run: | + mkdir -p "$GITHUB_WORKSPACE/test_project" + chmod -R a+rwX "$GITHUB_WORKSPACE" + + - name: Setup Asimov Environment + uses: ./.github/actions/setup-asimov-env + with: + python-version: "3.10" + extra-packages: "-U bilby bilby_pipe==1.4.0 git+https://git.ligo.org/asimov/pipelines/gwdata.git@update-htcondor pesummary" + + - name: Set up git + run: | + su - submituser <= 0, "Total should be non-negative" + assert results['total'] == results['project_analyses'] + results['event_analyses'], \ + "Total should equal sum of project and event analyses" + + print("\n=== All programmatic API tests passed! 
===") + sys.exit(0) + except Exception as e: + print("\nERROR during programmatic monitor API tests:") + traceback.print_exc() + sys.exit(1) + EOF + + - name: Archive testing artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + path: | + checkouts/ + working/ + retention-days: 5 + + - name: Stop background server + if: always() + run: | + # Kill the background Python process running the server + pkill -f "MockGWDataFindServer" || true \ No newline at end of file diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 2d1286e8..1746c324 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -1,22 +1,29 @@ -name: Python multi-OS +name: Python multi-OS on: push: - branches: [ master ] + branches: [ master, v*-release, v*-preview ] + tags: + - 'v[0-9]+.[0-9]+.[0-9]+' + - 'v[0-9]+.[0-9]+.[0-9]+-alpha.[0-9]+' + - 'v[0-9]+.[0-9]+.[0-9]+-beta.[0-9]+' + - 'v[0-9]+.[0-9]+.[0-9]+-rc.[0-9]+' pull_request: - branches: [ master ] + branches: [ master, v*-release, v*-preview ] jobs: - build-ubuntu-python310: + build-ubuntu-python: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - python-version: [3.6, 3.7, 3.8, 3.9, "3.10"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] os: [ubuntu-latest] # , macOS-latest, windows-latest steps: - uses: actions/checkout@v2 + with: + fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: @@ -24,14 +31,161 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - #- name: Lint with flake8 - # run: | - # # stop the build if there are Python syntax errors or undefined names - # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - # flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest + pip install . + pip install .[bilby] + - name: Set up git + run: | + git config --global init.defaultBranch master + git config --global user.email "you@example.com" + git config --global user.name "Your Name" + + - name: Run unittests run: | python -m unittest discover tests/ + + test-external-blueprints: + # This job checks compatibility with the external asimov-data blueprints. + # It is allowed to fail because external data files may lag behind code changes. + runs-on: ubuntu-latest + continue-on-error: true + strategy: + fail-fast: false + matrix: + python-version: ["3.11"] + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install . + pip install .[bilby] + - name: Set up git + run: | + git config --global init.defaultBranch master + git config --global user.email "you@example.com" + git config --global user.name "Your Name" + + - name: Run external blueprint compatibility tests + run: | + python -m unittest tests.external_blueprint_compat -v + + publish: + name: Build and publish to PyPI + needs: build-ubuntu-python + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/v') + permissions: + contents: write + id-token: write + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check tag branch + id: check-branch + run: | + git fetch --all + branches=$(git branch -r --contains ${{ github.ref }}) + echo "Branches containing this tag: $branches" + + if echo "$branches" | grep -E "origin/master|origin/.*-preview"; then + echo "Tag is on master or *-preview branch" + echo "publish=true" >> $GITHUB_OUTPUT + else + echo "Tag is not on master or *-preview branch" + echo "publish=false" >> 
$GITHUB_OUTPUT + fi + + - name: Set up Python + if: steps.check-branch.outputs.publish == 'true' + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install build dependencies + if: steps.check-branch.outputs.publish == 'true' + run: | + python -m pip install --upgrade pip + pip install build twine + + - name: Build package + if: steps.check-branch.outputs.publish == 'true' + run: python -m build + + - name: Publish to PyPI + if: steps.check-branch.outputs.publish == 'true' + uses: pypa/gh-action-pypi-publish@release/v1 + + - name: Determine if pre-release + if: steps.check-branch.outputs.publish == 'true' + id: prerelease + run: | + if echo "${{ github.ref_name }}" | grep -E "alpha|beta|rc"; then + echo "prerelease=true" >> $GITHUB_OUTPUT + else + echo "prerelease=false" >> $GITHUB_OUTPUT + fi + + - name: Extract changelog entry + if: steps.check-branch.outputs.publish == 'true' + id: changelog + run: | + # Remove 'v' prefix from tag to match changelog version + VERSION="${{ github.ref_name }}" + VERSION="${VERSION#v}" + + # Extract the changelog section for this version + python3 << 'EOF' > release_notes.md + import re + import sys + + version = "$VERSION" + + try: + with open('CHANGELOG.rst', 'r') as f: + content = f.read() + + # Pattern to match version header (e.g., "0.7.0-alpha1" followed by "=====") + # This captures content until the next version header + pattern = rf'^{re.escape(version)}\s*\n=+\s*\n(.*?)(?=\n\S+\s*\n=+\s*\n|\Z)' + + match = re.search(pattern, content, re.MULTILINE | re.DOTALL) + + if match: + changelog_text = match.group(1).strip() + print(changelog_text) + else: + print(f"Release notes for version {version}") + print("") + print("See [CHANGELOG.rst](CHANGELOG.rst) for details.") + except FileNotFoundError: + print(f"Release {version}") + print("") + print("No changelog found.") + except Exception as e: + print(f"Release {version}") + print("") + print(f"Error extracting changelog: {e}") + EOF + + echo "Extracted 
changelog for version $VERSION" + cat release_notes.md + + - name: Create GitHub Release + if: steps.check-branch.outputs.publish == 'true' + uses: softprops/action-gh-release@v1 + with: + draft: false + prerelease: ${{ steps.prerelease.outputs.prerelease == 'true' }} + body_path: release_notes.md + files: | + dist/* diff --git a/.github/workflows/testing-pipelines.yml b/.github/workflows/testing-pipelines.yml new file mode 100644 index 00000000..9a79c483 --- /dev/null +++ b/.github/workflows/testing-pipelines.yml @@ -0,0 +1,159 @@ +name: Testing Pipelines with HTCondor +on: [push, pull_request] + +permissions: + contents: read + +jobs: + test-pipelines: + name: "Testing Pipelines HTCondor Test" + runs-on: ubuntu-latest + container: + image: htcondor/mini:latest + options: --privileged + defaults: + run: + shell: bash -el {0} + steps: + + - uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - uses: ./.github/actions/setup-htcondor + + - name: Install base tools + run: | + yum install -y sudo git + + - name: Create submit user + uses: ./.github/actions/create-submit-user + + - name: Ensure submituser can access workspace + run: | + mkdir -p "$GITHUB_WORKSPACE/test_project" + chmod -R a+rwX "$GITHUB_WORKSPACE" + + - name: Setup Asimov Environment + uses: ./.github/actions/setup-asimov-env + with: + python-version: "3.10" + extra-packages: "" + + - name: Set up git + run: | + su - submituser <`_: Enforce waveform minimum frequency location (breaking change for old blueprints) + +0.7.0-alpha2 +============ + +This is a major feature release that represents a significant evolution of asimov's architecture and capabilities. This alpha release introduces powerful new workflow management features, a modernized monitoring system, and enhanced programmatic control. 
+ +Major New Features +------------------ + +**State Machine Architecture** + The monitor loop has been completely refactored into a state machine pattern, providing better control flow, plugin support, and pipeline-specific handlers. This enables more sophisticated workflow management and better extensibility. + +**Advanced Dependency System** + Implements a flexible dependency specification system with property-based filtering, AND/OR/negation logic, staleness tracking, and workflow graph integration. This allows complex relationships between analyses to be expressed naturally. + +**Strategy Expansion** + New strategy expansion feature enables creating multiple analyses from parameter matrices, making it easy to run parameter studies and systematic variations. + +**Python API** + Comprehensive Python API for project creation and management, including context manager support for programmatic control of asimov workflows. + +**Enhanced HTML Reports** + HTML reports now include graph-based workflow visualization with interactive modal popups, advanced filtering, and improved styling for better workflow monitoring. + +**Blueprint Validator** + Introduces validation for blueprint files to catch configuration errors early and ensure consistent project setup. + +**Modern Prior Handling** + Refactored prior handling with pydantic validation and pipeline-specific interfaces, providing better type safety and clearer error messages. + +Changes +------- + +**Thread-Safe Logging** + File logging setup has been refactored to ensure thread safety with shared locks, and logging is now lazy-loaded to prevent log file creation for read-only commands. + +**PESummary Modernization** + PESummary has been converted to a SubjectAnalysis with optional dependency support, enabling more flexible post-processing workflows. + +**Improved Testing Infrastructure** + Added minimal testing pipelines for SimpleAnalysis, SubjectAnalysis, and ProjectAnalysis. 
Comprehensive GitHub Actions workflows for HTCondor and LALInference end-to-end testing with concurrent execution. + +**Build System Migration** + Migrated from setup.py to pyproject.toml for modern Python packaging standards. + +**Plugin Flexibility** + Enhanced plugin system with additional flexibility for extending asimov's capabilities. + +**Scheduler Improvements** + Scheduler refresh implementation for better job management. + +**Removed Legacy Assumptions** + Removed calibration categories and fixed hardcoded git branch assumptions for greater flexibility in deployment environments. + +Breaking Changes +---------------- + +This release introduces significant architectural changes. While efforts have been made to maintain backward compatibility where possible, some changes in behavior are expected, particularly in: + +- Monitor loop behavior due to state machine refactoring +- Dependency specification syntax (old syntax may need updating) +- Prior specification format (now uses pydantic models) + +GitHub Pull Requests +-------------------- + ++ `github#3 `_: Scheduler refresh ++ `github#7 `_: Introduce a blueprint validator ++ `github#14 `_: Update licence to MIT ++ `github#15 `_: CI improvements ++ `github#16 `_: Update licence ++ `github#17 `_: Fix interest dict ++ `github#20 `_: Remove pkg-resources ++ `github#21 `_: Workflow to actions ++ `github#23 `_: Update bilby final ++ `github#27 `_: Bug hunt ++ `github#29 `_: Update the PESummary interface ++ `github#36 `_: Refactor prior handling with pydantic validation and pipeline interfaces ++ `github#38 `_: Add GitHub Actions workflow for building and deploying documentation ++ `github#39 `_: Add Python API for project creation and management ++ `github#40 `_: Add LALInference end-to-end testing to HTCondor workflow with concurrent execution ++ `github#43 `_: Make logging lazy and prevent log file creation for read-only commands ++ `github#48 `_: Add minimal testing pipelines for SimpleAnalysis, 
SubjectAnalysis, and ProjectAnalysis ++ `github#50 `_: Enhance HTML reports with graph-based workflow visualization, modal popups, and advanced filtering ++ `github#52 `_: Implement flexible dependency specification with property-based filtering, AND/OR logic, staleness tracking, and workflow graph integration ++ `github#55 `_: Add review information display to HTML reports and fix review command ++ `github#56 `_: Fix frames in workflow ++ `github#58 `_: Fix bilby priors ++ `github#60 `_: Convert PESummary to SubjectAnalysis with optional dependency support ++ `github#61 `_: Fix bilby tests ++ `github#63 `_: Remove calibration categories and fix hardcoded git branch assumptions ++ `github#65 `_: Fix dependency resolution, graph visualization, and ledger persistence bugs ++ `github#71 `_: Fix SubjectAnalysis dependency resolution bugs ++ `github#72 `_: Refactor monitor loop to state machine pattern with plugin support, programmatic API, and pipeline-specific handlers ++ `github#75 `_: Add strategy expansion for creating multiple analyses from parameter matrices ++ `github#76 `_: Allow additional plugin flexibility ++ `github#83 `_: Refactor file logging setup to ensure thread safety with a shared lock ++ `github#97 `_: Fix issue with git init in project creation + 0.6.1 ===== diff --git a/IMPLEMENTATION.md b/IMPLEMENTATION.md new file mode 100644 index 00000000..e93e7c9a --- /dev/null +++ b/IMPLEMENTATION.md @@ -0,0 +1,266 @@ +# Improved Dependency Management Implementation + +## Overview + +This implementation adds flexible dependency specification to Asimov analyses, supporting property-based filtering, AND/OR logic, negation, and staleness tracking. + +## Features Implemented + +### 1. Property-Based Dependencies +Analyses can now depend on other analyses based on any property, not just names: + +```yaml +needs: + - pipeline: bayeswave + - waveform.approximant: IMRPhenomXPHM + - review.status: approved +``` + +### 2. 
AND/OR Logic + +**OR Logic (default):** Top-level items are OR'd together +```yaml +needs: + - waveform.approximant: IMRPhenomXPHM + - waveform.approximant: SEOBNRv5PHM +# Matches analyses with EITHER approximant +``` + +**AND Logic:** Use nested lists for AND conditions +```yaml +needs: + - - pipeline: bayeswave + - status: finished +# Matches ONLY analyses that are both bayeswave AND finished +``` + +**Complex Combinations:** +```yaml +needs: + - - pipeline: bayeswave + - review.status: approved + - waveform.approximant: IMRPhenomXPHM +# Matches: (bayeswave AND approved) OR IMRPhenomXPHM +``` + +### 3. Negation +Prefix values with `!` to match everything except that value: + +```yaml +needs: + - pipeline: "!bayeswave" +# Matches all non-bayeswave analyses +``` + +### 4. Staleness Tracking + +When an analysis runs, the resolved dependencies are recorded. If the set of matching analyses changes later (e.g., new analyses added that match the criteria), the analysis is marked as **stale**. + +**Stored in meta:** +- `resolved_dependencies`: List of analysis names that were dependencies when run +- `refreshable`: Boolean flag for auto-refresh + +**Properties added:** +- `is_stale`: True if current dependencies differ from resolved +- `is_refreshable`: Get/set the refreshable flag + +### 5. 
HTML Report Enhancements + +**New Indicators:** +- **Stale badge** (yellow): Dependencies changed since run +- **Stale (will refresh) badge** (blue): Refreshable analysis is stale + +**Dependency Display:** +- Current dependencies shown in details section +- Resolved dependencies shown (when different from current) +- Clear visual distinction with colored backgrounds + +## Implementation Details + +### Core Functions + +#### `_parse_single_dependency(need)` +Parses a single dependency string into components: +- Returns: `(attribute_list, match_value, is_negated)` +- Examples: + - `"Prod1"` → `(['name'], 'Prod1', False)` + - `"pipeline: bayeswave"` → `(['pipeline'], 'bayeswave', False)` + - `"pipeline: !bayeswave"` → `(['pipeline'], 'bayeswave', True)` + +#### `_process_dependencies(needs)` +Processes the entire needs list: +- Handles nested lists for AND groups +- Returns list of requirements (single or grouped) +- Each requirement is either a tuple or list of tuples + +#### `matches_filter(attribute, match, negate=False)` +Enhanced to support negation: +- `negate=True` inverts the match result +- Works with name, status, review, and metadata properties + +#### `dependencies` property +Resolves dependencies with AND/OR logic: +- Top-level items: OR (union) +- Nested lists: AND (intersection within, then union) +- Returns list of analysis names + +### Data Storage + +All new fields are stored in `analysis.meta`: +- `resolved_dependencies`: List[str] +- `refreshable`: bool + +These are automatically saved to the ledger via existing `to_dict()` method. + +### Backward Compatibility + +Simple name-based dependencies still work exactly as before: +```yaml +needs: + - Prod1 + - Prod2 +``` + +This is internally converted to: +```python +[(['name'], 'Prod1', False), (['name'], 'Prod2', False)] +``` + +## Testing + +### Unit Tests (20 tests, all passing) +Located in `tests/test_dependency_logic.py`: + +1. 
**Parsing Tests:** + - Simple names + - Property-based + - Nested properties + - Negation + +2. **Matching Tests:** + - By name, status, review + - Nested properties + - Negation + +3. **Dependency Resolution:** + - OR logic + - AND logic + - Complex combinations + - Negation + +4. **State Management:** + - Staleness detection + - Refreshable flag + +## Examples + +See `examples/dependency-examples.yaml` for complete examples of all features. + +### Example 1: OR Logic +```yaml +kind: analysis +name: combiner +pipeline: bilby +needs: + - waveform.approximant: IMRPhenomXPHM + - waveform.approximant: SEOBNRv5PHM +# Depends on all IMRPhenomXPHM and SEOBNRv5PHM analyses +``` + +### Example 2: AND Logic +```yaml +kind: analysis +name: specific-combo +pipeline: bilby +needs: + - - review.status: approved + - pipeline: bayeswave +# Depends ONLY on approved bayeswave analyses +``` + +### Example 3: Refreshable +```yaml +kind: analysis +name: auto-update +pipeline: bilby +refreshable: true +needs: + - review.status: approved +# Will auto-refresh when new approved analyses are added +``` + +## HTML Output + +The demo script `examples/demo_html_output.py` shows the HTML output for: +1. Analysis with dependencies +2. Stale analysis +3. Refreshable stale analysis + +Run it with: +```bash +python examples/demo_html_output.py +``` + +## Files Modified + +### Core Implementation +- `asimov/analysis.py`: Dependency resolution logic +- `asimov/cli/report.py`: HTML report CSS + +### Documentation +- `docs/source/blueprints.rst`: User documentation +- `examples/README.md`: Examples guide + +### Tests +- `tests/test_dependency_logic.py`: Unit tests (20 tests) +- `tests/test_dependencies.py`: Integration tests + +### Examples +- `examples/dependency-examples.yaml`: Example blueprints +- `examples/demo_html_output.py`: HTML demo script + +## Migration Guide + +### For Existing Projects + +No changes required! 
Old-style dependencies continue to work: +```yaml +needs: + - Prod1 +``` + +### To Use New Features + +Simply update your blueprint needs sections: + +**Before:** +```yaml +needs: + - BayesWave-PSD +``` + +**After (property-based):** +```yaml +needs: + - pipeline: bayeswave +``` + +**After (with conditions):** +```yaml +needs: + - - pipeline: bayeswave + - review.status: approved +``` + +## Future Enhancements + +Possible future improvements: +1. Dependency visualization in graph view +2. Automatic dependency validation +3. Dependency change notifications +4. Dependency history tracking +5. More complex query syntax (e.g., ranges, regex) + +## Conclusion + +This implementation provides a powerful and flexible dependency system while maintaining complete backward compatibility. All 20 unit tests pass, demonstrating robust handling of complex dependency scenarios. diff --git a/LICENSE b/LICENSE index aff8ac51..f7cd15dc 100644 --- a/LICENSE +++ b/LICENSE @@ -1,14 +1,7 @@ -Copyright (c) 2020, Daniel Williams -All rights reserved. +Copyright 2025 Daniel Williams. -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/asimov/__init__.py b/asimov/__init__.py index 5e8d3b77..a5a50f3a 100644 --- a/asimov/__init__.py +++ b/asimov/__init__.py @@ -9,21 +9,27 @@ import os import logging +from logging.handlers import RotatingFileHandler -from pkg_resources import DistributionNotFound, get_distribution, resource_string +try: + from importlib.metadata import version, PackageNotFoundError + from importlib.resources import files +except ImportError: + from importlib_metadata import version, PackageNotFoundError + from importlib_resources import files try: - __version__ = get_distribution(__name__).version -except DistributionNotFound: + __version__ = version(__name__) +except PackageNotFoundError: # package is not installed __version__ = "dev" - pass try: import ConfigParser as configparser except ImportError: import configparser -default_config = resource_string(__name__, "{}.conf".format(__packagename__)) + +default_config = files(__name__).joinpath(f"{__packagename__}.conf").read_bytes() config = configparser.ConfigParser() # if not config_file: @@ -77,16 +83,94 @@ 
ch.setFormatter(print_formatter) ch.setLevel(PRINT_LEVEL) -logfile = "asimov.log" -fh = logging.FileHandler(logfile) -formatter = logging.Formatter( - "%(asctime)s [%(name)s][%(levelname)s] %(message)s", "%Y-%m-%d %H:%M:%S" -) -fh.setFormatter(formatter) -fh.setLevel(LOGGER_LEVEL) - logger.addHandler(ch) -logger.addHandler(fh) + +# File handler is not added by default - it's lazy-loaded when needed +_file_handler = None +import threading +_file_handler_lock = threading.Lock() + +def setup_file_logging(logfile=None): + """ + Set up file logging for asimov. + + This function should be called by commands that need to write logs to a file. + Read-only commands like --help or --version should not call this. + + Parameters + ---------- + logfile : str, optional + Path to the log file. If None, uses configuration or default location. + """ + global _file_handler + + # Only set up file handler once (thread-safe check) + with _file_handler_lock: + if _file_handler is not None: + return + + # Determine log file location + if logfile is None: + try: + log_directory = config.get("logging", "location") + try: + if not os.path.exists(log_directory): + # Create directory with appropriate permissions + os.makedirs(log_directory, mode=0o755) + logfile = os.path.join(log_directory, "asimov.log") + except OSError as e: + # If we cannot create or use the configured log directory, fall back to current directory + logger.error( + "Failed to create or access log directory '%s': %s. 
" + "Falling back to current directory for logging.", + log_directory, + e, + ) + logfile = "asimov.log" + except (configparser.NoOptionError, configparser.NoSectionError): + # Fall back to current directory if no config + logfile = "asimov.log" + + # Use RotatingFileHandler to prevent log files from growing too large + # Default: 10 MB per file, keep 5 backup files + max_bytes = 10 * 1024 * 1024 # 10 MB + backup_count = 5 + + try: + # Try to get custom values from config + max_bytes = int(config.get("logging", "max_bytes")) + except (configparser.NoOptionError, configparser.NoSectionError): + # No config value provided, use default + pass + except ValueError as e: + logger.warning(f"Invalid value for logging.max_bytes in config, using default: {e}") + + try: + backup_count = int(config.get("logging", "backup_count")) + except (configparser.NoOptionError, configparser.NoSectionError): + # No config value provided, use default + pass + except ValueError as e: + logger.warning(f"Invalid value for logging.backup_count in config, using default: {e}") + + try: + _file_handler = RotatingFileHandler( + logfile, maxBytes=max_bytes, backupCount=backup_count + ) + formatter = logging.Formatter( + "%(asctime)s [%(name)s][%(levelname)s] %(message)s", "%Y-%m-%d %H:%M:%S" + ) + _file_handler.setFormatter(formatter) + _file_handler.setLevel(LOGGER_LEVEL) + logger.addHandler(_file_handler) + except (OSError, IOError) as e: + # Log to stderr if file logging cannot be set up + import sys + sys.stderr.write( + f"Warning: Failed to set up file logging to '{logfile}': {e}\n" + "Continuing without file logging.\n" + ) + _file_handler = None # Mark as attempted but failed try: diff --git a/asimov/analysis.py b/asimov/analysis.py index 68ee8f86..4e83f783 100644 --- a/asimov/analysis.py +++ b/asimov/analysis.py @@ -30,13 +30,14 @@ from functools import reduce import operator +from typing import TYPE_CHECKING, Any, Optional, List, cast from liquid import Liquid -from asimov import config, 
logger, LOGGER_LEVEL -from asimov.pipelines import known_pipelines -from asimov.utils import update, diff_dict -from asimov.storage import Store +from . import config, logger, LOGGER_LEVEL +from .pipelines import known_pipelines +from .utils import update, diff_dict +from .storage import Store from .review import Review from .ini import RunConfiguration @@ -70,9 +71,24 @@ class Analysis: The base class for all other types of analysis. """ - meta = {} - meta_defaults = {"scheduler": {}, "sampler": {}, "review": {}, "likelihood": {}} - _reviews = Review() + meta: dict[str, Any] = {} + meta_defaults: dict[str, Any] = {"scheduler": {}, "sampler": {}, "likelihood": {}} + + # These annotations help static analysis without affecting runtime state + if TYPE_CHECKING: + event: Any + subject: Any + name: str + pipeline: Any + comment: Optional[str] + _needs: List[Any] + _reviews: Review + status_str: str + repository: Any + ledger: Any + analyses: List[Any] + productions: List[Any] + _analysis_spec: Any @property def review(self): @@ -82,7 +98,8 @@ def review(self): if "review" in self.meta: if len(self.meta["review"]) > 0: self._reviews = Review.from_dict(self.meta["review"], production=self) - self.meta.pop("review") + # Always remove 'review' from meta since we manage it via _reviews + self.meta.pop("review") return self._reviews def _process_dependencies(self, needs): @@ -92,27 +109,124 @@ def _process_dependencies(self, needs): The dependencies can be provided either as the name of a production, or a query against the analysis's attributes. 
+ The needs list supports complex dependency specifications: + - Simple name: "Prod1" matches analysis with name "Prod1" + - Property query: "waveform.approximant: IMRPhenomXPHM" matches analyses + with that waveform approximant + - Negation: "review.status: !approved" matches analyses that are NOT approved + - Nested lists for AND logic: [["review.status: approved", "waveform.approximant: IMRPhenomXPHM"]] + matches analyses that satisfy ALL conditions in the nested list + - Top-level items are OR'd together + - Optional dependencies: {"optional": true, "pipeline": "bilby"} marks dependency as optional + Parameters ---------- needs : list - A list of all the requirements + A list of all the requirements. Can contain strings (OR'd together), + or lists of strings (AND'd together internally, OR'd with other items), + or dicts with optional flag Returns ------- list A list of all the requirements processed for evaluation. + Each item is either a tuple (attribute, match, negate, optional) for simple filters, + or a list of tuples for AND groups. """ all_requirements = [] for need in deepcopy(needs): - try: - requirement = need.split(":") - requirement = [requirement[0].split("."), requirement[1]] - except IndexError: - requirement = [["name"], need] - except AttributeError: - requirement = need - all_requirements.append(requirement) + # Check if this is an AND group (list of conditions) + if isinstance(need, list): + and_group = [] + for condition in need: + and_group.append(self._parse_single_dependency(condition)) + all_requirements.append(and_group) + else: + # Single condition + all_requirements.append(self._parse_single_dependency(need)) return all_requirements + + def _parse_single_dependency(self, need): + """ + Parse a single dependency specification into (attribute, match, negate, optional) tuple. 
+ + Handles multiple formats: + - String: "waveform.approximant: IMRPhenomXPHM" (with quotes in YAML) + - Dict (simple): {waveform.approximant: IMRPhenomXPHM} (without quotes in YAML) + - Dict (optional): {optional: true, pipeline: bilby} (marks dependency as optional) + + Parameters + ---------- + need : str or dict + A single dependency specification + + Returns + ------- + tuple + (attribute_list, match_value, is_negated, is_optional) + """ + negate = False + optional = False + + # Handle dict format (when YAML parses without quotes) + if isinstance(need, dict): + # Check for optional flag + if "optional" in need: + optional = bool(need.get("optional", False)) + # Remove optional key and process remaining as dependency + dep_dict = {k: v for k, v in need.items() if k != "optional"} + if len(dep_dict) == 1: + key, value = list(dep_dict.items())[0] + key_str = str(key).strip() + attribute = key_str.split(".") + match_value = str(value).strip() + + # Check for negation + if match_value.startswith("!"): + negate = True + match_value = match_value[1:].strip() + + return (attribute, match_value, negate, optional) + else: + raise ValueError( + f"Invalid optional dependency format: expected one dependency key " + f"plus 'optional', got {list(dep_dict.keys())}: {need}" + ) + # Handle simple dict format + elif len(need) == 1: + key, value = list(need.items())[0] + key_str = str(key).strip() + attribute = key_str.split(".") + match_value = str(value).strip() + + # Check for negation + if match_value.startswith("!"): + negate = True + match_value = match_value[1:].strip() + + return (attribute, match_value, negate, optional) + else: + raise ValueError( + f"Invalid dependency dict format: expected a single key-value pair, " + f"got {len(need)} entries: {need}" + ) + + # Handle string format (with quotes in YAML) + try: + # Handle "attribute: value" format + parts = need.split(":", 1) + attribute = parts[0].strip().split(".") + match_value = parts[1].strip() + + # Check for 
negation + if match_value.startswith("!"): + negate = True + match_value = match_value[1:].strip() + + return (attribute, match_value, negate, optional) + except (IndexError, AttributeError): + # Plain name without colon + return (["name"], need, False, optional) @property def job_id(self): @@ -133,26 +247,246 @@ def job_id(self, value): @property def dependencies(self): - """Return a list of analyses which this analysis depends upon.""" + """ + Return a list of analyses which this analysis depends upon. + + The dependency resolution supports complex logic: + - Top-level items in needs are OR'd together + - Nested lists represent AND conditions (all must match) + - Individual filters can be negated with ! + + Returns + ------- + list + List of analysis names that this analysis depends on + """ all_matches = [] if len(self._needs) == 0: return [] else: - matches = set({}) # set(self.event.analyses) - # matches.remove(self) + matches = set() requirements = self._process_dependencies(deepcopy(self._needs)) - for attribute, match in requirements: + + for requirement in requirements: + if isinstance(requirement, list): + # This is an AND group - all conditions must match + and_matches = set(self.event.analyses) + for parsed_dep in requirement: + # Handle both 3-tuple and 4-tuple formats + if len(parsed_dep) == 4: + attribute, match, negate, optional = parsed_dep + else: + attribute, match, negate = parsed_dep + optional = False + filtered_analyses = list( + filter( + lambda x: x.matches_filter(attribute, match, negate), + and_matches, + ) + ) + and_matches = set(filtered_analyses) + matches = set.union(matches, and_matches) + else: + # Single condition + # Handle both 3-tuple and 4-tuple formats + if len(requirement) == 4: + attribute, match, negate, optional = requirement + else: + attribute, match, negate = requirement + optional = False + filtered_analyses = list( + filter( + lambda x: x.matches_filter(attribute, match, negate), + self.event.analyses, + ) + ) + 
matches = set.union(matches, set(filtered_analyses)) + + # Exclude self-dependencies + for analysis in matches: + if analysis.name != self.name: + all_matches.append(analysis.name) + + return all_matches + + @property + def required_dependencies(self): + """ + Return a list of required (non-optional) dependencies. + + This evaluates the needs specification and returns only dependencies + that are not marked as optional. If a required dependency is not found + in the ledger, the analysis should not run. + + Returns + ------- + list + List of dependency specifications that are required + """ + if len(self._needs) == 0: + return [] + + required_specs = [] + requirements = self._process_dependencies(deepcopy(self._needs)) + + for requirement in requirements: + if isinstance(requirement, list): + # This is an AND group - check if all are optional + all_optional = all( + parsed_dep[3] if len(parsed_dep) == 4 else False + for parsed_dep in requirement + ) + if not all_optional: + required_specs.append(requirement) + else: + # Single condition - check if optional + is_optional = requirement[3] if len(requirement) == 4 else False + if not is_optional: + required_specs.append(requirement) + + return required_specs + + @property + def has_required_dependencies_satisfied(self): + """ + Check if all required dependencies are satisfied. + + A required dependency is satisfied if at least one analysis in the ledger + matches its specification. Optional dependencies don't affect this check. 
+ + Returns + ------- + bool + True if all required dependencies are satisfied (or there are no required deps), + False if any required dependency has no matches + """ + required_specs = self.required_dependencies + + if len(required_specs) == 0: + # No required dependencies, so they're all satisfied + return True + + for requirement in required_specs: + if isinstance(requirement, list): + # This is an AND group - all conditions must match at least one analysis + and_matches = set(self.event.analyses) + for parsed_dep in requirement: + if len(parsed_dep) == 4: + attribute, match, negate, optional = parsed_dep + else: + attribute, match, negate = parsed_dep + optional = False + + filtered_analyses = list( + filter( + lambda x: x.matches_filter(attribute, match, negate), + and_matches, + ) + ) + and_matches = set(filtered_analyses) + + # If no analyses match this AND group, requirement not satisfied + if len(and_matches) == 0: + return False + else: + # Single condition + if len(requirement) == 4: + attribute, match, negate, optional = requirement + else: + attribute, match, negate = requirement + optional = False + filtered_analyses = list( filter( - lambda x: x.matches_filter(attribute, match), + lambda x: x.matches_filter(attribute, match, negate), self.event.analyses, ) ) - matches = set.union(matches, set(filtered_analyses)) - for analysis in matches: - all_matches.append(analysis.name) - - return all_matches + + # If no analyses match this requirement, it's not satisfied + if len(filtered_analyses) == 0: + return False + + # All required dependencies have at least one match + return True + + @property + def resolved_dependencies(self): + """ + Get the list of dependencies that were resolved when this analysis was run. + + This is used to track if dependencies have changed since the analysis ran, + which would make the analysis stale. 
+ + Returns + ------- + list or None + List of analysis names that were dependencies when this ran, or None if not yet run + """ + if "resolved_dependencies" in self.meta: + return self.meta["resolved_dependencies"] + return None + + @resolved_dependencies.setter + def resolved_dependencies(self, value): + """ + Store the resolved dependencies for this analysis. + + Parameters + ---------- + value : list + List of analysis names that are current dependencies + """ + self.meta["resolved_dependencies"] = value + + @property + def is_stale(self): + """ + Check if this analysis is stale (dependencies have changed since it was run). + + An analysis is considered stale if: + 1. It has been run (has resolved_dependencies) + 2. The current dependencies differ from the resolved dependencies + + Returns + ------- + bool + True if the analysis is stale, False otherwise + """ + if self.resolved_dependencies is None: + # Never run, so not stale + return False + + current_deps = set(self.dependencies) + resolved_deps = set(self.resolved_dependencies) + + return current_deps != resolved_deps + + @property + def is_refreshable(self): + """ + Check if this analysis should be automatically refreshed when stale. + + Returns + ------- + bool + True if the analysis is marked as refreshable + """ + if "refreshable" in self.meta: + return self.meta["refreshable"] + return False + + @is_refreshable.setter + def is_refreshable(self, value): + """ + Mark this analysis as refreshable or not. + + Parameters + ---------- + value : bool + Whether the analysis should be automatically refreshed + """ + self.meta["refreshable"] = bool(value) @property def priors(self): @@ -161,6 +495,29 @@ def priors(self): else: priors = None return priors + + @priors.setter + def priors(self, value): + """ + Set priors with validation. 
+ + Parameters + ---------- + value : dict or PriorDict + The prior specification + """ + from asimov.priors import PriorDict + + if value is None: + self.meta["priors"] = None + elif isinstance(value, PriorDict): + self.meta["priors"] = value.to_dict() + elif isinstance(value, dict): + # Validate using pydantic + validated = PriorDict.from_dict(value) + self.meta["priors"] = validated.to_dict() + else: + raise TypeError(f"priors must be dict or PriorDict, got {type(value)}") @property def finished(self): @@ -175,7 +532,7 @@ def status(self): def status(self, value): self.status_str = value.lower() - def matches_filter(self, attribute, match): + def matches_filter(self, attribute, match, negate=False): """ Checks to see if this analysis matches a given filtering criterion. @@ -187,6 +544,8 @@ def matches_filter(self, attribute, match): - processing status + - pipeline + - name In addition, any quantity contained in the analysis metadata @@ -197,10 +556,12 @@ def matches_filter(self, attribute, match): Parameters ---------- - attribute : str - The name of the attribute to be tested + attribute : list + The attribute path to be tested (e.g., ["waveform", "approximant"]) match : str The string to be matched against the value of the attribute + negate : bool, optional + If True, invert the match result (default: False) Returns ------- @@ -211,20 +572,38 @@ def matches_filter(self, attribute, match): is_review = False is_status = False is_name = False + is_pipeline = False in_meta = False + if attribute[0] == "review": is_review = match.lower() == str(self.review.status).lower() elif attribute[0] == "status": is_status = match.lower() == self.status.lower() elif attribute[0] == "name": is_name = match == self.name + elif attribute[0] == "pipeline": + # Check pipeline.name attribute first + if hasattr(self, 'pipeline'): + if hasattr(self.pipeline, 'name'): + is_pipeline = match.lower() == self.pipeline.name.lower() + elif isinstance(self.pipeline, str): + is_pipeline = 
match.lower() == self.pipeline.lower() + # Also check in metadata as fallback + if not is_pipeline and 'pipeline' in self.meta: + is_pipeline = match.lower() == self.meta['pipeline'].lower() else: try: - in_meta = reduce(operator.getitem, attribute, self.meta) == match - except KeyError: + meta_value = reduce(operator.getitem, attribute, self.meta) + in_meta = str(meta_value).lower() == str(match).lower() + except (KeyError, TypeError, AttributeError): in_meta = False - return is_name | in_meta | is_status | is_review + result = is_name | in_meta | is_status | is_review | is_pipeline + + # Apply negation if requested + if negate: + return not result + return result def results(self, filename=None, handle=False, hash=None): store = Store(root=config.get("storage", "results_store")) @@ -246,7 +625,7 @@ def rundir(self): """ Return the run directory for this analysis. """ - if "rundir" in self.meta: + if "rundir" in self.meta and self.meta["rundir"] is not None: return os.path.abspath(self.meta["rundir"]) elif "working directory" in self.subject.meta: value = os.path.join(self.subject.meta["working directory"], self.name) @@ -314,9 +693,12 @@ def make_config(self, filename, template_directory=None, dryrun=False): if hasattr(pipeline, "config_template"): template_file = pipeline.config_template else: - from pkg_resources import resource_filename + try: + from importlib.resources import files + except ImportError: + from importlib_resources import files - template_file = resource_filename("asimov", f"configs/{template}") + template_file = str(files("asimov").joinpath(f"configs/{template}")) liq = Liquid(template_file) rendered = liq.render(production=self, analysis=self, config=config) @@ -336,6 +718,17 @@ def html(self): card = "" card += f"
" + + # Add running indicator for active analyses + if self.status in ["running", "processing"]: + card += """""" + + # Add stale indicator if applicable + if self.is_stale: + stale_class = "stale-refreshable" if self.is_refreshable else "stale" + stale_text = "Stale (will refresh)" if self.is_refreshable else "Stale" + card += f"""{stale_text}""" + card += f"

{self.name}" if self.comment: @@ -343,35 +736,92 @@ def html(self): f""" {self.comment}""" ) card += "

" + if self.status: card += f"""

{self.status}

""" if self.pipeline: - card += f"""

{self.pipeline.name}

""" - - if self.pipeline: - # self.pipeline.collect_pages() - card += self.pipeline.html() - - if self.rundir: - card += f"""

{production.rundir}

""" - else: - card += """ """ - - if "approximant" in production.meta: - card += f"""

Waveform approximant: + card += f"""

Pipeline: {self.pipeline.name}

""" + + # Build collapsible details section + has_details = bool( + self.rundir or + "approximant" in production.meta or + "sampler" in production.meta or + "quality" in production.meta or + self.pipeline or + self.dependencies or + self.resolved_dependencies or + (hasattr(self, 'analyses') and self.analyses) + ) + + if has_details: + card += """▶ Show details""" + card += """
""" + + # Show source analyses for SubjectAnalysis + if hasattr(self, 'analyses') and self.analyses: + if hasattr(self.analyses, "__iter__"): + card += """

Source Analyses:
""" + source_analysis_html = [] + for analysis in self.analyses: + status_color = status_map.get(analysis.status, 'secondary') + source_analysis_html.append( + f"""{analysis.name}""" + ) + card += " ".join(source_analysis_html) + card += """

""" + + # Show dependencies + if self.dependencies: + if hasattr(self.dependencies, "__iter__"): + card += """

Current Dependencies:
""" + card += ", ".join(self.dependencies) + card += """

""" + + # Show resolved dependencies if different from current + if self.resolved_dependencies and self.resolved_dependencies != self.dependencies: + if hasattr(self.dependencies, "__iter__"): + card += """

Resolved Dependencies (when run):
""" + card += ", ".join(self.resolved_dependencies) + card += """

""" + + if self.pipeline: + # self.pipeline.collect_pages() + card += self.pipeline.html() + + if self.rundir: + card += f"""

Run directory:
{production.rundir}

""" + + if "approximant" in production.meta: + card += f"""

Waveform approximant: {production.meta['approximant']}

""" - card += """ """ + # Add more metadata if available + if "sampler" in production.meta and production.meta["sampler"]: + if isinstance(production.meta["sampler"], dict): + for key, value in production.meta["sampler"].items(): + card += f"""

{key}: {value}

""" + + if "quality" in production.meta: + card += f"""

Quality: {production.meta['quality']}

""" + + card += """
""" + card += """
""" - if len(self.review) > 0: - for review in self.review: - card += review.html() + try: + if len(self.review) > 0: + for review in self.review: + card += review.html() + except TypeError: + # The mocked review object doesn't support len() + pass + return card def to_dict(self, event=True): @@ -414,12 +864,22 @@ def to_dict(self, event=True): dictionary[key] = value dictionary["status"] = self.status - dictionary["job id"] = self.job_id + # dictionary["job id"] = self.job_id # Remove duplicates of pipeline defaults - if self.pipeline.name.lower() in self.event.ledger.data["pipelines"]: + pipeline_obj = getattr(self, "pipeline", None) + if ( + hasattr(self, "event") + and self.event + and hasattr(self.event, "ledger") + and self.event.ledger + and "pipelines" in self.event.ledger.data + and pipeline_obj is not None + and hasattr(pipeline_obj, "name") + and pipeline_obj.name.lower() in self.event.ledger.data["pipelines"] + ): defaults = deepcopy( - self.event.ledger.data["pipelines"][self.pipeline.name.lower()] + self.event.ledger.data["pipelines"][pipeline_obj.name.lower()] ) else: defaults = {} @@ -431,6 +891,12 @@ def to_dict(self, event=True): defaults = update(defaults, deepcopy(self.event.meta)) dictionary = diff_dict(defaults, dictionary) + + # Ensure critical fields are always saved, even if they match defaults + # This is necessary to support old ledgers and ensure status updates persist + dictionary["status"] = self.status + if self.job_id is not None: + dictionary["job id"] = self.job_id if "repository" in self.meta: dictionary["repository"] = self.repository.url @@ -461,7 +927,7 @@ def __init__(self, subject, name, pipeline, status=None, comment=None, **kwargs) self.name = name pathlib.Path( - os.path.join(config.get("logging", "directory"), self.event.name, name) + os.path.join(config.get("logging", "location"), self.event.name, name) ).mkdir(parents=True, exist_ok=True) self.logger = logger.getChild("analysis").getChild( @@ -480,6 +946,9 @@ def 
__init__(self, subject, name, pipeline, status=None, comment=None, **kwargs) self.status_str = "none" self.meta = deepcopy(self.meta_defaults) + + # Initialize review object for this instance + self._reviews = Review() # Start by adding pipeline defaults if "pipelines" in self.event.ledger.data: @@ -505,7 +974,7 @@ def __init__(self, subject, name, pipeline, status=None, comment=None, **kwargs) self.pipeline = known_pipelines[pipeline.lower()](self) if "needs" in self.meta: - self._needs = self.meta.pop("needs") + self._needs = cast(List[Any], self.meta.pop("needs")) else: self._needs = [] @@ -566,42 +1035,145 @@ def __init__(self, subject, name, pipeline, status=None, comment=None, **kwargs) self.status_str = "none" self.meta = deepcopy(self.meta_defaults) + + # Initialize review object for this instance + self._reviews = Review() + self.meta = update(self.meta, deepcopy(self.subject.meta)) + # Avoid inheriting full productions/analyses blobs from the subject; they bloat the ledger + for noisy_key in ["productions", "analyses"]: + if noisy_key in self.meta: + self.meta.pop(noisy_key) self.meta = update(self.meta, deepcopy(kwargs)) - self._analysis_spec = self.meta.get("needs") - + self._analysis_spec = self.meta.get("needs") or self.meta.get("analyses") + # Store the analysis spec names for refresh checking (if it's just a list of names). + # This lets us detect when dependencies have changed without blocking submission. 
+ self._analysis_spec_names = [] if self._analysis_spec: - requirements = self._process_dependencies(self._analysis_spec) - self.analyses = [] - for attribute, match in requirements: - matches = set(self.subject.analyses) - filtered_analyses = list( - filter( - lambda x: x.matches_filter(attribute, match), subject.analyses - ) - ) - matches = set.intersection(matches, set(filtered_analyses)) - - for analysis in matches: - self.analyses.append(analysis) - self.productions = self.analyses + if isinstance(self._analysis_spec, list): + for spec_item in self._analysis_spec: + if isinstance(spec_item, str): + self._analysis_spec_names.append(spec_item) + elif isinstance(spec_item, dict) and len(spec_item) == 1: + # Single-key dict, add the value if it's a string + key, val = list(spec_item.items())[0] + if isinstance(val, str): + self._analysis_spec_names.append(val) + elif isinstance(self._analysis_spec, str): + self._analysis_spec_names.append(self._analysis_spec) + + # SubjectAnalysis does not participate in the dependency graph. + # Its _needs remain empty so it doesn't block submission. + self._needs = [] + + # Remove needs and analyses from meta to prevent duplication later if "needs" in self.meta: self.meta.pop("needs") + if "analyses" in self.meta: + self.meta.pop("analyses") + + # Initialize analyses lists (will be populated by resolve_analyses) + self.analyses = [] + self.productions = [] + + # Resolve analyses from smart dependencies + # Note: This may be incomplete if not all analyses are loaded yet. + # Event.update_graph() will call resolve_analyses() again after all productions are loaded. 
+ if self._analysis_spec: + self.resolve_analyses() self.pipeline = pipeline.lower() self.pipeline = known_pipelines[pipeline.lower()](self) - if "needs" in self.meta: - self._needs = self.meta.pop("needs") - else: - self._needs = [] - if "comment" in kwargs: self.comment = kwargs["comment"] else: self.comment = None + def resolve_analyses(self): + """ + Resolve analyses from smart dependencies. + + This method evaluates the _analysis_spec (smart dependencies) against + the current set of analyses in the subject/event and populates self.analyses + with the matching analyses. + + This can be called multiple times safely: + - During __init__ (may be incomplete if not all analyses are loaded) + - After all productions are loaded (via Event.update_graph) + - When dependencies change + + Returns + ------- + None + """ + if not self._analysis_spec: + return + + requirements = self._process_dependencies(self._analysis_spec) + self.analyses = [] + + for requirement in requirements: + if isinstance(requirement, list): + # This is an AND group - all conditions must match + and_matches = set(self.subject.analyses) + for parsed_dep in requirement: + # Handle both 3-tuple and 4-tuple formats + if len(parsed_dep) == 4: + attribute, match, negate, optional = parsed_dep + else: + attribute, match, negate = parsed_dep + optional = False + filtered_analyses = list( + filter( + lambda x: x.matches_filter(attribute, match, negate), and_matches + ) + ) + and_matches = set(filtered_analyses) + # Add all matches from this AND group + for analysis in and_matches: + if analysis not in self.analyses: + self.analyses.append(analysis) + else: + # Single condition + # Handle both 3-tuple and 4-tuple formats + if len(requirement) == 4: + attribute, match, negate, optional = requirement + else: + attribute, match, negate = requirement + optional = False + filtered_analyses = list( + filter( + lambda x: x.matches_filter(attribute, match, negate), self.subject.analyses + ) + ) + # Add all matches 
from this single condition + for analysis in filtered_analyses: + if analysis not in self.analyses: + self.analyses.append(analysis) + + # Keep productions in sync + self.productions = self.analyses + + def source_analyses_ready(self): + """ + Check if all source analyses are finished and ready for processing. + + Returns + ------- + bool + True if all source analyses have finished status, False otherwise + """ + if not hasattr(self, 'analyses') or not self.analyses: + return False + + finished_statuses = {"finished", "uploaded", "processing"} + for analysis in self.analyses: + if analysis.status not in finished_statuses: + return False + return True + def to_dict(self, event=True): """ Return this production as a dictionary. @@ -615,6 +1187,11 @@ def to_dict(self, event=True): dictionary = {} dictionary = update(dictionary, self.meta) + # Keep resolved_dependencies in serialization for staleness detection + # This tracks which analyses were actually used when the job was run, + # allowing the refresh logic to detect when new analyses match the criteria + # Note: resolved_dependencies is set by PESummary.submit_dag() during submission + if not event: dictionary["event"] = self.event.name dictionary["name"] = self.name @@ -626,7 +1203,15 @@ def to_dict(self, event=True): dictionary["pipeline"] = self.pipeline.name.lower() dictionary["comment"] = self.comment - dictionary["analyses"] = self._analysis_spec + # Always persist the original analysis specification (smart dependencies) + # rather than the resolved list of analysis names. + # This ensures that smart dependencies are re-evaluated on each load, + # and the ledger doesn't get polluted with resolved names. 
+ if hasattr(self, "_analysis_spec") and self._analysis_spec: + dictionary["analyses"] = self._analysis_spec + elif hasattr(self, "analyses") and self.analyses: + # Fallback: if no _analysis_spec but we have analyses, save as names + dictionary["analyses"] = [analysis.name for analysis in self.analyses] if self.review: dictionary["review"] = self.review.to_dicts() @@ -637,9 +1222,36 @@ def to_dict(self, event=True): dictionary["quality"] = self.meta["quality"] if "priors" in self.meta: dictionary["priors"] = self.meta["priors"] + + # Include remaining meta fields for key, value in self.meta.items(): + # Do not allow a meta-level "analyses" entry to overwrite the + # explicitly constructed analyses list above. + if key in ["analyses"]: + continue dictionary[key] = value - if "repository" in self.meta: + + # Remove duplicated defaults to keep the ledger minimal, mirroring Analysis.to_dict + defaults = {} + pipeline_obj = getattr(self, "pipeline", None) + if ( + hasattr(self.event, "ledger") + and self.event.ledger + and "pipelines" in self.event.ledger.data + and pipeline_obj is not None + and hasattr(pipeline_obj, "name") + and pipeline_obj.name.lower() in self.event.ledger.data["pipelines"] + ): + defaults = deepcopy( + self.event.ledger.data["pipelines"][pipeline_obj.name.lower()] + ) + + # Subject-level defaults + defaults = update(defaults, deepcopy(self.subject.meta)) + + dictionary = diff_dict(defaults, dictionary) + + if "repository" in dictionary: dictionary["repository"] = self.repository.url if "ledger" in dictionary: dictionary.pop("ledger") @@ -672,13 +1284,34 @@ def from_dict(cls, parameters, subject): return cls(subject, name, pipeline, **parameters) + @property + def rundir(self): + """ + Return the run directory for this subject analysis. 
+ """ + if "rundir" in self.meta: + return os.path.abspath(self.meta["rundir"]) + elif "working directory" in self.subject.meta: + value = os.path.join(self.subject.meta["working directory"], self.name) + self.meta["rundir"] = value + return os.path.abspath(self.meta["rundir"]) + else: + return None + + @rundir.setter + def rundir(self, value): + """ + Set the run directory. + """ + self.meta["rundir"] = value + class ProjectAnalysis(Analysis): """ A multi-subject analysis. """ - meta_defaults = {"scheduler": {}, "sampler": {}, "review": {}} + meta_defaults = {"scheduler": {}, "sampler": {}} def __init__(self, name, pipeline, ledger=None, **kwargs): """ """ @@ -696,7 +1329,8 @@ def __init__(self, name, pipeline, ledger=None, **kwargs): self._analysis_spec = kwargs["analyses"] else: self._analysis_spec = {} - requirements = self._process_dependencies(self._analysis_spec) + + # Initialize analyses list (will be populated by resolve_analyses) self.analyses = [] # set up the working directory @@ -712,19 +1346,11 @@ def __init__(self, name, pipeline, ledger=None, **kwargs): self.repository = None self._subject_obs = [] - for subject in self.subjects: - if self._analysis_spec: - matches = set(subject.analyses) - for attribute, match in requirements: - filtered_analyses = list( - filter( - lambda x: x.matches_filter(attribute, match), - subject.analyses, - ) - ) - matches = set.intersection(matches, set(filtered_analyses)) - for analysis in matches: - self.analyses.append(analysis) + + # Resolve analyses from smart dependencies across subjects + if self._analysis_spec: + self.resolve_analyses() + if "status" in kwargs: self.status_str = kwargs["status"].lower() else: @@ -738,7 +1364,7 @@ def __init__(self, name, pipeline, ledger=None, **kwargs): self.logger.warning(f"The pipeline {pipeline} could not be found.") if "needs" in self.meta: - self._needs = self.meta.pop("needs") + self._needs = cast(List[Any], self.meta.pop("needs")) else: self._needs = [] @@ -748,6 +1374,9 
@@ def __init__(self, name, pipeline, ledger=None, **kwargs): self.comment = None self.meta = deepcopy(self.meta_defaults) + + # Initialize review object for this instance + self._reviews = Review() # Start by adding pipeline defaults if "pipelines" in self.ledger.data: @@ -776,7 +1405,66 @@ def subjects(self): @property def events(self): - return self.subjects() + return self.subjects + + def resolve_analyses(self): + """ + Resolve analyses from smart dependencies across all subjects. + + This method evaluates the _analysis_spec (smart dependencies) against + the analyses in each subject and populates self.analyses with matches. + + Returns + ------- + None + """ + if not self._analysis_spec: + return + + requirements = self._process_dependencies(self._analysis_spec) + self.analyses = [] + + for subject in self.subjects: + for requirement in requirements: + if isinstance(requirement, list): + # This is an AND group - all conditions must match + and_matches = set(subject.analyses) + for parsed_dep in requirement: + # Handle both 3-tuple and 4-tuple formats + if len(parsed_dep) == 4: + attribute, match, negate, optional = parsed_dep + else: + attribute, match, negate = parsed_dep + optional = False + filtered_analyses = list( + filter( + lambda x: x.matches_filter(attribute, match, negate), + and_matches, + ) + ) + and_matches = set(filtered_analyses) + # Add all matches from this AND group + for analysis in and_matches: + if analysis not in self.analyses: + self.analyses.append(analysis) + else: + # Single condition + # Handle both 3-tuple and 4-tuple formats + if len(requirement) == 4: + attribute, match, negate, optional = requirement + else: + attribute, match, negate = requirement + optional = False + filtered_analyses = list( + filter( + lambda x: x.matches_filter(attribute, match, negate), + subject.analyses, + ) + ) + # Add all matches from this single condition + for analysis in filtered_analyses: + if analysis not in self.analyses: + 
self.analyses.append(analysis) @classmethod def from_dict(cls, parameters, ledger=None): @@ -803,13 +1491,24 @@ def from_dict(cls, parameters, ledger=None): @property def dependencies(self): - """Return a list of analyses which this analysis depends upon.""" + """ + Return a list of analyses which this analysis depends upon. + + The dependency resolution supports complex logic: + - Top-level items in needs are OR'd together + - Nested lists represent AND conditions (all must match) + - Individual filters can be negated with ! + + Returns + ------- + list + List of analysis names that this analysis depends on + """ all_matches = [] if len(self._needs) == 0: return [] else: - matches = set({}) # set(self.event.analyses) - # matches.remove(self) + matches = set() requirements = self._process_dependencies(deepcopy(self._needs)) analyses = [] for subject in self._subjects: @@ -817,20 +1516,48 @@ def dependencies(self): self._subject_obs.append(sub) for analysis in sub.analyses: analyses.append(analysis) - for attribute, match in requirements: - filtered_analyses = list( - filter( - lambda x: x.matches_filter(attribute, match), - analyses, + + for requirement in requirements: + if isinstance(requirement, list): + # This is an AND group - all conditions must match + and_matches = set(analyses) + for parsed_dep in requirement: + # Handle both 3-tuple and 4-tuple formats + if len(parsed_dep) == 4: + attribute, match, negate, optional = parsed_dep + else: + attribute, match, negate = parsed_dep + optional = False + filtered_analyses = list( + filter( + lambda x: x.matches_filter(attribute, match, negate), + and_matches, + ) + ) + and_matches = set(filtered_analyses) + matches = set.union(matches, and_matches) + else: + # Single condition + # Handle both 3-tuple and 4-tuple formats + if len(requirement) == 4: + attribute, match, negate, optional = requirement + else: + attribute, match, negate = requirement + optional = False + filtered_analyses = list( + filter( + lambda x: 
x.matches_filter(attribute, match, negate), + analyses, + ) ) - ) - matches = set.union(matches, set(filtered_analyses)) + matches = set.union(matches, set(filtered_analyses)) + for analysis in matches: all_matches.append(analysis.name) return all_matches - def to_dict(self): + def to_dict(self, event=True): """ Return this project production as a dictionary. @@ -852,7 +1579,7 @@ def to_dict(self): dictionary["comment"] = self.comment if self.review: - dictionary["review"] = self.review.copy() # .to_dicts() + dictionary["review"] = self.review.to_dicts() dictionary["needs"] = self.dependencies @@ -860,8 +1587,10 @@ def to_dict(self): dictionary["quality"] = self.meta["quality"] if "priors" in self.meta: dictionary["priors"] = self.meta["priors"] + for key, value in self.meta.items(): dictionary[key] = value + if "repository" in self.meta: dictionary["repository"] = self.repository.url if "ledger" in dictionary: @@ -872,21 +1601,40 @@ def to_dict(self): dictionary["subjects"] = self._subjects dictionary["analyses"] = self._analysis_spec - output = dictionary + # Remove duplicated defaults: pipeline defaults + any project-level defaults + defaults = {} + pipeline_obj = getattr(self, "pipeline", None) + if ( + hasattr(self, "ledger") + and self.ledger + and "pipelines" in self.ledger.data + and pipeline_obj is not None + and hasattr(pipeline_obj, "name") + and pipeline_obj.name.lower() in self.ledger.data["pipelines"] + ): + defaults = deepcopy( + self.ledger.data["pipelines"][pipeline_obj.name.lower()] + ) - return output + # Project-level defaults if present + if hasattr(self, "ledger") and self.ledger and "project" in self.ledger.data: + defaults = update(defaults, deepcopy(self.ledger.data["project"])) + + dictionary = diff_dict(defaults, dictionary) + + return dictionary @property def rundir(self): """ - Returns the rundir for this event + Returns the rundir for this project analysis """ if "rundir" in self.meta: - return self.meta["rundir"] + return 
os.path.abspath(self.meta["rundir"]) elif self.work_dir: self.meta["rundir"] = self.work_dir - return self.meta["rundir"] + return os.path.abspath(self.meta["rundir"]) else: return None @@ -925,6 +1673,37 @@ def __init__(self, subject, name, pipeline, **kwargs): """ self.category = config.get("general", "calibration_directory") + + # Early validation: Check for minimum frequency in wrong locations (v0.7) + # We need to check both the subject (event) metadata and the kwargs + # First, build the effective metadata as it will be in super().__init__ + temp_meta = deepcopy(Analysis.meta_defaults) + + # Add pipeline defaults if available + if hasattr(subject, 'ledger') and subject.ledger and "pipelines" in subject.ledger.data: + if pipeline in subject.ledger.data["pipelines"]: + temp_meta = update(temp_meta, deepcopy(subject.ledger.data["pipelines"][pipeline])) + + # Add subject defaults + temp_meta = update(temp_meta, deepcopy(subject.meta)) + + # Add kwargs + temp_meta = update(temp_meta, deepcopy(kwargs)) + + # Now validate + if "quality" in temp_meta and "minimum frequency" in temp_meta["quality"]: + raise ValueError( + "Minimum frequency must be specified in the 'waveform' section, " + "not in the 'quality' section. Please update your blueprint to move " + "'minimum frequency' from 'quality' to 'waveform'." + ) + if "likelihood" in temp_meta and "minimum frequency" in temp_meta["likelihood"]: + raise ValueError( + "Minimum frequency must be specified in the 'waveform' section, " + "not in the 'likelihood' section. Please update your blueprint to move " + "'minimum frequency' from 'likelihood' to 'waveform'." 
+ ) + super().__init__(subject, name, pipeline, **kwargs) self._checks() @@ -953,11 +1732,9 @@ def __init__(self, subject, name, pipeline, **kwargs): self.meta["sampler"]["lmax"] = self.meta["lmax"] # Check that the upper frequency is included, otherwise calculate it - if "quality" in self.meta: - if ("maximum frequency" not in self.meta["quality"]) and ( - "sample rate" in self.meta["likelihood"] - ): - self.meta["quality"]["maximum frequency"] = {} + if "sample rate" in self.meta["likelihood"] and "interferometers" in self.meta: + if "maximum frequency" not in self.meta.get("quality", {}): + self.meta.setdefault("quality", {})["maximum frequency"] = {} # Account for the PSD roll-off with the 0.875 factor for ifo in self.meta["interferometers"]: self.meta["quality"]["maximum frequency"][ifo] = int( @@ -1092,6 +1869,13 @@ def get_configuration(self): return ini + def _check_compatible(self, previous_analysis): + """ + Placeholder compatibility check between analyses. + Extend when additional metadata comparisons are needed. + """ + return True + def _collect_psds(self, format="ascii"): """ Collect the required psds for this production. diff --git a/asimov/asimov.conf b/asimov/asimov.conf index c988ba0d..d6b344fd 100644 --- a/asimov/asimov.conf +++ b/asimov/asimov.conf @@ -2,7 +2,7 @@ git_default = . rundir_default = working calibration=C01 -calibration_directory=C01_offline +calibration_directory=analyses webroot = pages/ logger = file diff --git a/asimov/blueprints.py b/asimov/blueprints.py new file mode 100644 index 00000000..f5e11098 --- /dev/null +++ b/asimov/blueprints.py @@ -0,0 +1,323 @@ +""" +Code to handle blueprints and their associated specification. 
+""" + +import pydantic +from pydantic import BaseModel, ConfigDict, model_validator +import yaml + +def select_blueprint_kind(file_path: str) -> tuple[type, dict]: + + with open(file_path, "r") as f: + blueprint_data = yaml.safe_load(f) + + kind = blueprint_data.pop("kind", None) + if kind is None: + raise ValueError("Blueprint 'kind' is missing from the blueprint data.") + if kind.lower() == "analysis": + return Analysis, blueprint_data + elif kind.lower() in {"event", "subject"}: + return Subject, blueprint_data + else: + raise ValueError(f"Unknown blueprint kind: {kind}") + +class Blueprint(BaseModel): + pass + + +class Waveform(Blueprint): + """ + A blueprint defining the configuration for a waveform model. + """ + enforce_signal_duration: bool | None = pydantic.Field( + alias="enforce signal duration", + description="Whether to enforce the signal duration in the waveform model.", + default=None, + ) + generator: str | None = pydantic.Field( + alias="generator", + description="The waveform generator to use.", + default=None, + ) + reference_frequency: float | None = pydantic.Field( + alias="reference frequency", + description="The reference frequency for the waveform model.", + default=None, + ) + start_frequency: float | None = pydantic.Field( + alias="start frequency", + description="The start frequency for the waveform model.", + default=None, + ) + conversion_function: str | None = pydantic.Field( + alias="conversion function", + description="The conversion function to use in the waveform model.", + default=None, + ) + approximant: str | None = pydantic.Field( + alias="approximant", + description="The approximant to use in the waveform model.", + default=None, + ) + pn_spin_order: int | None = pydantic.Field( + alias="pn spin order", + description="The post-Newtonian spin order to use in the waveform model.", + default=None, + ) + pn_phase_order: int | None = pydantic.Field( + alias="pn phase order", + description="The post-Newtonian phase order to use in 
the waveform model.", + default=None, + ) + pn_amplitude_order: int | None = pydantic.Field( + alias="pn amplitude order", + description="The post-Newtonian amplitude order to use in the waveform model.", + default=None, + ) + file: str | None = pydantic.Field( + alias="file", + description="The file containing an NR waveform.", + default=None, + ) + arguments: dict | None = pydantic.Field( + alias="arguments", + description="Additional arguments for the waveform model.", + default=None, + ) + mode_array: list[str] | None = pydantic.Field( + alias="mode array", + description="The mode array to use in the waveform model.", + default=None, + ) + minimum_frequency: dict[str, float] | None = pydantic.Field( + alias="minimum frequency", + description="The minimum frequency for the waveform model, given as a dictionary of values per interferometer.", + default=None, + ) + + + model_config = ConfigDict(extra='forbid') + +class Calibration(Blueprint): + """ + A blueprint defining the configuration for calibration. + """ + sample: bool | None = pydantic.Field( + default=None, + description="Whether to sample calibration parameters. If set to True the likelihood will sample over the calibration uncertainty." + ) + + model_config = ConfigDict(extra='forbid') + +class Marginalisation(Blueprint): + """ + A blueprint defining the configuration for marginalisation. + """ + time: bool | None = pydantic.Field( + default=None, + description="Whether to marginalise over time." + ) + phase: bool | None = pydantic.Field( + default=None, + description="Whether to marginalise over phase." + ) + distance: bool | None = pydantic.Field( + default=None, + description="Whether to marginalise over distance." + ) + calibration: bool | None = pydantic.Field( + default=None, + alias="Calibration", + description="Whether to marginalise over calibration." 
+ ) + + model_config = ConfigDict(extra='forbid') + +class ROQ(Blueprint): + """ + A blueprint defining the configuration for Reduced Order Quadrature (ROQ). + """ + folder: str | None = pydantic.Field( + default=None, + description="The folder containing the ROQ basis." + ) + weights: str | None = pydantic.Field( + default=None, + description="The file containing the ROQ weights." + ) + scale: float | None = pydantic.Field( + default=None, + description="The scale factor for the ROQ." + ) + linear_matrix: str | None = pydantic.Field( + default=None, + alias="linear matrix", + description="The file containing the linear matrix for the ROQ." + ) + quadratic_matrix: str | None = pydantic.Field( + default=None, + alias="quadratic matrix", + description="The file containing the quadratic matrix for the ROQ." + ) + + model_config = ConfigDict(extra='forbid') + +class RelativeBinning(Blueprint): + """ + A blueprint defining the configuration for Relative Binning. + """ + fiducial_parameters: dict | None = pydantic.Field( + default=None, + alias="fiducial parameters", + description="The fiducial parameters for relative binning." + ) + update_fiducial_parameters: bool | None = pydantic.Field( + default=None, + alias="update fiducial parameters", + description="Whether to update the fiducial parameters during the analysis." + ) + epsilon: float | None = pydantic.Field( + default=None, + description="The epsilon parameter for relative binning." + ) + + model_config = ConfigDict(extra='forbid') + + +class Likelihood(Blueprint): + """ + Configuration parameters for the likelihood. + """ + sample_rate: int = pydantic.Field( + alias="sample rate", + description="The sample rate for the likelihood." + ) + psd_length: int | None = pydantic.Field( + alias="psd length", + description="The length of the data segment used to calculate the PSD. 
Normally, and by default, this should be the same as the sample rate.", + default=None, + ) + time_domain_source_model: str | None = pydantic.Field( + alias="time domain source model", + description="The time domain source model to use in the likelihood.", + default=None, + ) + frequency_domain_source_model: str | None = pydantic.Field( + alias="frequency domain source model", + description="The frequency domain source model to use in the likelihood.", + default=None, + ) + coherence_test: bool | None = pydantic.Field( + alias="coherence test", + description="Whether to perform a coherence test in the likelihood.", + default=None, + ) + post_trigger_time: float | None = pydantic.Field( + alias="post trigger time", + description="The amount of time after the trigger to include in the likelihood (in seconds).", + default=None, + ) + roll_off_time: float | None = pydantic.Field( + alias="roll off", + description="The amount of time to roll off the window (in seconds).", + default=1.0, + ) + time_reference: str | None = pydantic.Field( + alias="time reference", + description="The time reference for the likelihood.", + default=None, + ) + reference_frame: str | None = pydantic.Field( + alias="reference frame", + description="The reference frame for the likelihood.", + default=None, + ) + type: str | None = pydantic.Field( + alias="type", + description="The type of likelihood to use.", + default=None, + ) + kwargs: dict | None = pydantic.Field( + alias="kwargs", + description="Additional keyword arguments for the likelihood.", + default=None, + ) + marginalisation: Marginalisation | None = pydantic.Field( + alias="marginalisation", + description="Configuration parameters for marginalisation in the likelihood.", + default=None, + ) + roq: ROQ | None = pydantic.Field( + alias="roq", + description="Configuration parameters for Reduced Order Quadrature (ROQ) in the likelihood.", + default=None, + ) + relative_binning: RelativeBinning | None = pydantic.Field( + 
alias="relative binning", + description="Configuration parameters for Relative Binning in the likelihood.", + default=None, + ) + + + model_config = ConfigDict(extra='forbid') + + @model_validator(mode="after") + def default_psd_length(self) -> "Likelihood": + if self.psd_length is None: + self.psd_length = self.sample_rate + return self + + +class Analysis(Blueprint): + """ + A blueprint defining the configuration for an analysis task. + """ + name: str + comment: str + likelihood: Likelihood | None = pydantic.Field( + default=None, + description="Configuration parameters for the likelihood." + ) + waveform: Waveform | None = pydantic.Field( + default=None, + description="Configuration parameters for the waveform model." + ) + + model_config = ConfigDict(extra='forbid') + +class Prior(Blueprint): + """ + A blueprint defining the configuration for a prior. + """ + name: str | None = pydantic.Field( + default=None, + description="The name of the prior distribution." + ) + minimum: float | None = pydantic.Field( + default=None, + description="The minimum value for the prior." + ) + maximum: float | None = pydantic.Field( + default=None, + description="The maximum value for the prior." + ) + + model_config = ConfigDict(extra='forbid') + +class Subject(Blueprint): + """ + A blueprint defining the configuration for a subject. + """ + name: str + event_time: float = pydantic.Field( + alias="event time", + description="The GPS time of the event." + ) + priors: dict[str, Prior] | None = pydantic.Field( + default=None, + description="A dictionary of prior configurations for the subject." + ) + + model_config = ConfigDict(extra='forbid') + + diff --git a/asimov/cli/application.py b/asimov/cli/application.py index ca7185f2..3e3e37e8 100644 --- a/asimov/cli/application.py +++ b/asimov/cli/application.py @@ -3,6 +3,12 @@ Inspired by the kubectl apply approach from kubernetes. 
""" +import os +import sys +from copy import deepcopy +from datetime import datetime +from pathlib import Path + import click import requests import yaml @@ -10,9 +16,9 @@ from asimov import LOGGER_LEVEL, logger import asimov.event from asimov.analysis import ProjectAnalysis -from asimov import current_ledger as ledger from asimov.ledger import Ledger from asimov.utils import update +from asimov.strategies import expand_strategy from copy import deepcopy from datetime import datetime import sys @@ -27,8 +33,33 @@ logger.setLevel(LOGGER_LEVEL) -def apply_page(file, event, ledger=ledger, update_page=False): - if file[:4] == "http": +def get_ledger(): + """ + Get the current ledger instance. + + Reloads the ledger to ensure we have the latest state, + preventing issues where the ledger is cached at import time. + + Returns + ------- + Ledger + The current ledger instance. + """ + from asimov import config + if config.get("ledger", "engine") == "yamlfile": + from asimov.ledger import YAMLLedger + return YAMLLedger(config.get("ledger", "location")) + else: + from asimov import current_ledger + return current_ledger + + +def apply_page(file, event=None, ledger=None, update_page=False): + # Get ledger if not provided + if ledger is None: + ledger = get_ledger() + + if file.startswith("http://") or file.startswith("https://"): r = requests.get(file) if r.status_code == 200: data = r.text @@ -47,91 +78,125 @@ def apply_page(file, event, ledger=ledger, update_page=False): if document["kind"] == "event": logger.info("Found an event") document.pop("kind") - event = asimov.event.Event.from_yaml(yaml.dump(document)) + event_obj = asimov.event.Event.from_yaml(yaml.dump(document)) + # Check if the event is in the ledger already - if event.name in ledger.events and update_page is True: - old_event = deepcopy(ledger.events[event.name]) + # ledger.events is a dict with event names as keys + event_exists = event_obj.name in ledger.events + + if event_exists and update_page is True: + 
old_event = deepcopy(ledger.events[event_obj.name]) for key in ["name", "productions", "working directory", "repository", "ledger"]: old_event.pop(key, None) - analyses = [ - # I appreciate this looks insane, but the way the yaml stores these - # is poorly designed. - {list(prod.keys())[0]: update(list(prod.values())[0], old_event)} - for prod in ledger.events[event.name]["productions"] - ] + analyses = [] + for prod in ledger.events[event_obj.name].get("productions", []): + prod_name = None + prod_data = None + + if isinstance(prod, dict) and len(prod) == 1: + prod_name, prod_data = next(iter(prod.items())) + elif isinstance(prod, dict): + prod_name = prod.get("name") + if prod_name: + prod_data = {k: v for k, v in prod.items() if k != "name"} + else: + prod_data = prod + + if prod_data is None: + prod_data = {} + + merged = update(prod_data, old_event, inplace=False) + + if prod_name: + analyses.append({prod_name: merged}) + else: + analyses.append(merged) # Add the old version to the history if "history" not in ledger.data: ledger.data["history"] = {} - history = ledger.data["history"].get(event.name, {}) + history = ledger.data["history"].get(event_obj.name, {}) version = f"version-{len(history)+1}" history[version] = old_event history[version]["date changed"] = datetime.now() - ledger.data["history"][event.name] = history + ledger.data["history"][event_obj.name] = history ledger.save() - update(ledger.events[event.name], event.meta) - ledger.events[event.name]["productions"] = analyses - ledger.events[event.name].pop("ledger", None) + update(ledger.events[event_obj.name], event_obj.meta) + ledger.events[event_obj.name]["productions"] = analyses + ledger.events[event_obj.name].pop("ledger", None) click.echo( - click.style("●", fg="green") + f" Successfully updated {event.name}" + click.style("●", fg="green") + f" Successfully updated {event_obj.name}" ) - elif event.name not in ledger.events and update_page is False: - ledger.update_event(event) + elif not 
event_exists and update_page is False: + ledger.update_event(event_obj) click.echo( - click.style("●", fg="green") + f" Successfully added {event.name}" + click.style("●", fg="green") + f" Successfully added {event_obj.name}" ) - logger.info(f"Added {event.name} to project") + logger.info(f"Added {event_obj.name} to project") - elif event.name not in ledger.events and update_page is True: + elif not event_exists and update_page is True: click.echo( click.style("●", fg="red") - + f" {event.name} cannot be updated as there is no record of it in the project." + + f" {event_obj.name} cannot be updated as there is no record of it in the project." ) else: click.echo( click.style("●", fg="red") - + f" {event.name} already exists in this project." + + f" {event_obj.name} already exists in this project." ) elif document["kind"] == "analysis": logger.info("Found an analysis") document.pop("kind") + + # Expand strategy if present + expanded_documents = expand_strategy(document) + + # Determine event once for all expanded analyses if event: event_s = event else: if "event" in document: event_s = document["event"] else: - prompt = "Which event should these be applied to?" + num_analyses = len(expanded_documents) + if num_analyses > 1: + prompt = f"Which event should these {num_analyses} analyses be applied to?" + else: + prompt = "Which event should these be applied to?" 
event_s = str(click.prompt(prompt)) - try: - event_o = ledger.get_event(event_s)[0] - except KeyError as e: - click.echo( - click.style("●", fg="red") - + f" Could not apply a production, couldn't find the event {event}" - ) - logger.exception(e) - production = asimov.event.Production.from_dict( - parameters=document, subject=event_o, ledger=ledger - ) - try: - ledger.add_analysis(production, event=event_o) - click.echo( - click.style("●", fg="green") - + f" Successfully applied {production.name} to {event_o.name}" - ) - logger.info(f"Added {production.name} to {event_o.name}") - except ValueError as e: - click.echo( - click.style("●", fg="red") - + f" Could not apply {production.name} to {event_o.name} as " - + "an analysis already exists with this name" - ) - logger.exception(e) + + for expanded_doc in expanded_documents: + + try: + event_obj = ledger.get_event(event_s)[0] + except KeyError as e: + click.echo( + click.style("●", fg="red") + + f" Could not apply a production, couldn't find the event {event}" + ) + logger.exception(e) + continue + production = asimov.event.Production.from_dict( + parameters=expanded_doc, subject=event_obj, ledger=ledger + ) + try: + ledger.add_analysis(production, event=event_obj) + click.echo( + click.style("●", fg="green") + + f" Successfully applied {production.name} to {event_obj.name}" + ) + logger.info(f"Added {production.name} to {event_obj.name}") + except ValueError as e: + click.echo( + click.style("●", fg="red") + + f" Could not apply {production.name} to {event_obj.name} as " + + "an analysis already exists with this name" + ) + logger.exception(e) elif document["kind"].lower() == "postprocessing": # Handle a project analysis @@ -142,8 +207,8 @@ def apply_page(file, event, ledger=ledger, update_page=False): if event: try: - event_o = ledger.get_event(event_s)[0] - level = event_o + event_obj = ledger.get_event(event_s)[0] + level = event_obj except KeyError as e: click.echo( click.style("●", fg="red") @@ -205,6 +270,128 
@@ def apply_page(file, event, ledger=ledger, update_page=False): ) logger.exception(e) + elif document["kind"].lower() == "analysisbundle": + # Handle analysis bundle - a collection of analysis references + logger.info("Found an analysis bundle") + bundle_name = document.get("name", "unnamed bundle") + analyses_refs = document.get("analyses", []) + + if not event: + click.echo( + click.style("●", fg="red") + + f" Analysis bundle '{bundle_name}' requires an event to be specified with -e" + ) + logger.error(f"Analysis bundle '{bundle_name}' requires an event to be specified") + continue + + try: + event_obj = ledger.get_event(event)[0] + except KeyError as e: + click.echo( + click.style("●", fg="red") + + f" Could not apply bundle '{bundle_name}', couldn't find the event {event}" + ) + logger.exception(e) + continue + + click.echo( + click.style("●", fg="cyan") + + f" Applying bundle '{bundle_name}' ({len(analyses_refs)} analyses) to {event_obj.name}" + ) + + # Resolve and apply each analysis in the bundle + for analysis_ref in analyses_refs: + # Analysis ref can be: + # - A string: "bayeswave-psd" (references file stem) + # - A dict: {"name": "...", ...} (inline definition) + + if isinstance(analysis_ref, str): + # Reference by file stem - need to find and load the file + analysis_file_name = f"{analysis_ref}.yaml" + + # Try to find the file in common locations + search_paths = [ + Path.cwd(), # Current directory + Path.cwd() / "analyses", # Local analyses dir + ] + + # Also check ASIMOV_DATA_PATH if set + if "ASIMOV_DATA_PATH" in os.environ: + data_path = Path(os.environ["ASIMOV_DATA_PATH"]) + search_paths.append(data_path / "analyses") + + # Check default asimov-data location + home = Path.home() + search_paths.append(home / ".asimov" / "gwdata" / "asimov-data" / "analyses") + + analysis_file = None + for search_path in search_paths: + candidate = search_path / analysis_file_name + # Ensure the resolved path is within the expected search path + try: + candidate = 
candidate.resolve() + search_path_resolved = search_path.resolve() + if candidate.is_relative_to(search_path_resolved) and candidate.exists(): + analysis_file = candidate + break + except (ValueError, OSError): + # Skip if path resolution fails or is invalid + continue + + if not analysis_file: + click.echo( + click.style(" ●", fg="yellow") + + f" Could not find analysis file '{analysis_file_name}', skipping" + ) + logger.warning(f"Could not find analysis file '{analysis_file_name}'") + continue + + # Load and apply the analysis file + with open(analysis_file, "r") as f: + analysis_content = f.read() + + # Parse the analysis file (might be multi-document) + for analysis_doc in yaml.safe_load_all(analysis_content): + if analysis_doc and analysis_doc.get("kind") == "analysis": + try: + production = asimov.event.Production.from_dict( + parameters=analysis_doc, subject=event_obj, ledger=ledger + ) + ledger.add_analysis(production, event=event_obj) + click.echo( + click.style(" ●", fg="green") + + f" Applied {production.name} from {analysis_ref}" + ) + except ValueError as e: + click.echo( + click.style(" ●", fg="yellow") + + f" {analysis_doc.get('name', 'analysis')} from {analysis_ref} already exists, skipping" + ) + logger.warning(f"Analysis {analysis_doc.get('name', 'analysis')} already exists: {e}") + + elif isinstance(analysis_ref, dict): + # Inline analysis definition + try: + production = asimov.event.Production.from_dict( + parameters=analysis_ref, subject=event_obj, ledger=ledger + ) + ledger.add_analysis(production, event=event_obj) + click.echo( + click.style(" ●", fg="green") + + f" Applied {production.name} (inline)" + ) + except ValueError as e: + click.echo( + click.style(" ●", fg="yellow") + + f" {analysis_ref.get('name', 'analysis')} already exists, skipping" + ) + logger.warning(f"Analysis {analysis_ref.get('name', 'analysis')} already exists: {e}") + + click.echo( + click.style("●", fg="green") + + f" Successfully applied bundle '{bundle_name}' to 
{event_obj.name}" + ) + elif document["kind"] == "configuration": logger.info("Found configurations") document.pop("kind") @@ -218,9 +405,10 @@ def apply_page(file, event, ledger=ledger, update_page=False): def apply_via_plugin(event, hookname, **kwargs): discovered_hooks = entry_points(group="asimov.hooks.applicator") + current_ledger = get_ledger() for hook in discovered_hooks: if hook.name in hookname: - hook.load()(ledger).run(event) + hook.load()(current_ledger).run(event) click.echo(click.style("●", fg="green") + f"{event} has been applied.") break @@ -251,7 +439,10 @@ def apply_via_plugin(event, hookname, **kwargs): help="Update the project with this blueprint rather than adding a new record.", ) def apply(file, event, plugin, update): + from asimov import setup_file_logging + current_ledger = get_ledger() + setup_file_logging() if plugin: apply_via_plugin(event, hookname=plugin) elif file: - apply_page(file, event, update_page=update) + apply_page(file, event, ledger=current_ledger, update_page=update) diff --git a/asimov/cli/blueprint.py b/asimov/cli/blueprint.py new file mode 100644 index 00000000..af1e8130 --- /dev/null +++ b/asimov/cli/blueprint.py @@ -0,0 +1,21 @@ +import click +from ..blueprints import select_blueprint_kind + +@click.group() +def blueprint(): + pass + +@blueprint.command() +@click.argument("file_path", type=click.Path(exists=True)) +def validate(file_path): + """ + Validate a blueprint file. 
+ """ + + try: + model, data = select_blueprint_kind(file_path) + model.model_validate(data, strict=True) + click.secho(f"Blueprint '{file_path}' is valid.", fg="green") + + except Exception as e: + click.secho(f"Blueprint '{file_path}' is invalid: {e}", fg="red") \ No newline at end of file diff --git a/asimov/cli/manage.py b/asimov/cli/manage.py index c705d81f..7650f0c6 100644 --- a/asimov/cli/manage.py +++ b/asimov/cli/manage.py @@ -59,6 +59,7 @@ def build(event, dryrun): Create the run configuration files for a given event for jobs which are ready to run. If no event is specified then all of the events will be processed. """ + asimov.setup_file_logging() logger = asimov.logger.getChild("cli").getChild("manage.build") logger.setLevel(LOGGER_LEVEL) @@ -190,6 +191,7 @@ def submit(event, update, dryrun): Submit the run configuration files for a given event for jobs which are ready to run. If no event is specified then all of the events will be processed. """ + asimov.setup_file_logging() logger = asimov.logger.getChild("cli").getChild("manage.submit") logger.setLevel(LOGGER_LEVEL) @@ -323,8 +325,9 @@ def submit(event, update, dryrun): ) click.echo("Try running `asimov manage build` first.") try: - pipe.submit_dag(dryrun=dryrun) + cluster_id = pipe.submit_dag(dryrun=dryrun) if not dryrun: + analysis.job_id = int(cluster_id) click.echo( click.style("●", fg="green") + f" Submitted {analysis.name}" ) @@ -440,6 +443,18 @@ def submit(event, update, dryrun): + f" {production.name} is marked as {production.status.lower()} so no action will be performed" ) continue + + # For SubjectAnalysis, check if all source analyses are finished + from asimov.analysis import SubjectAnalysis + if isinstance(production, SubjectAnalysis): + if not production.source_analyses_ready(): + if dryrun: + click.echo( + click.style("●", fg="yellow") + + f" {production.name} is waiting on source analyses to finish" + ) + continue + if production.status.lower() == "restart": pipe = 
production.pipeline try: @@ -455,84 +470,57 @@ def submit(event, update, dryrun): production.status = "running" else: pipe = production.pipeline - # check the priority status to see if we need to start - # the analysis - to_analyse = True - if production.status not in {"ready"}: - to_analyse = False - else: - # verify priority method to be used - priority_method = check_priority_method(production) - if priority_method == "vanilla": - N_ok = 0 - for prod in production._needs: - if interest_dict_single_analysis[production.event.name][prod]['done']: - N_ok += 1 - if N_ok < len(production._needs): - to_analyse = False - elif priority_method == "is_interesting": - if "minimum" in production.meat["needs settings"].keys(): - N_target = int(production.meta["needs settings"]["minimum"]) + + try: + pipe.build_dag(dryrun=dryrun) + except PipelineException as e: + logger.error( + "failed to build a DAG file.", + ) + logger.exception(e) + click.echo( + click.style("●", fg="red") + + f" Unable to submit {production.name}" + ) + except ValueError: + logger.info("Unable to submit an unbuilt production") + click.echo( + click.style("●", fg="red") + + f" Unable to submit {production.name} as it hasn't been built yet." 
+ ) + click.echo("Try running `asimov manage build` first.") + try: + cluster_id = pipe.submit_dag(dryrun=dryrun) + if not dryrun: + # cluster_id may be a scalar or a sequence; normalize it + if isinstance(cluster_id, (list, tuple)): + job_id_value = cluster_id[0] else: - # all pipelines should indicate the run as interesting - N_target = len(production._needs) - for prod in production._needs: - if interest_dict_single_analysis[production.event.name][prod]['interest status']: - N_ok += 1 - if N_ok < N_target: - to_analyse = False - else: - raise ValueError(f"Priority method {priority_method} not recognized") - if to_analyse: - try: - pipe.build_dag(dryrun=dryrun) - except PipelineException as e: - logger.error( - "failed to build a DAG file.", - ) - logger.exception(e) - click.echo( - click.style("●", fg="red") - + f" Unable to submit {production.name}" - ) - except ValueError: - logger.info("Unable to submit an unbuilt production") + job_id_value = cluster_id + production.job_id = int(job_id_value) click.echo( - click.style("●", fg="red") - + f" Unable to submit {production.name} as it hasn't been built yet." + click.style("●", fg="green") + + f" Submitted {production.event.name}/{production.name}" ) - click.echo("Try running `asimov manage build` first.") - try: - pipe.submit_dag(dryrun=dryrun) - if not dryrun: - click.echo( - click.style("●", fg="green") - + f" Submitted {production.event.name}/{production.name}" - ) - production.status = "running" + production.status = "running" - except PipelineException as e: - production.status = "stuck" - click.echo( - click.style("●", fg="red") - + f" Unable to submit {production.name}" - ) - logger.exception(e) - ledger.update_event(event) - logger.error( - f"The pipeline failed to submit the DAG file to the cluster. 
{e}", - ) - if not dryrun: - # Refresh the job list - job_list = condor.CondorJobList() - job_list.refresh() - # Update the ledger - ledger.update_event(event) - else: + except PipelineException as e: + production.status = "stuck" click.echo( - click.style("●", fg="yellow") - + f"Production {production.name} not ready to submit" + click.style("●", fg="red") + + f" Unable to submit {production.name}" ) + logger.exception(e) + ledger.update_event(event) + logger.error( + f"The pipeline failed to submit the DAG file to the cluster. {e}", + ) + if not dryrun: + # Refresh the job list + job_list = condor.CondorJobList() + job_list.refresh() + # Update the ledger + ledger.update_event(event) @click.option( "--event", diff --git a/asimov/cli/monitor.py b/asimov/cli/monitor.py index 027def29..92a60156 100644 --- a/asimov/cli/monitor.py +++ b/asimov/cli/monitor.py @@ -5,10 +5,13 @@ import os import click from copy import deepcopy +from pathlib import Path from asimov import condor, config, logger, LOGGER_LEVEL from asimov import current_ledger as ledger from asimov.cli import ACTIVE_STATES, manage, report +from asimov.scheduler_utils import get_configured_scheduler, create_job_from_dict, get_job_list +from asimov.monitor_helpers import monitor_analysis logger = logger.getChild("cli").getChild("monitor") logger.setLevel(LOGGER_LEVEL) @@ -20,9 +23,13 @@ @click.option("--dry-run", "-n", "dry_run", is_flag=True) +@click.option("--use-scheduler-api", is_flag=True, default=False, + help="Use the new scheduler API directly (experimental)") @click.command() -def start(dry_run): +def start(dry_run, use_scheduler_api): """Set up a cron job on condor to monitor the project.""" + from asimov import setup_file_logging + setup_file_logging() try: minute_expression = config.get("condor", "cron_minute") @@ -63,7 +70,21 @@ def start(dry_run): " some clusters." 
) - cluster = condor.submit_job(submit_description) + # Use the new scheduler API if requested, otherwise use the legacy interface + if use_scheduler_api: + logger.info("Using new scheduler API") + try: + scheduler = get_configured_scheduler() + job = create_job_from_dict(submit_description) + cluster = scheduler.submit(job) + except Exception as e: + logger.error(f"Failed to submit using scheduler API: {e}") + logger.info("Falling back to legacy condor.submit_job") + cluster = condor.submit_job(submit_description) + else: + # Use legacy interface (which internally uses the scheduler API) + cluster = condor.submit_job(submit_description) + ledger.data["cronjob"] = cluster ledger.save() click.secho(f" \t ● Asimov is running ({cluster})", fg="green") @@ -71,11 +92,29 @@ def start(dry_run): @click.option("--dry-run", "-n", "dry_run", is_flag=True) +@click.option("--use-scheduler-api", is_flag=True, default=False, + help="Use the new scheduler API directly (experimental)") @click.command() -def stop(dry_run): +def stop(dry_run, use_scheduler_api): """Set up a cron job on condor to monitor the project.""" + from asimov import setup_file_logging + setup_file_logging() cluster = ledger.data["cronjob"] - condor.delete_job(cluster) + + # Use the new scheduler API if requested, otherwise use the legacy interface + if use_scheduler_api: + logger.info("Using new scheduler API") + try: + scheduler = get_configured_scheduler() + scheduler.delete(cluster) + except Exception as e: + logger.error(f"Failed to delete using scheduler API: {e}") + logger.info("Falling back to legacy condor.delete_job") + condor.delete_job(cluster) + else: + # Use legacy interface (which internally uses the scheduler API) + condor.delete_job(cluster) + click.secho(" \t ● Asimov has been stopped", fg="red") logger.info(f"Stopped asimov cronjob {cluster}") @@ -102,6 +141,25 @@ def monitor(ctx, event, update, dry_run, chain): """ Monitor condor jobs' status, and collect logging information. 
""" + from asimov import setup_file_logging + setup_file_logging() + + def _webdir_for(subject_name, production_name): + webroot = Path(config.get("general", "webroot")) + if not webroot.is_absolute(): + webroot = Path(config.get("project", "root")) / webroot + return webroot / subject_name / production_name / "pesummary" + + def _has_pesummary_outputs(webdir: Path) -> bool: + """Detect PESummary outputs when the default sentinel is missing.""" + posterior = webdir / "samples" / "posterior_samples.h5" + if posterior.exists(): + return True + # Accept legacy pesummary.dat as fallback + legacy = webdir / "samples" / f"{webdir.parent.name}_pesummary.dat" + if legacy.exists(): + return True + return False logger.info("Running asimov monitor") @@ -111,233 +169,48 @@ def monitor(ctx, event, update, dry_run, chain): ctx.invoke(manage.submit, event=event) try: - # First pull the condor job listing - job_list = condor.CondorJobList() - except condor.htcondor.HTCondorLocateError: - click.echo(click.style("Could not find the condor scheduler", bold=True)) + # Get the job listing using the new scheduler API + job_list = get_job_list() + except RuntimeError as e: + click.echo(click.style(f"Could not query the scheduler: {e}", bold=True)) click.echo( "You need to run asimov on a machine which has access to a" - "condor scheduler in order to work correctly, or to specify" - "the address of a valid sceduler." + "scheduler in order to work correctly, or to specify" + "the address of a valid scheduler." 
) sys.exit() + except Exception as e: + # Fall back to legacy CondorJobList for backward compatibility + logger.warning(f"Failed to use new JobList, falling back to legacy: {e}") + try: + job_list = condor.CondorJobList() + except condor.htcondor.HTCondorLocateError: + click.echo(click.style("Could not find the scheduler", bold=True)) + click.echo( + "You need to run asimov on a machine which has access to a" + "scheduler in order to work correctly, or to specify" + "the address of a valid scheduler." + ) + sys.exit() # also check the analyses in the project analyses for analysis in ledger.project_analyses: click.secho(f"Subjects: {analysis.subjects}", bold=True) - + if analysis.status.lower() in ACTIVE_STATES: - logger.debug(f"Available analyses: project_analyses/{analysis.name}") - - click.echo( - "\t- " - + click.style(f"{analysis.name}", bold=True) - + click.style(f"[{analysis.pipeline}]", fg="green") + monitor_analysis( + analysis=analysis, + job_list=job_list, + ledger=ledger, + dry_run=dry_run, + analysis_path=f"project_analyses/{analysis.name}" ) - # ignore the analysis if it is set to ready as it has not been started yet - if analysis.status.lower() == "ready": - click.secho(f" \t ● {analysis.status.lower()}", fg="green") - logger.debug(f"Ready production: project_analyses/{analysis.name}") - continue - - # check if there are jobs that need to be stopped - if analysis.status.lower() == "stop": - pipe = analysis.pipeline - logger.debug(f"Stop production project_analyses/{analysis.name}") - if not dry_run: - pipe.eject_job() - analysis.status = "stopped" - ledger.update_analysis_in_project_analysis(analysis) - click.secho(" \t Stopped", fg="red") - else: - click.echo("\t\t{analysis.name} --> stopped") - continue - - # deal with the condor jobs - analysis_scheduler = analysis.meta["scheduler"].copy() - try: - if "job id" in analysis_scheduler: - if not dry_run: - if analysis_scheduler["job id"] in job_list.jobs: - job = job_list.jobs[analysis_scheduler["job 
id"]] - else: - job = None - else: - logger.debug( - f"Running analysis: {event}/{analysis.name}, cluster {analysis.job_id}" - ) - click.echo("\t\tRunning under condor") - else: - raise ValueError - - if not dry_run: - if job.status.lower() == "idle": - click.echo( - " \t " - + click.style("●", "green") - + f" {analysis.name} is in the queue (condor id: {analysis_scheduler['job id']})" - ) - - elif job.status.lower() == "running": - click.echo( - " \t " - + click.style("●", "green") - + f" {analysis.name} is running (condor id: {analysis_scheduler['job id']})" - ) - if "profiling" not in analysis.meta: - analysis.meta["profiling"] = {} - if hasattr(analysis.pipeline, "while_running"): - analysis.pipeline.while_running() - analysis.status = "running" - ledger.update_analysis_in_project_analysis(analysis) - - elif job.status.lower() == "completed": - pipe.after_completion() - click.echo( - " \t " - + click.style("●", "green") - + f" {analysis.name} has finished and post-processing has been started" - ) - job_list.refresh() - - elif job.status.lower() == "held": - click.echo( - " \t " - + click.style("●", "yellow") - + f" {analysis.name} is held on the scheduler" - + f" (condor id: {analysis_scheduler['job id']})" - ) - analysis.status = "stuck" - ledger.update_analysis_in_project_analysis(analysis) - else: - continue - - except (ValueError, AttributeError): - if analysis.pipeline: - pipe = analysis.pipeline - if analysis.status.lower() == "stop": - pipe.eject_job() - analysis.status = "stopped" - ledger.update_analysis_in_project_analysis(analysis) - click.echo( - " \t " - + click.style("●", "red") - + f" {analysis.name} has been stopped" - ) - job_list.refresh() - - elif analysis.status.lower() == "finished": - pipe.after_completion() - click.echo( - " \t " - + click.style("●", "green") - + f" {analysis.name} has finished and post-processing has been started" - ) - job_list.refresh() - - elif analysis.status.lower() == "processing": - if 
pipe.detect_completion_processing(): - try: - pipe.after_processing() - click.echo( - " \t " - + click.style("●", "green") - + f" {analysis.name} has been finalised and stored" - ) - except ValueError as e: - click.echo(e) - else: - click.echo( - " \t " - + click.style("●", "green") - + f" {analysis.name} has finished and post-processing" - + f" is stuck ({analysis_scheduler['job id']})" - ) - - elif ( - pipe.detect_completion() - and analysis.status.lower() == "processing" - ): - click.echo( - " \t " - + click.style("●", "green") - + f" {analysis.name} has finished and post-processing is running" - ) - - elif ( - pipe.detect_completion() - and analysis.status.lower() == "running" - ): - if "profiling" not in analysis.meta: - analysis.meta["profiling"] = {} - - try: - config.get("condor", "scheduler") - analysis.meta["profiling"] = condor.collect_history( - analysis_scheduler["job id"] - ) - analysis_scheduler["job id"] = None - ledger.update_analysis_in_project_analysis(analysis) - except ( - configparser.NoOptionError, - configparser.NoSectionError, - ): - logger.warning( - "Could not collect condor profiling data as no " - + "scheduler was specified in the config file." 
- ) - except ValueError as e: - logger.error("Could not collect condor profiling data.") - logger.exception(e) - pass - - analysis.status = "finished" - ledger.update_analysis_in_project_analysis(analysis) - pipe.after_completion() - click.secho( - f" \t ● {analysis.name} - Completion detected", - fg="green", - ) - job_list.refresh() - - else: - # job may have been evicted from the clusters - click.echo( - " \t " - + click.style("●", "yellow") - + f" {analysis.name} is stuck; attempting a rescue" - ) - try: - pipe.resurrect() - except ( - Exception - ): # Sorry, but there are many ways the above command can fail - analysis.status = "stuck" - click.echo( - " \t " - + click.style("●", "red") - + f" {analysis.name} is stuck; automatic rescue was not possible" - ) - ledger.update_analysis_in_project_analysis(analysis) - - if analysis.status == "stuck": - click.echo( - " \t " - + click.style("●", "yellow") - + f" {analysis.name} is stuck" - ) - - ledger.update_analysis_in_project_analysis(analysis) - ledger.save() - if chain: - ctx.invoke(report.html) - all_analyses = set(ledger.project_analyses) complete = { analysis for analysis in ledger.project_analyses - if analysis.status in {"finished", "uploaded"} + if analysis.status in {"finished", "uploaded", "processing"} } others = all_analyses - complete if len(others) > 0: @@ -369,9 +242,6 @@ def monitor(ctx, event, update, dry_run, chain): ctx.invoke(report.html) for event in sorted(ledger.get_event(event), key=lambda e: e.name): - stuck = 0 - running = 0 - finish = 0 click.secho(f"{event.name}", bold=True) on_deck = [ production @@ -380,204 +250,68 @@ def monitor(ctx, event, update, dry_run, chain): ] for production in on_deck: - logger.debug(f"Available analyses: {event}/{production.name}") - click.echo( - "\t- " - + click.style(f"{production.name}", bold=True) - + click.style(f"[{production.pipeline}]", fg="green") + monitor_analysis( + analysis=production, + job_list=job_list, + ledger=ledger, + dry_run=dry_run, + 
analysis_path=f"{event.name}/{production.name}" ) - # Jobs marked as ready can just be ignored as they've not been stood-up - if production.status.lower() == "ready": - click.secho(f" \t ● {production.status.lower()}", fg="green") - logger.debug(f"Ready production: {event}/{production.name}") - continue - - # Deal with jobs which need to be stopped first - if production.status.lower() == "stop": - pipe = production.pipeline - logger.debug(f"Stop production: {event}/{production.name}") - if not dry_run: - pipe.eject_job() - production.status = "stopped" - click.secho(" \tStopped", fg="red") - else: - click.echo("\t\t{production.name} --> stopped") - continue - - # Get the condor jobs - try: - if "job id" in production.meta["scheduler"]: - if not dry_run: - if production.job_id in job_list.jobs: - job = job_list.jobs[production.job_id] - else: - job = None - else: - logger.debug( - f"Running analysis: {event}/{production.name}, cluster {production.job_id}" - ) - click.echo("\t\tRunning under condor") - else: - raise ValueError # Pass to the exception handler - - if not dry_run: - if job.status.lower() == "idle": - click.echo( - " \t " - + click.style("●", "green") - + f" {production.name} is in the queue (condor id: {production.job_id})" - ) - - elif job.status.lower() == "running": - click.echo( - " \t " - + click.style("●", "green") - + f" {production.name} is running (condor id: {production.job_id})" - ) - if "profiling" not in production.meta: - production.meta["profiling"] = {} - production.status = "running" - - elif job.status.lower() == "completed": - pipe.after_completion() - click.echo( - " \t " - + click.style("●", "green") - + f" {production.name} has finished and post-processing has been started" - ) - job_list.refresh() - - elif job.status.lower() == "held": - click.echo( - " \t " - + click.style("●", "yellow") - + f" {production.name} is held on the scheduler" - + f" (condor id: {production.job_id})" - ) - production.status = "stuck" - stuck += 1 - 
else: - running += 1 - - except (ValueError, AttributeError): - if production.pipeline: - - pipe = production.pipeline - - if production.status.lower() == "stop": - pipe.eject_job() - production.status = "stopped" - click.echo( - " \t " - + click.style("●", "red") - + f" {production.name} has been stopped" - ) - job_list.refresh() - elif production.status.lower() == "finished": - pipe.after_completion() - click.echo( - " \t " - + click.style("●", "green") - + f" {production.name} has finished and post-processing has been started" - ) - job_list.refresh() - elif production.status.lower() == "processing": - # Need to check the upload has completed - if pipe.detect_completion_processing(): - try: - pipe.after_processing() + ledger.update_event(event) + + # Auto-refresh combined summary pages (SubjectAnalysis) when stale and refreshable + try: + from asimov.analysis import SubjectAnalysis + except (ImportError, ModuleNotFoundError): + SubjectAnalysis = None + + if SubjectAnalysis: + for prod in event.productions: + try: + if isinstance(prod, SubjectAnalysis): + if getattr(prod, "is_refreshable", False) and prod.source_analyses_ready(): + current_names = [a.name for a in getattr(prod, "analyses", [])] + resolved = getattr(prod, "resolved_dependencies", None) or [] + + # For SubjectAnalysis with smart dependencies (_analysis_spec), + # the analyses list is automatically populated by dependency matching. + # We should NOT manually add candidates; just check if the set changed. + # For legacy explicit name lists, we may need to sync, but smart + # dependencies handle this automatically during initialization. 
+ + # Check if dependency set changed + if set(current_names) != set(resolved): click.echo( " \t " - + click.style("●", "green") - + f" {production.name} has been finalised and stored" + + click.style("●", "yellow") + + f" {prod.name} has new/changed analyses; refreshing combined summary pages" ) - except ValueError as e: - click.echo(e) - else: - click.echo( - " \t " - + click.style("●", "green") - + f" {production.name} has finished and post-processing" - + f" is stuck ({production.job_id})" - ) - elif ( - pipe.detect_completion() - and production.status.lower() == "processing" - ): - click.echo( - " \t " - + click.style("●", "green") - + f" {production.name} has finished and post-processing is running" - ) - elif ( - pipe.detect_completion() - and production.status.lower() == "running" - ): - # The job has been completed, collect its assets - if "profiling" not in production.meta: - production.meta["profiling"] = {} - try: - config.get("condor", "scheduler") - production.meta["profiling"] = condor.collect_history( - production.job_id - ) - production.job_id = None - except ( - configparser.NoOptionError, - configparser.NoSectionError, - ): - logger.warning( - "Could not collect condor profiling data as" - " no scheduler was specified in the" - " config file." 
- ) - except ValueError as e: - logger.error("Could not collect condor profiling data.") - logger.exception(e) - pass - - finish += 1 - production.status = "finished" - pipe.after_completion() - click.secho( - f" \t ● {production.name} - Completion detected", - fg="green", - ) - job_list.refresh() - else: - # It looks like the job has been evicted from the cluster - click.echo( - " \t " - + click.style("●", "yellow") - + f" {production.name} is stuck; attempting a rescue" - ) - try: - pipe.resurrect() - except ( - Exception - ): # Sorry, but there are many ways the above command can fail - production.status = "stuck" - click.echo( - " \t " - + click.style("●", "red") - + f" {production.name} is stuck; automatic rescue was not possible" - ) - - if production.status == "stuck": - click.echo( - " \t " - + click.style("●", "yellow") - + f" {production.name} is stuck" - ) - - ledger.update_event(event) + try: + cluster_id = prod.pipeline.submit_dag() + prod.status = "processing" + prod.job_id = cluster_id + ledger.update_event(event) + click.echo( + " \t " + + click.style("●", "green") + + f" {prod.name} submitted (cluster {cluster_id})" + ) + except Exception as exc: + logger.warning("Failed to refresh %s: %s", prod.name, exc) + click.echo( + " \t " + + click.style("●", "red") + + f" {prod.name} refresh failed: {exc}" + ) + except Exception: + pass all_productions = set(event.productions) complete = { production for production in event.productions - if production.status in {"finished", "uploaded"} + if production.status in {"finished", "uploaded", "processing"} } others = all_productions - set(event.get_all_latest()) - complete if len(others) > 0: @@ -585,7 +319,23 @@ def monitor(ctx, event, update, dry_run, chain): "The event also has these analyses which are waiting on other analyses to complete:" ) for production in others: - needs = ", ".join(production._needs) + # Make dependency specs readable even when _needs contains nested lists/dicts + try: + formatted_needs 
= list(production.dependencies) + except Exception: + formatted_needs = [] + + if not formatted_needs: + def _fmt_need(need): + if isinstance(need, list): + return " & ".join(_fmt_need(n) for n in need) + if isinstance(need, dict): + return ", ".join(f"{k}: {v}" for k, v in need.items()) + return str(need) + + formatted_needs = [_fmt_need(need) for need in getattr(production, "_needs", [])] + + needs = ", ".join(formatted_needs) if formatted_needs else "(no unmet dependencies recorded)" click.echo(f"\t{production.name} which needs {needs}") # Post-monitor hooks if "hooks" in ledger.data: diff --git a/asimov/cli/project.py b/asimov/cli/project.py index 06b573cb..8585eaa9 100644 --- a/asimov/cli/project.py +++ b/asimov/cli/project.py @@ -67,7 +67,7 @@ def make_project( # Make the log directory pathlib.Path(logs).mkdir(parents=True, exist_ok=True) - config.set("logging", "directory", logs) + config.set("logging", "location", logs) # Make the results store storage.Store.create(root=results, name=f"{project_name} storage") @@ -79,7 +79,13 @@ def make_project( config.set("ledger", "location", os.path.join(".asimov", "ledger.yml")) # Set the default environment - python_loc = shutil.which("python").split("/")[:-2] + if (python_loc := shutil.which("python")) is not None: + python_loc = python_loc.split("/")[:-2] + elif (python_loc := shutil.which("python3")) is not None: + python_loc = python_loc.split("/")[:-2] + else: + raise RuntimeError("Unable to find python executable in PATH") + config.set("pipelines", "environment", os.path.join("/", *python_loc)) config.set("rift", "environment", os.path.join("/", *python_loc)) @@ -138,9 +144,29 @@ def init( """ Roll-out a new project. 
""" + from asimov import setup_file_logging make_project(name, root, working=working, checkouts=checkouts, results=results) click.echo(click.style("●", fg="green") + " New project created successfully!") - logger.info(f"A new project was created in {os.getcwd()}") + + # Log the project creation message + message = f"A new project was created in {os.getcwd()}" + logger.info(message) + + # Set up logging after project is created, passing the log directory directly + # to avoid config reload issues in test environments + try: + setup_file_logging(logfile=os.path.join("logs", "asimov.log")) + # Log again so that, if file logging is now configured, the message is written to the log file + logger.info(message) + except Exception as exc: + # Ensure failures to configure file logging are visible to the user + logger.error("Failed to set up file logging for new project: %s", exc) + click.echo( + click.style( + "⚠ Failed to set up file logging. See console output for details.", + fg="yellow", + ) + ) @click.command() diff --git a/asimov/cli/report.py b/asimov/cli/report.py index 99c6134f..f8b061d1 100644 --- a/asimov/cli/report.py +++ b/asimov/cli/report.py @@ -9,7 +9,10 @@ import click import pytz import yaml -from pkg_resources import resource_filename +try: + from importlib.resources import files +except ImportError: + from importlib_resources import files import otter import otter.bootstrap as bt @@ -45,7 +48,7 @@ def html(event, webdir): f"{webdir}/index.html", author="Asimov", title="Asimov project report", - theme_location=resource_filename("asimov.cli", "report-theme"), + theme_location=str(files("asimov.cli").joinpath("report-theme")), config_file=os.path.join(".asimov", "asimov.conf"), ) with report: @@ -53,64 +56,1305 @@ def html(event, webdir): style = """ """ report + style script = """ """ - report + script + report += script with report: navbar = bt.Navbar( f"Asimov | {current_ledger.data['project']['name']}", @@ -119,7 +1363,72 @@ def html(event, webdir): 
report + navbar events = sorted(events, key=lambda a: a.name) - cards = "
" + + # Build summary dashboard + summary = """ +
+
+

Project Summary

+
+
+ 0 + Total Analyses +
+
+ 0 + Running +
+
+ 0 + Finished +
+
+ 0 + Stuck +
+
+ 0 + Cancelled +
+
+
+
+ """ + + # Build filter controls + filters = """ +
+
+
Status Filters
+ + + + + + + +
+
Review Status Filters
+ + + + + +
+
+ """ + + # Build search box + search_box = """ +
+ +
+ """ + + cards = summary + filters + search_box + cards += """ +
+""" toc = """
" + + # Add modal HTML structure + modal_html = """ + +
+ + +
+""" + + cards += modal_html + with report: - report + cards + report += cards with report: time = f"Report generated at {datetime.now(tz):%Y-%m-%d %H:%M}" - report + time + report += time @click.argument("event", default=None, required=False) diff --git a/asimov/cli/review.py b/asimov/cli/review.py index a17abd83..ff4c3ce3 100644 --- a/asimov/cli/review.py +++ b/asimov/cli/review.py @@ -127,11 +127,11 @@ def add(event, production, status, message, other_subjects=None, pipeline=None): f"production {production} and subjects {set(subjects)}", fg="red", ) - else: - click.echo( - click.style("●", fg="green") - + f" {event.name}/{production.name} Note added" - ) + else: + click.echo( + click.style("●", fg="green") + + f" {event.name}/{production.name} Note added" + ) @click.argument("production", default=None, required=False) diff --git a/asimov/condor.py b/asimov/condor.py index 36e0ce9d..80db2706 100644 --- a/asimov/condor.py +++ b/asimov/condor.py @@ -5,15 +5,29 @@ In order to improve performance the code caches results from the query to the scheduler. +Note: This module now uses the asimov.scheduler module internally for improved + scheduler abstraction. The functions here maintain backward compatibility. + """ import os import datetime +import configparser from dateutil import tz -import htcondor + + +import warnings +try: + warnings.filterwarnings("ignore", module="htcondor2") + import htcondor2 as htcondor # NoQA +except ImportError: + warnings.filterwarnings("ignore", module="htcondor") + import htcondor # NoQA + import yaml from asimov import config, logger, LOGGER_LEVEL +from asimov.scheduler import HTCondor as HTCondorScheduler UTC = tz.tzutc() @@ -42,19 +56,73 @@ def datetime_from_epoch(dt, tzinfo=UTC): def submit_job(submit_description): """ - Submit a new job to the condor scheduller + Submit a new job to the condor scheduler. 
+ + This function now uses the asimov.scheduler module internally while + maintaining backward compatibility with the original interface. + + Parameters + ---------- + submit_description : dict + A dictionary containing the HTCondor submit description. + + Returns + ------- + int + The cluster ID of the submitted job. """ + # Try to get the configured scheduler name + try: + schedd_name = config.get("condor", "scheduler") + except (configparser.NoOptionError, configparser.NoSectionError, KeyError): + schedd_name = None + + # Create the scheduler instance + scheduler = HTCondorScheduler(schedd_name=schedd_name) + + # Try to submit using the new scheduler interface + try: + cluster_id = scheduler.submit(submit_description) + logger.info(f"Submitted job with cluster ID: {cluster_id}") + return cluster_id + except Exception as e: + logger.error(f"Failed to submit job: {e}") + # Fall back to the old implementation for robustness + logger.info("Falling back to legacy submission method") + return _submit_job_legacy(submit_description) + +def _submit_job_legacy(submit_description): + """ + Legacy job submission implementation (for backward compatibility). + + Parameters + ---------- + submit_description : dict + A dictionary containing the HTCondor submit description. + + Returns + ------- + int + The cluster ID of the submitted job. + """ hostname_job = htcondor.Submit(submit_description) try: - # There should really be a specified submit node, and if there is, use it. 
schedulers = htcondor.Collector().locate( htcondor.DaemonTypes.Schedd, config.get("condor", "scheduler") ) schedd = htcondor.Schedd(schedulers) logger.info(f"Found scheduler: {schedd}") - except: # NoQA + result = schedd.submit(hostname_job) + cluster_id = result.cluster() + except ( + htcondor.HTCondorLocateError, + htcondor.HTCondorIOError, + configparser.NoOptionError, + configparser.NoSectionError, + KeyError, + ): # Fall back to searching for any schedd on expected lookup/config errors # If you can't find a specified scheduler, try until it works collectors = htcondor.Collector().locateAll(htcondor.DaemonTypes.Schedd) logger.info("Searching for a scheduler of any kind") @@ -62,9 +130,9 @@ def submit_job(submit_description): logger.info(f"Found {collector}") schedd = htcondor.Schedd(collector) try: - with schedd.transaction() as txn: - cluster_id = hostname_job.queue(txn) - break + result = schedd.submit(hostname_job) + cluster_id = result.cluster() + break except htcondor.HTCondorIOError: logger.info(f"{collector} cannot receive jobs") @@ -72,13 +140,51 @@ def submit_job(submit_description): def delete_job(cluster_id): + """ + Delete a job from the condor scheduler. + + This function now uses the asimov.scheduler module internally while + maintaining backward compatibility with the original interface. + + Parameters + ---------- + cluster_id : int + The cluster ID of the job to delete. 
+ """ + # Try to get the configured scheduler name + try: + schedd_name = config.get("condor", "scheduler") + except (configparser.NoOptionError, configparser.NoSectionError, KeyError): + schedd_name = None + + # Create the scheduler instance and delete the job + try: + scheduler = HTCondorScheduler(schedd_name=schedd_name) + scheduler.delete(cluster_id) + logger.info(f"Deleted job with cluster ID: {cluster_id}") + except Exception as e: + logger.error(f"Failed to delete job using new scheduler: {e}") + # Fall back to the old implementation + logger.info("Falling back to legacy deletion method") + _delete_job_legacy(cluster_id) + + +def _delete_job_legacy(cluster_id): + """ + Legacy job deletion implementation (for backward compatibility). + + Parameters + ---------- + cluster_id : int + The cluster ID of the job to delete. + """ try: # There should really be a specified submit node, and if there is, use it. schedulers = htcondor.Collector().locate( htcondor.DaemonTypes.Schedd, config.get("condor", "scheduler") ) schedd = htcondor.Schedd(schedulers) - except: # NoQA + except Exception: # Catch all exceptions to fall back to default schedd # If you can't find a specified scheduler, use the first one you find schedd = htcondor.Schedd() schedd.act(htcondor.JobAction.Remove, f"ClusterId == {cluster_id}") @@ -91,7 +197,7 @@ def collect_history(cluster_id): htcondor.DaemonTypes.Schedd, config.get("condor", "scheduler") ) schedd = htcondor.Schedd(schedulers) - except: # NoQA + except Exception: # Catch all exceptions to fall back to searching for any schedd # If you can't find a specified scheduler, use the first one you find collectors = htcondor.Collector().locateAll(htcondor.DaemonTypes.Schedd) logger.info("Searching for a scheduler of any kind") @@ -286,8 +392,12 @@ def __init__(self): age = -os.stat(cache).st_mtime + datetime.datetime.now().timestamp() logger.info(f"Condor cache is {age} seconds old") if float(age) < float(config.get("condor", "cache_time")): - with 
open(cache, "r") as f: - self.jobs = yaml.safe_load(f) + try: + with open(cache, "r") as f: + self.jobs = yaml.safe_load(f) + except yaml.constructor.ConstructorError: + logger.warning("Cache contains unreadable YAML tags, refreshing") + self.refresh() else: self.refresh() @@ -310,7 +420,7 @@ def refresh(self): try: schedd = htcondor.Schedd(schedd_ad) jobs = schedd.query( - opts=htcondor.htcondor.QueryOpts.DefaultMyJobsOnly, + opts=htcondor.QueryOpts.DefaultMyJobsOnly, projection=[ "ClusterId", "Cmd", @@ -323,7 +433,7 @@ def refresh(self): ], ) data += jobs - except: # NoQA + except Exception: # Catch all exceptions to skip problematic schedds pass retdat = [] @@ -356,7 +466,7 @@ def refresh(self): self.jobs[datum.idno] = datum.to_dict() with open(os.path.join(".asimov", "_cache_jobs.yaml"), "w") as f: - f.write(yaml.dump(self.jobs)) + f.write(yaml.dump({k: v.to_dict() if isinstance(v, CondorJob) else v for k, v in self.jobs.items()})) def get_job_priority(job_id): diff --git a/asimov/configs/README.rst b/asimov/configs/README.rst index a775c15d..7f26aebe 100644 --- a/asimov/configs/README.rst +++ b/asimov/configs/README.rst @@ -5,3 +5,58 @@ This directory contains the default configuration templates for the various pipe The templates are written using the liquidpy templating language. +Scheduler Configuration +----------------------- + +Asimov now supports multiple scheduler backends (HTCondor, Slurm, etc.) through the +``asimov.scheduler`` module. 
You can configure the scheduler in your ``asimov.conf`` file: + +HTCondor Configuration +~~~~~~~~~~~~~~~~~~~~~~ + +To use HTCondor (the default scheduler):: + + [scheduler] + type = htcondor + +You can also specify a specific schedd:: + + [condor] + scheduler = my-schedd.example.com + +Slurm Configuration (Future) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Support for Slurm is planned for future releases:: + + [scheduler] + type = slurm + +Using the Scheduler API +~~~~~~~~~~~~~~~~~~~~~~~ + +You can also use the scheduler API directly in your code:: + + from asimov.scheduler import get_scheduler, JobDescription + + # Get a scheduler instance + scheduler = get_scheduler("htcondor") + + # Create a job description + job = JobDescription( + executable="/path/to/executable", + output="stdout.log", + error="stderr.log", + log="job.log", + cpus=4, + memory="8GB" + ) + + # Submit the job + cluster_id = scheduler.submit(job) + + # Query job status + status = scheduler.query(cluster_id) + + # Delete the job + scheduler.delete(cluster_id) diff --git a/asimov/configs/bilby.ini b/asimov/configs/bilby.ini index c52135a1..2b2e5a03 100644 --- a/asimov/configs/bilby.ini +++ b/asimov/configs/bilby.ini @@ -15,6 +15,7 @@ {%- assign priors = production.meta['priors'] -%} {%- assign data = production.meta['data'] -%} {%- assign quality = production.meta['quality'] -%} +{%- assign waveform = production.meta['waveform'] -%} {%- assign ifos = production.meta['interferometers'] -%} {%- if data contains "calibration" %} @@ -79,7 +80,7 @@ psd-length={{ likelihood['psd length'] | round }} psd-maximum-duration=1024 psd-method=median psd-start-time=None -minimum-frequency={ {% for ifo in ifos %}{{ifo}}:{{quality['minimum frequency'][ifo]}},{% endfor %}{% if likelihood contains 'start frequency'%} waveform: {{ likelihood['start frequency'] }} {% endif %} } +minimum-frequency={ {% for ifo in ifos %}{{ifo}}:{{waveform['minimum frequency'][ifo]}},{% endfor %}{% if likelihood contains 'start frequency'%} 
waveform: {{ likelihood['start frequency'] }} {% endif %} } maximum-frequency={ {% for ifo in ifos %}{{ifo}}:{{quality['maximum frequency'][ifo]}},{% endfor %} } zero-noise=False tukey-roll-off={{ likelihood['roll off time'] | default: 0.4 }} @@ -186,64 +187,12 @@ final-result-nsamples=20000 ################################################################################ ## Prior arguments ################################################################################ +{%- assign prior_interface = production.pipeline.get_prior_interface() -%} -default-prior = {{ priors['default'] | default: "BBHPriorDict" }} +default-prior = {{ prior_interface.get_default_prior() }} deltaT=0.2 {% if production.meta contains "priors" %} -prior-dict = { -{%- if priors.keys() contains "geocentric time" %} -{%- assign p = priors['geocentric time'] %} - geocent_time = {{p['type']}}(name="geocent_time", minimum={{p['minimum']}}, maximum={{p['maximum']}}, boundary={{p['boundary'] | default: None}}), -{% endif %} -{%- if priors.keys() contains "chirp mass" %}{% assign p = priors['chirp mass'] %}{% else %}{% assign p = None %}{% endif %} - chirp_mass = {{p['type'] | default: "bilby.gw.prior.UniformInComponentsChirpMass" }}(name='chirp_mass', minimum={{p['minimum'] | default: 1}}, maximum={{p['maximum'] | default: 100}}, unit='$M_\{\{\odot\}\}$'), -{%- if priors.keys() contains "mass ratio" %}{% assign p = priors['mass ratio'] %}{% else %}{% assign p = None %}{% endif %} - mass_ratio = {{p['type'] | default: "bilby.gw.prior.UniformInComponentsMassRatio" }}(name='mass_ratio', minimum={{p['minimum']}}, maximum={{p['maximum']}}), -{%- if priors.keys() contains "total mass" %}{% assign p = priors['total mass'] %} - total_mass = {{p['type'] | default: Constraint}}(name='total_mass', minimum={{p['minimum']}}, maximum={{p['maximum']}}),{% endif %} -{%- if priors.keys() contains "mass 1" %}{% assign p = priors['mass 1'] %}{% else %}{% assign p = None %}{% endif %} - mass_1 = {{p['type'] | default: 
Constraint}}(name='mass_1', minimum={{p['minimum'] | default: 1}}, maximum={{p['maximum'] | default: 1000}}), -{%- if priors.keys() contains "mass 2" %}{% assign p = priors['mass 2'] %}{% else %}{% assign p = None %}{% endif %} - mass_2 = {{p['type'] | default: Constraint}}(name='mass_2', minimum={{p['minimum'] | default: 1 }}, maximum={{p['maximum'] | default: 1000}}), -{%- if priors.keys() contains "spin 1" %} -{%- assign p = priors['spin 1'] %} -{%- else %} -{%- assign p = None %} -{% endif %} - a_1 = {{ p['type'] | default: Uniform}}(name='a_1', minimum={{ p['minimum'] | default: 0}}, maximum={{ p['maximum'] | default: 0.99}}), -{%- if priors.keys() contains "spin 2" %} -{%- assign p = priors['spin 2'] %} -{%- else %} -{%- assign p = None %} -{%- endif %} - a_2 = {{ p['type'] | default: Uniform}}(name='a_2', minimum={{ p['minimum'] | default: 0}}, maximum={{ p['maximum'] | default: 0.99}}), -{%- if priors.keys() contains "tilt 1" %}{% assign p = priors['tilt 1'] %}{% else %}{% assign p = None %}{% endif %} - tilt_1 = {{ p['type'] | default: Sine}}(name='tilt_1'), -{%- if priors.keys() contains "tilt 2" %}{% assign p = priors['tilt 2'] %}{% else %}{% assign p = None %}{% endif %} - tilt_2 = {{ p['type'] | default: Sine}}(name='tilt_2'), -{%- if priors.keys() contains "phi 12" %}{% assign p = priors['phi 12'] %}{% else %}{% assign p = None %}{% endif %} - phi_12 = {{ p['type'] | default: Uniform}}(name='phi_12', minimum={{ p['minimum'] | default: 0}}, maximum={{ p['maximum'] | default: "2 * np.pi"}}, boundary={{p['boundary'] | default: "'periodic'"}}), -{%- if priors.keys() contains "phi jl" %}{% assign p = priors['phi jl'] %}{% else %}{% assign p = None %}{% endif %} - phi_jl = {{ p['type'] | default: Uniform}}(name='phi_jl', minimum={{ p['minimum'] | default: 0}}, maximum={{ p['maximum'] | default: "2 * np.pi"}}, boundary={{p['boundary'] | default: "'periodic'"}}), -{%- if priors.keys() contains "lambda 1" %} -{%- assign p = priors['lambda 1'] %} - lambda_1 = 
Uniform(name='lambda_1', minimum={{ p['minimum'] | default: 0}}, maximum={{ p['maximum'] | default: 5000}}), -{%- endif %} -{%- if priors.keys() contains "lambda 2" %}{% assign p = priors['lambda 2'] %} - lambda_2 = Uniform(name='lambda_2', minimum={{ p['minimum'] | default: 0}}, maximum={{ p['maximum'] | default: 5000}}), -{%- endif %} -{%- if priors.keys() contains "luminosity distance" %} -{%- assign p = priors['luminosity distance'] %} - luminosity_distance = {{ p['type'] | default: PowerLaw}}(name='luminosity_distance', {% for key in p.keys() %}{% if key != "type" %}{% if p[key] != None %}{{key | replace: " ", "_"}}={{p[key]}},{% endif %}{% endif %} {% endfor %} unit='Mpc'), -{%- else %} -{%- assign p = None %} - luminosity_distance = {{ p['type'] | default: PowerLaw}}(name='luminosity_distance', unit='Mpc'), -{%- endif %} - dec = Cosine(name='dec'), - ra = Uniform(name='ra', minimum=0, maximum=2 * np.pi, boundary='periodic'), - theta_jn = Sine(name='theta_jn'), - psi = Uniform(name='psi', minimum=0, maximum=np.pi, boundary='periodic'), - phase = Uniform(name='phase', minimum=0, maximum=2 * np.pi, boundary='periodic')} +prior-dict = {{ prior_interface.to_prior_dict_string() }} {% endif %} enforce-signal-duration={{ production.meta['waveform']['enforce signal duration'] | default: False }} @@ -263,7 +212,8 @@ single-postprocessing-arguments=None sampler={{sampler['sampler'] | default: "dynesty" }} sampling-seed={{sampler['seed'] | default: 1 }} n-parallel={{ sampler['parallel jobs'] | default: 2 }} -sampler-kwargs={{ sampler['sampler kwargs'] | default: "{'nlive': 1000, 'naccept': 60, 'check_point_plot': True, 'check_point_delta_t': 1800, 'print_method': 'interval-60', 'sample': 'acceptance-walk'}" }} +{% assign sampler_kwargs = production.pipeline.get_sampler_kwargs() %} +sampler-kwargs={{ sampler_kwargs | default: "{'nlive': 1000, 'naccept': 60, 'check_point_plot': True, 'check_point_delta_t': 1800, 'print_method': 'interval-60', 'sample': 'acceptance-walk'}" 
}} reweighting-configuration={{ likelihood['reweighting_configuration'] | default: None }} reweight-nested-samples={{ likelihood['reweight_nested_samples'] | default: False }} @@ -286,10 +236,10 @@ frequency-domain-source-model={{ production.meta['likelihood']['frequency domain conversion-function={{ production.meta['waveform']['conversion function'] | default: "None" }} generation-function={{ production.meta['waveform']['generation function'] | default: "None" }} -{%- if scheduler contains "additional files" %} -additional-transfer-paths={% for file in scheduler['additional files'] %}{{ file }} {% endfor %} +{%- assign additional_files = production.pipeline.get_additional_files() %} +{%- if additional_files.size > 0 %} +additional-transfer-paths={% for file in additional_files %}{{ file }} {% endfor %} {%- endif %} - ################################################################################ ## Global settings ################################################################################ diff --git a/asimov/configs/lalinference.ini b/asimov/configs/lalinference.ini index ce5c0e96..3d859233 100644 --- a/asimov/configs/lalinference.ini +++ b/asimov/configs/lalinference.ini @@ -7,9 +7,11 @@ {%- assign sampler = production.meta['sampler'] -%} {%- assign scheduler = production.meta['scheduler'] -%} {%- assign likelihood = production.meta['likelihood'] -%} -{%- assign priors = production.meta['priors'] -%} +{%- assign prior_interface = production.pipeline.get_prior_interface() -%} +{%- assign priors = prior_interface.convert() -%} {%- assign data = production.meta['data'] -%} {%- assign quality = production.meta['quality'] -%} +{%- assign waveform = production.meta['waveform'] -%} {%- assign ifos = production.meta['interferometers'] -%} {%- if production.event.repository -%} {%- assign repo_dir = production.event.repository.directory -%} @@ -80,15 +82,15 @@ types = {'H1': '{{ production.meta['data']['frame types']['H1'] }}', 'L1': '{{ p channels = {'H1': '{{ 
production.meta['data']['channels']['H1'] }}', 'L1': '{{ production.meta['data']['channels']['L1'] }}', 'V1': '{{ production.meta['data']['channels']['V1'] }}'} [lalinference] -flow = { {% if production.meta['interferometers'] contains "H1" %}'H1': {{ production.quality['minimum frequency']['H1'] }},{% endif %} {% if production.meta['interferometers'] contains "L1" %}'L1': {{ production.quality['minimum frequency']['L1']}},{% endif %} {% if production.meta['interferometers'] contains "V1" %} 'V1': {{ production.quality['minimum frequency']['V1']}} {% endif %} } +flow = { {% if production.meta['interferometers'] contains "H1" %}'H1': {{ waveform['minimum frequency']['H1'] }},{% endif %} {% if production.meta['interferometers'] contains "L1" %}'L1': {{ waveform['minimum frequency']['L1']}},{% endif %} {% if production.meta['interferometers'] contains "V1" %} 'V1': {{ waveform['minimum frequency']['V1']}} {% endif %} } fhigh = { {% if production.meta['interferometers'] contains "H1" %}'H1': {{ production.meta['quality']['high frequency'] }},{% endif %} {% if production.meta['interferometers'] contains "L1" %}'L1': {{ production.meta['quality']['high frequency'] }},{% endif %} {% if production.meta['interferometers'] contains "V1" %} 'V1': {{ production.meta['quality']['high frequency'] }} {% endif %} } [engine] -fref={{ production.meta['waveform']['reference frequency'] }} +fref={{ waveform['reference frequency'] }} approx = {{ production.meta['waveform']['approximant'] }} -amporder = {{ production.meta['priors']['amp order'] }} +amporder = {{ prior_interface.get_amp_order() }} seglen = {{ production.meta['data']['segment length'] }} srate = {{ production.quality['sample rate'] }} @@ -115,18 +117,18 @@ spcal-nodes = 10 a_spin1-max = 0.99 a_spin2-max = 0.99 -{% if production.meta['priors'] contains "chirp-mass" %} -chirpmass-min = {{ production.meta['priors']['chirp-mass'][0] }} -chirpmass-max = {{ production.meta['priors']['chirp-mass'][1] }} +{% if priors contains 
"chirp-mass" %} +chirpmass-min = {{ priors['chirp-mass'][0] }} +chirpmass-max = {{ priors['chirp-mass'][1] }} {% endif %} -q-min = {{ production.meta['priors']['mass ratio'][0] }} -comp-min = {{ production.meta['priors']['mass 1'][0] }} -comp-max = {{ production.meta['priors']['mass 1'][1] }} +q-min = {{ priors['mass ratio'][0] }} +comp-min = {{ priors['mass 1'][0] }} +comp-max = {{ priors['mass 1'][1] }} -distance-max = {{ production.meta['priors']['luminosity distance'][1] }} -{% if production.meta['priors'] contains "volume" %} - {% if production.meta['priors']['volume'] == "comoving" %} +distance-max = {{ priors['luminosity distance'][1] }} +{% if priors contains "volume" %} + {% if priors['volume'] == "comoving" %} distance-prior-comoving-volume = {% endif %} {% endif %} diff --git a/asimov/configs/projecttestpipeline.ini b/asimov/configs/projecttestpipeline.ini new file mode 100644 index 00000000..0b10666f --- /dev/null +++ b/asimov/configs/projecttestpipeline.ini @@ -0,0 +1,27 @@ +# ProjectTestPipeline Configuration Template +# This is a minimal configuration file for the ProjectTestPipeline +# used for testing asimov's infrastructure. 
+ +[test] +# Test pipeline configuration +pipeline = projecttestpipeline +analysis_name = {{ production.name }} +rundir = {{ production.rundir }} + +# Number of subjects (events) being analyzed +{%- if production._subjects %} +num_subjects = {{ production._subjects | size }} +{%- else %} +num_subjects = 0 +{%- endif %} + +# Number of analyses being combined +{%- if production.analyses %} +num_analyses = {{ production.analyses | size }} +{%- else %} +num_analyses = 0 +{%- endif %} + +# These are dummy settings for testing purposes +test_parameter_1 = 10.5 +test_parameter_2 = population_test_value diff --git a/asimov/configs/rift.ini b/asimov/configs/rift.ini index bc5c31d5..28ab2bc7 100644 --- a/asimov/configs/rift.ini +++ b/asimov/configs/rift.ini @@ -62,7 +62,7 @@ types = { {% for ifo in ifos %}"{{ifo}}":"{{data['frame types'][ifo]}}",{% endfo channels = { {% for ifo in ifos %}"{{ifo}}":"{{data['channels'][ifo]}}",{% endfor %} } [lalinference] -flow = { {% for ifo in ifos %}"{{ifo}}":{{quality['minimum frequency'][ifo]}},{% endfor %} } +flow = { {% for ifo in ifos %}"{{ifo}}":{{waveform['minimum frequency'][ifo]}},{% endfor %} } fhigh = { {% for ifo in ifos %}"{{ifo}}":{{quality['maximum frequency'][ifo]}},{% endfor %} } [engine] diff --git a/asimov/configs/simpletestpipeline.ini b/asimov/configs/simpletestpipeline.ini new file mode 100644 index 00000000..92c84abc --- /dev/null +++ b/asimov/configs/simpletestpipeline.ini @@ -0,0 +1,14 @@ +# SimpleTestPipeline Configuration Template +# This is a minimal configuration file for the SimpleTestPipeline +# used for testing asimov's infrastructure. 
+ +[test] +# Test pipeline configuration +pipeline = simpletestpipeline +analysis_name = {{ production.name }} +event_name = {{ production.event.name }} +rundir = {{ production.rundir }} + +# These are dummy settings for testing purposes +test_parameter_1 = 1.0 +test_parameter_2 = test_value diff --git a/asimov/configs/subjecttestpipeline.ini b/asimov/configs/subjecttestpipeline.ini new file mode 100644 index 00000000..1ffcea40 --- /dev/null +++ b/asimov/configs/subjecttestpipeline.ini @@ -0,0 +1,21 @@ +# SubjectTestPipeline Configuration Template +# This is a minimal configuration file for the SubjectTestPipeline +# used for testing asimov's infrastructure. + +[test] +# Test pipeline configuration +pipeline = subjecttestpipeline +analysis_name = {{ production.name }} +subject_name = {{ production.subject.name }} +rundir = {{ production.rundir }} + +# Number of analyses being combined +{%- if production.analyses %} +num_analyses = {{ production.analyses | size }} +{%- else %} +num_analyses = 0 +{%- endif %} + +# These are dummy settings for testing purposes +test_parameter_1 = 1.5 +test_parameter_2 = combined_test_value diff --git a/asimov/custom_states.py b/asimov/custom_states.py new file mode 100644 index 00000000..5962fde1 --- /dev/null +++ b/asimov/custom_states.py @@ -0,0 +1,355 @@ +""" +Custom monitor states for extended functionality. + +This module contains custom state handlers that extend the base monitor functionality. +These states are kept separate from the core states and can be moved to their own +package in the future. + +These states were inspired by functionality from the v0.6-release branch. +""" + +import click +from asimov import logger, LOGGER_LEVEL +from asimov.monitor_states import MonitorState, register_state + +logger = logger.getChild("custom_states") +logger.setLevel(LOGGER_LEVEL) + + +class ReviewState(MonitorState): + """ + Handle analyses in 'review' state. 
+ + This state represents analyses that have completed and are awaiting review + before being marked as fully complete. This allows for a manual review step + before finalizing results. + """ + + @property + def state_name(self): + return "review" + + def handle(self, context): + """Handle analysis in review state.""" + analysis = context.analysis + + click.echo( + " \t " + + click.style("●", "blue") + + f" {analysis.name} is awaiting review" + ) + + # Check if review has been completed + if hasattr(analysis, 'review') and analysis.review: + if hasattr(analysis.review, 'status'): + if analysis.review.status == "approved": + analysis.status = "reviewed" + context.update_ledger() + click.echo( + " \t " + + click.style("✓", "green") + + f" {analysis.name} review approved" + ) + elif analysis.review.status == "rejected": + analysis.status = "review_failed" + context.update_ledger() + click.echo( + " \t " + + click.style("✗", "red") + + f" {analysis.name} review rejected" + ) + + return True + + +class ReviewedState(MonitorState): + """ + Handle analyses in 'reviewed' state. + + This state represents analyses that have been reviewed and approved. + They can now proceed to final processing or upload. + """ + + @property + def state_name(self): + return "reviewed" + + def handle(self, context): + """Handle reviewed analysis.""" + analysis = context.analysis + + click.echo( + " \t " + + click.style("●", "green") + + f" {analysis.name} has been reviewed and approved" + ) + + # Optionally trigger next step + if hasattr(analysis, 'pipeline') and analysis.pipeline: + pipe = analysis.pipeline + if hasattr(pipe, 'after_review'): + try: + pipe.after_review() + click.echo( + " \t " + + click.style("●", "green") + + f" {analysis.name} post-review processing started" + ) + except Exception as e: + logger.error(f"Error in post-review processing: {e}") + + return True + + +class UploadingState(MonitorState): + """ + Handle analyses in 'uploading' state. 
+ + This state tracks analyses that are currently being uploaded to + storage or distribution systems. + """ + + @property + def state_name(self): + return "uploading" + + def handle(self, context): + """Handle uploading analysis.""" + analysis = context.analysis + pipe = analysis.pipeline + + if not pipe: + return False + + click.echo( + " \t " + + click.style("●", "cyan") + + f" {analysis.name} is uploading" + ) + + # Check if upload has completed + if hasattr(pipe, 'detect_upload_completion'): + if pipe.detect_upload_completion(): + analysis.status = "uploaded" + context.update_ledger() + click.echo( + " \t " + + click.style("●", "green") + + f" {analysis.name} upload complete" + ) + + return True + + +class UploadedState(MonitorState): + """ + Handle analyses in 'uploaded' state. + + This is a terminal state indicating the analysis has been successfully + uploaded and is complete. + """ + + @property + def state_name(self): + return "uploaded" + + def handle(self, context): + """Handle uploaded analysis.""" + analysis = context.analysis + + click.echo( + " \t " + + click.style("●", "green") + + f" {analysis.name} is uploaded and complete" + ) + + return True + + +class RestartState(MonitorState): + """ + Handle analyses in 'restart' state. + + This state allows analyses to be restarted from a previous checkpoint + or from the beginning. 
+ """ + + @property + def state_name(self): + return "restart" + + def handle(self, context): + """Handle restart of analysis.""" + analysis = context.analysis + pipe = analysis.pipeline + + if not pipe: + return False + + click.echo( + " \t " + + click.style("●", "yellow") + + f" {analysis.name} is being restarted" + ) + + # Clean up old job if exists + if context.has_condor_job(): + job_id = context.job_id + if job_id: + try: + pipe.eject_job() + click.echo( + " \t " + + click.style("●", "yellow") + + f" {analysis.name} old job removed" + ) + except Exception as e: + logger.error(f"Error removing old job: {e}") + + # Reset to ready state to be picked up by submit + analysis.status = "ready" + context.update_ledger() + + click.echo( + " \t " + + click.style("●", "green") + + f" {analysis.name} reset to ready for restart" + ) + + return True + + +class WaitState(MonitorState): + """ + Handle analyses in 'wait' state. + + This state represents analyses that are waiting for dependencies + or other conditions to be met before they can proceed. + """ + + @property + def state_name(self): + return "wait" + + def handle(self, context): + """Handle waiting analysis.""" + analysis = context.analysis + + click.echo( + " \t " + + click.style("●", "cyan") + + f" {analysis.name} is waiting" + ) + + # Check if dependencies are met + if hasattr(analysis, '_needs') and analysis._needs: + # Check if all dependencies are complete + all_complete = True + for need in analysis._needs: + # This would need actual dependency checking logic + # For now, just report the wait state + pass + + if all_complete: + analysis.status = "ready" + context.update_ledger() + click.echo( + " \t " + + click.style("●", "green") + + f" {analysis.name} dependencies met, now ready" + ) + + return True + + +class CancelledState(MonitorState): + """ + Handle analyses in 'cancelled' state. + + This is a terminal state for analyses that have been cancelled + and will not be completed. 
+ """ + + @property + def state_name(self): + return "cancelled" + + def handle(self, context): + """Handle cancelled analysis.""" + analysis = context.analysis + + click.echo( + " \t " + + click.style("●", "red") + + f" {analysis.name} is cancelled" + ) + + # Clean up any running jobs + if context.has_condor_job(): + pipe = analysis.pipeline + if pipe: + try: + pipe.eject_job() + click.echo( + " \t " + + click.style("●", "red") + + f" {analysis.name} job removed" + ) + except Exception as e: + logger.error(f"Error removing job: {e}") + + return True + + +class ManualState(MonitorState): + """ + Handle analyses in 'manual' state. + + This state represents analyses that require manual intervention + and should not be automatically managed by the monitor. + """ + + @property + def state_name(self): + return "manual" + + def handle(self, context): + """Handle manual analysis.""" + analysis = context.analysis + + click.echo( + " \t " + + click.style("●", "yellow") + + f" {analysis.name} requires manual intervention" + ) + + # Don't take any automatic action + return True + + +# Register custom states +def register_custom_states(): + """ + Register all custom state handlers. + + This function should be called to make the custom states available + to the monitor system. 
+ """ + custom_states = [ + ReviewState(), + ReviewedState(), + UploadingState(), + UploadedState(), + RestartState(), + WaitState(), + CancelledState(), + ManualState(), + ] + + for state in custom_states: + register_state(state) + logger.debug(f"Registered custom state: {state.state_name}") + + +# Auto-register on import +register_custom_states() diff --git a/asimov/event.py b/asimov/event.py index 3635ab23..1df47b90 100644 --- a/asimov/event.py +++ b/asimov/event.py @@ -73,10 +73,10 @@ def __init__(self, name, repository=None, update=False, **kwargs): self.logger = logger.getChild("event").getChild(f"{self.name}") self.logger.setLevel(LOGGER_LEVEL) - # pathlib.Path(os.path.join(config.get("logging", "directory"), name)).mkdir( + # pathlib.Path(os.path.join(config.get("logging", "location"), name)).mkdir( # parents=True, exist_ok=True # ) - # logfile = os.path.join(config.get("logging", "directory"), name, "asimov.log") + # logfile = os.path.join(config.get("logging", "location"), name, "asimov.log") # fh = logging.FileHandler(logfile) # formatter = logging.Formatter("%(asctime)s - %(message)s", "%Y-%m-%d %H:%M:%S") @@ -131,16 +131,35 @@ def __init__(self, name, repository=None, update=False, **kwargs): if "productions" in kwargs: for production in kwargs["productions"]: - if ("analyses" in production) or ("productions" in production): + # Normalise stored production structures. They may arrive either as + # {name: {..metadata..}} (preferred) or a flat dict. Ensure the + # inner dict carries the production name so downstream factories + # have the required fields. 
+ if isinstance(production, dict) and len(production) == 1: + prod_name, prod_meta = next(iter(production.items())) + if prod_meta is None: + prod_meta = {} + if "name" not in prod_meta: + prod_meta["name"] = prod_name + elif isinstance(production, dict): + prod_meta = dict(production) + else: + # Unknown structure; skip + continue + + if ("analyses" in prod_meta) or ("productions" in prod_meta): self.add_production( - SubjectAnalysis.from_dict(production, subject=self) + SubjectAnalysis.from_dict(prod_meta, subject=self) ) else: self.add_production( Production.from_dict( - production, subject=self, ledger=self.ledger + prod_meta, subject=self, ledger=self.ledger ) ) + # After all productions are added, update the graph to build dependency edges + # This ensures dependencies can be resolved regardless of order in the ledger + self.update_graph() self._check_required() if ( @@ -234,14 +253,40 @@ def add_production(self, production): self.productions.append(production) self.graph.add_node(production) - if production.dependencies: - for dependency in production.dependencies: - if dependency == production: - continue - analysis_dict = { - production.name: production for production in self.productions - } - self.graph.add_edge(analysis_dict[dependency], production) + # Note: Dependencies are resolved dynamically when accessed, so we don't + # build edges here. Instead, call update_graph() after all productions + # are added to ensure the graph reflects current dependencies. + # This fixes the issue where dependencies appearing later in the ledger + # couldn't be found during initial loading. + + def update_graph(self): + """ + Rebuild the dependency graph based on current production dependencies. + + This is necessary because dependency queries (e.g., property-based filters) + are evaluated dynamically and may change as productions are added or modified. + Call this method before using the graph to ensure edges reflect current state. 
+ """ + # Clear all edges but keep nodes + self.graph.clear_edges() + + # Rebuild edges based on current dependencies + analysis_dict = {production.name: production for production in self.productions} + + for production in self.productions: + if production.dependencies: + for dependency_name in production.dependencies: + if dependency_name == production.name: + continue + if dependency_name in analysis_dict: + self.graph.add_edge(analysis_dict[dependency_name], production) + + # Re-resolve SubjectAnalysis dependencies now that all productions are loaded + # This ensures smart dependencies work correctly regardless of production order + from asimov.analysis import SubjectAnalysis + for production in self.productions: + if isinstance(production, SubjectAnalysis): + production.resolve_analyses() def __repr__(self): return f"" @@ -381,7 +426,9 @@ def to_dict(self, productions=True): if productions: data["productions"] = [] for production in self.productions: - data["productions"].append(production.to_dict(event=False)) + # Store production metadata keyed by its name so it can be + # reconstructed losslessly when reloading the ledger. + data["productions"].append({production.name: production.to_dict(event=False)}) data["working directory"] = self.work_dir if "ledger" in data: @@ -411,6 +458,9 @@ def get_all_latest(self): set A set of independent jobs which are not finished execution. 
""" + # Update graph to reflect current dependencies + self.update_graph() + unfinished = self.graph.subgraph( [ production @@ -461,19 +511,344 @@ def build_report(self): production.build_report() def html(self): + # Helper function to get review info from a node + def get_review_info(node): + """Extract review status and message from a node.""" + review_status = 'none' + review_message = '' + if hasattr(node, 'review') and len(node.review) > 0: + # Get the latest review message (Review class implements __getitem__) + latest_review = node.review[-1] + if latest_review: + review_status = latest_review.status.lower() if latest_review.status else 'none' + review_message = latest_review.message if latest_review.message else '' + return review_status, review_message + + # Helper function to generate review indicator HTML + def get_review_indicator(review_status): + """Generate HTML for review status indicator.""" + if review_status == 'approved': + return '' + elif review_status == 'rejected': + return '' + elif review_status == 'deprecated': + return '' + return '' + card = f""" -
+
-

{self.name}

+

{self.name}

""" - card += "

Analyses

" - card += """
""" - - for production in self.productions: - card += production.html() - - card += """
""" + # Add event metadata if available + if hasattr(self, 'meta') and self.meta: + if "gps" in self.meta: + card += f"""

GPS Time: {self.meta['gps']}

""" + if "interferometers" in self.meta: + ifos = ", ".join(self.meta["interferometers"]) if isinstance(self.meta["interferometers"], list) else self.meta["interferometers"] + card += f"""

Interferometers: {ifos}

""" + + # Generate graph-based workflow visualization + if hasattr(self, 'graph') and self.graph and len(self.graph.nodes()) > 0: + # Update graph to reflect current dependencies (important for property-based queries) + self.update_graph() + + card += """
""" + card += """

Workflow Graph

""" + + try: + import networkx as nx + from asimov.event import status_map + + # Organize nodes by dependency layers + if nx.is_directed_acyclic_graph(self.graph): + # Get layers using topological generations + layers = list(nx.topological_generations(self.graph)) + + card += """
""" + + for layer_idx, layer in enumerate(layers): + card += """
""" + + for node in layer: + # Get status and review for styling + status = node.status if hasattr(node, 'status') else 'unknown' + review_status, review_message = get_review_info(node) + + status_badge = status_map.get(status, 'secondary') + + # Get pipeline name + pipeline_name = node.pipeline.name if hasattr(node, 'pipeline') and node.pipeline else '' + + # Get dependencies (predecessors in the graph) + predecessors = list(self.graph.predecessors(node)) + predecessor_names = ','.join([pred.name for pred in predecessors]) if predecessors else '' + + # Get dependents (successors in the graph) + successors = list(self.graph.successors(node)) + successor_names = ','.join([succ.name for succ in successors]) if successors else '' + + # Create graph node with click handler + # Add running indicator for active analyses + running_indicator = '' + if status in ['running', 'processing']: + running_indicator = '' + + # Add review status indicator + review_indicator = get_review_indicator(review_status) + + # Check if this is a subject analysis + is_subject = hasattr(node, 'category') and node.category == 'subject_analyses' + subject_class = ' graph-node-subject' if is_subject else '' + + # Check if stale (dependencies changed) + is_stale = hasattr(node, 'is_stale') and node.is_stale + is_refreshable = hasattr(node, 'is_refreshable') and node.is_refreshable + stale_class = ' graph-node-stale' if is_stale else '' + + # Add staleness indicator for subject analyses + stale_indicator = '' + if is_subject and is_stale: + stale_indicator = '' + + # Create unique node IDs by including event name + node_id = f"node-{self.name}-{node.name}" + data_id = f"analysis-data-{self.name}-{node.name}" + + # For subject analyses, include source analysis names + source_analyses_str = '' + if is_subject and hasattr(node, '_analysis_spec_names'): + # Build list of source analyses with their statuses for styling + source_specs = [] + for source_name in node._analysis_spec_names: + # Find the 
source analysis status + source_status = 'unknown' + for n in self.graph.nodes(): + if n.name == source_name: + source_status = n.status if hasattr(n, 'status') else 'unknown' + break + source_specs.append(f"{source_name}:{source_status}") + source_analyses_str = '|'.join(source_specs) + + card += f""" +
+ {running_indicator} + {review_indicator} + {stale_indicator} +
{node.name}
+
{pipeline_name}
+
+ """ + + # Add hidden data container for modal + comment = node.comment if hasattr(node, 'comment') and node.comment else '' + rundir = node.rundir if hasattr(node, 'rundir') and node.rundir else '' + approximant = node.meta.get('approximant', '') if hasattr(node, 'meta') else '' + + # Get webdir for results links + webdir = '' + if hasattr(node, 'event') and hasattr(node.event, 'webdir') and node.event.webdir: + webdir = node.event.webdir + + # Construct potential result page URLs based on pipeline + result_pages = [] + if webdir and rundir: + # Extract just the directory name from the full rundir path + import os + rundir_name = os.path.basename(rundir.rstrip('/')) + base_url = f"{webdir}/{rundir_name}" + + # Add common result page patterns for different pipelines + if pipeline_name.lower() == 'bilby': + result_pages.append(f"{base_url}/result/homepage.html|Bilby Results") + result_pages.append(f"{base_url}/result/corner.png|Corner Plot") + elif pipeline_name.lower() == 'bayeswave': + result_pages.append(f"{base_url}/post/megaplot.png|Bayeswave Megaplot") + elif pipeline_name.lower() == 'pesummary': + result_pages.append(f"{base_url}/home.html|PESummary Results") + + result_pages_str = ';;'.join(result_pages) if result_pages else '' + + # Get current dependencies + dependencies = node.dependencies if hasattr(node, 'dependencies') else [] + dependencies_str = ', '.join(dependencies) if dependencies else '' + + # Escape review message for HTML attribute + review_message_escaped = review_message.replace('"', '"').replace("'", ''') + + card += f""" + + """ + + card += """
""" + + # Add arrow between layers + if layer_idx < len(layers) - 1: + card += """
""" + + card += """
""" + + else: + # Fallback for non-DAG: just list nodes + card += """
""" + card += """
""" + for node in self.graph.nodes(): + status = node.status if hasattr(node, 'status') else 'unknown' + status_badge = status_map.get(status, 'secondary') + pipeline_name = node.pipeline.name if hasattr(node, 'pipeline') and node.pipeline else '' + + review_status, review_message = get_review_info(node) + + # Get dependencies even for non-DAG + predecessors = list(self.graph.predecessors(node)) if hasattr(self.graph, 'predecessors') else [] + predecessor_names = ','.join([pred.name for pred in predecessors]) if predecessors else '' + + successors = list(self.graph.successors(node)) if hasattr(self.graph, 'successors') else [] + successor_names = ','.join([succ.name for succ in successors]) if successors else '' + + # Add running indicator for active analyses + running_indicator = '' + if status in ['running', 'processing']: + running_indicator = '' + + # Add review status indicator + review_indicator = get_review_indicator(review_status) + + # Check if this is a subject analysis + is_subject = hasattr(node, 'category') and node.category == 'subject_analyses' + subject_class = ' graph-node-subject' if is_subject else '' + + # Check if stale (dependencies changed) + is_stale = hasattr(node, 'is_stale') and node.is_stale + is_refreshable = hasattr(node, 'is_refreshable') and node.is_refreshable + stale_class = ' graph-node-stale' if is_stale else '' + + # Add staleness indicator for subject analyses + stale_indicator = '' + if is_subject and is_stale: + stale_indicator = '' + + # Create unique node IDs by including event name + node_id = f"node-{self.name}-{node.name}" + data_id = f"analysis-data-{self.name}-{node.name}" + + # For subject analyses, include source analysis names + source_analyses_str = '' + if is_subject and hasattr(node, '_analysis_spec_names'): + # Build list of source analyses with their statuses for styling + source_specs = [] + for source_name in node._analysis_spec_names: + # Find the source analysis status + source_status = 'unknown' + for n in 
self.graph.nodes(): + if n.name == source_name: + source_status = n.status if hasattr(n, 'status') else 'unknown' + break + source_specs.append(f"{source_name}:{source_status}") + source_analyses_str = '|'.join(source_specs) + + card += f""" +
+ {running_indicator} + {review_indicator} + {stale_indicator} +
{node.name}
+
{pipeline_name}
+
+ """ + + comment = node.comment if hasattr(node, 'comment') and node.comment else '' + rundir = node.rundir if hasattr(node, 'rundir') and node.rundir else '' + approximant = node.meta.get('approximant', '') if hasattr(node, 'meta') else '' + + # Get webdir for results links + webdir = '' + if hasattr(node, 'event') and hasattr(node.event, 'webdir') and node.event.webdir: + webdir = node.event.webdir + + # Construct potential result page URLs based on pipeline + result_pages = [] + if webdir and rundir: + # Extract just the directory name from the full rundir path + import os + rundir_name = os.path.basename(rundir.rstrip('/')) + base_url = f"{webdir}/{rundir_name}" + + # Add common result page patterns for different pipelines + if pipeline_name.lower() == 'bilby': + result_pages.append(f"{base_url}/result/homepage.html|Bilby Results") + result_pages.append(f"{base_url}/result/corner.png|Corner Plot") + elif pipeline_name.lower() == 'bayeswave': + result_pages.append(f"{base_url}/post/megaplot.png|Bayeswave Megaplot") + elif pipeline_name.lower() == 'pesummary': + result_pages.append(f"{base_url}/home.html|PESummary Results") + + result_pages_str = ';;'.join(result_pages) if result_pages else '' + + # Get current dependencies + dependencies = node.dependencies if hasattr(node, 'dependencies') else [] + dependencies_str = ', '.join(dependencies) if dependencies else '' + + # Escape review message for HTML attribute + review_message_escaped = review_message.replace('"', '"').replace("'", ''') + + card += f""" + + """ + card += """
""" + card += """
""" + + except Exception as e: + card += f"""

Error rendering graph: {str(e)}

""" + + card += """
""" # card += """ #
diff --git a/asimov/git.py b/asimov/git.py index da7c9864..523164a7 100644 --- a/asimov/git.py +++ b/asimov/git.py @@ -43,6 +43,52 @@ def __init__(self, directory, url=None, update=False): self.logger = logger + def get_default_branch(self): + """ + Get the default branch name for this repository. + + Returns + ------- + str + The name of the default branch (e.g., 'master', 'main') + """ + try: + # Try to get the remote's default branch + if self.repo.remotes: + remote = self.repo.remotes[0] + # Get the symbolic reference for HEAD from the remote + if hasattr(remote, 'refs'): + for ref in remote.refs: + ref_name = getattr(ref, "name", "") + if ref_name.endswith("HEAD"): + # Get what HEAD points to + remote_head = getattr(ref, "remote_head", None) + if remote_head: + return remote_head + target_ref = getattr(ref, "ref", None) + target_name = getattr(target_ref, "name", None) + if target_name: + return target_name.split("/")[-1] + + # Fallback: check local HEAD or common branch names + if self.repo.head.is_valid(): + return self.repo.head.ref.name + + # Final fallback: try common names + for branch_name in ['main', 'master']: + try: + self.repo.git.rev_parse('--verify', branch_name) + return branch_name + except git.exc.GitCommandError: + continue + + # If all else fails, return 'master' as last resort + return 'master' + except (git.exc.GitCommandError, AttributeError) as e: + # In case of any error, return 'master' as a safe default + self.logger.warning(f"Could not detect default branch for {self.event}: {e}") + return 'master' + def __repr__(self): return self.directory @@ -59,8 +105,17 @@ def create(cls, location): directory = config.get("general", "calibration_directory") os.makedirs(location, exist_ok=True) try: - repo = git.Repo.init(location, initial_branch="master") - except Exception: + # Try to create with 'main' as the initial branch (modern convention) + repo = git.Repo.init(location, initial_branch="main") + except (TypeError, git.exc.GitCommandError) 
as exc: + # Fallback for older git versions that don't support initial_branch + logger.warning( + "Git version does not support 'initial_branch' when initializing " + "repository at %s; falling back to default initial branch. " + "Original error: %s", + location, + exc, + ) repo = git.Repo.init(location) os.makedirs(os.path.join(location, directory), exist_ok=True) with open(os.path.join(location, directory, ".gitkeep"), "w") as f: @@ -179,6 +234,12 @@ def add_file(self, source, destination, commit_message=None): def find_timefile(self, category=config.get("general", "calibration_directory")): """ Find the time file in this repository. + + Parameters + ---------- + category : str, optional + The category directory to search in. + Defaults to the value of "general/calibration_directory" from config. """ with set_directory(os.path.join(self.directory, category)): @@ -191,6 +252,12 @@ def find_timefile(self, category=config.get("general", "calibration_directory")) def find_coincfile(self, category=config.get("general", "calibration_directory")): """ Find the coinc file for this calibration category in this repository. + + Parameters + ---------- + category : str, optional + The category directory to search in. + Defaults to the value of "general/calibration_directory" from config. """ coinc_file = glob.glob( os.path.join(os.getcwd(), self.directory, category, "*coinc*.xml") @@ -214,7 +281,7 @@ def find_prods( The name of the production. If omitted then all production ini files are returned. category : str, optional - The category of run. Defaults to "general/calibration_directory" from the config file. + The category of run. Defaults to the value of "general/calibration_directory" from config. """ self.update() @@ -242,7 +309,7 @@ def upload_prod( ---------- category : str, optional The category of the job. - Defaults to "C01_offline". + Defaults to the value of "general/calibration_directory" from config. production : str The production name. 
rundir : str @@ -277,7 +344,9 @@ def upload_prod( ) out, err = dagman.communicate() - if err or "master -> master" not in str(out): + # Check if there was an error or if the push didn't succeed + # Instead of checking for "master -> master", check for general push success + if err: raise ValueError(f"Sample upload failed.\n{out}\n{err}") else: return out @@ -357,7 +426,7 @@ def upload_preferred(self, event, prods): return True - def update(self, stash=False, branch="master"): + def update(self, stash=False, branch=None): """ Pull the latest updates to the repository. @@ -369,11 +438,14 @@ def update(self, stash=False, branch="master"): Default is False. branch : str, optional The branch which should be checked-out. - Default is master. + If not provided, uses the repository's default branch. """ if stash: self.repo.git.stash() + if branch is None: + branch = self.get_default_branch() + self.repo.git.checkout(branch) try: self.repo.git.pull() diff --git a/asimov/ledger.py b/asimov/ledger.py index 85f97d87..c051901f 100644 --- a/asimov/ledger.py +++ b/asimov/ledger.py @@ -37,8 +37,8 @@ class YAMLLedger(Ledger): def __init__(self, location=None): if not location: location = os.path.join(".asimov", "ledger.yml") - self.location = location - with open(location, "r") as ledger_file: + self.location = os.path.abspath(location) + with open(self.location, "r") as ledger_file: self.data = yaml.safe_load(ledger_file) self.data["events"] = [ @@ -152,6 +152,9 @@ def add_analysis(self, analysis, event=None): -------- """ if isinstance(analysis, ProjectAnalysis): + # Ensure "project analyses" key exists for old ledgers + if "project analyses" not in self.data: + self.data["project analyses"] = [] names = [ana["name"] for ana in self.data["project analyses"]] if analysis.name not in names: self.data["project analyses"].append(analysis.to_dict()) @@ -190,7 +193,7 @@ def get_defaults(self): def project_analyses(self): return [ ProjectAnalysis.from_dict(analysis, ledger=self) - for 
analysis in self.data["project analyses"] + for analysis in self.data.get("project analyses", []) ] def get_event(self, event=None): diff --git a/asimov/monitor_api.py b/asimov/monitor_api.py new file mode 100644 index 00000000..98358119 --- /dev/null +++ b/asimov/monitor_api.py @@ -0,0 +1,280 @@ +""" +Programmatic API for asimov monitor functionality. + +This module provides Python functions to run asimov monitoring operations +programmatically, suitable for use in scripts, Jupyter notebooks, or custom +automation workflows. +""" + + +from typing import Optional, List +from asimov import condor, logger, LOGGER_LEVEL +from asimov import current_ledger as ledger +from asimov.cli import ACTIVE_STATES +from asimov.monitor_helpers import monitor_analysis + +logger = logger.getChild("monitor_api") +logger.setLevel(LOGGER_LEVEL) + + +def run_monitor( + *, + event_filter: Optional[str] = None, + dry_run: bool = False, + verbose: bool = False +) -> dict: + """ + Run the asimov monitor programmatically. + + This function performs the same monitoring operations as the CLI command + `asimov monitor`, but can be called from Python scripts or Jupyter notebooks. + + Parameters + ---------- + event_filter : str, optional + Filter to specific event name. If None, monitors all events. + dry_run : bool, optional + If True, performs monitoring without making any changes (default: False). + verbose : bool, optional + If True, prints progress information (default: False). 
+ + Returns + ------- + dict + Summary of monitoring results with the following keys: + - 'project_analyses': Number of project analyses monitored + - 'event_analyses': Number of event analyses monitored + - 'total': Total number of analyses monitored + - 'active': Number of active analyses + - 'complete': Number of complete analyses + - 'stuck': Number of stuck analyses + + Examples + -------- + Run monitor on all analyses: + + >>> from asimov.monitor_api import run_monitor + >>> results = run_monitor() + >>> print(f"Monitored {results['total']} analyses") + + Run monitor for a specific event: + + >>> results = run_monitor(event_filter="GW150914", verbose=True) + + Dry run to see what would happen: + + >>> results = run_monitor(dry_run=True, verbose=True) + + Use in a Jupyter notebook: + + >>> results = run_monitor() + >>> import pandas as pd + >>> df = pd.DataFrame([results]) + >>> display(df) + + Raises + ------ + RuntimeError + If condor scheduler cannot be found. + """ + if verbose: + print("Starting asimov monitor...") + + logger.info("Running asimov monitor (programmatic API)") + + # Initialize results + results = { + 'project_analyses': 0, + 'event_analyses': 0, + 'total': 0, + 'active': 0, + 'complete': 0, + 'stuck': 0, + } + + # Get condor job listing + try: + job_list = condor.CondorJobList() + except condor.htcondor.HTCondorLocateError: + raise RuntimeError( + "Could not find the condor scheduler. " + "You need to run asimov on a machine which has access to a " + "condor scheduler or specify the address of a valid scheduler." 
+ ) + + # Monitor project analyses + for analysis in ledger.project_analyses: + if analysis.status.lower() in ACTIVE_STATES: + results['project_analyses'] += 1 + results['total'] += 1 + + if verbose: + print(f"Monitoring project analysis: {analysis.name} [{analysis.status}]") + + monitor_analysis( + analysis=analysis, + job_list=job_list, + ledger=ledger, + dry_run=dry_run, + analysis_path=f"project_analyses/{analysis.name}" + ) + + # Track status counts + status_lower = analysis.status.lower() + if status_lower in ACTIVE_STATES: + results['active'] += 1 + if status_lower == 'stuck': + results['stuck'] += 1 + if analysis.status in {"finished", "uploaded"}: + results['complete'] += 1 + + # Monitor event analyses + for event in ledger.get_event(event_filter): + on_deck = [ + production + for production in event.productions + if production.status.lower() in ACTIVE_STATES + ] + + for production in on_deck: + results['event_analyses'] += 1 + results['total'] += 1 + + if verbose: + print(f"Monitoring {event.name}/{production.name} [{production.status}]") + + monitor_analysis( + analysis=production, + job_list=job_list, + ledger=ledger, + dry_run=dry_run, + analysis_path=f"{event.name}/{production.name}" + ) + + # Track status counts + status_lower = production.status.lower() + if status_lower in ACTIVE_STATES: + results['active'] += 1 + if status_lower == 'stuck': + results['stuck'] += 1 + if production.status in {"finished", "uploaded"}: + results['complete'] += 1 + + ledger.update_event(event) + + if verbose: + print(f"\nMonitoring complete:") + print(f" Total analyses: {results['total']}") + print(f" Project analyses: {results['project_analyses']}") + print(f" Event analyses: {results['event_analyses']}") + print(f" Active: {results['active']}") + print(f" Complete: {results['complete']}") + print(f" Stuck: {results['stuck']}") + + logger.info(f"Monitored {results['total']} analyses") + + return results + + +def get_analysis_status(*, analysis_name: str = None, 
event_name: str = None) -> dict: + """ + Get the current status of one or more analyses. + + Parameters + ---------- + analysis_name : str, optional + Name of a specific analysis to check. + event_name : str, optional + Name of event to filter analyses. + + Returns + ------- + dict + Dictionary mapping analysis names to their current status. + + Examples + -------- + Get status of all analyses: + + >>> from asimov.monitor_api import get_analysis_status + >>> statuses = get_analysis_status() + >>> for name, status in statuses.items(): + ... print(f"{name}: {status}") + + Get status for specific event: + + >>> statuses = get_analysis_status(event_name="GW150914") + + Get status for specific analysis: + + >>> status = get_analysis_status(analysis_name="bilby_analysis") + """ + statuses = {} + + # Check project analyses + for analysis in ledger.project_analyses: + if analysis_name is None or analysis.name == analysis_name: + statuses[f"project_analyses/{analysis.name}"] = analysis.status + + # Check event analyses + for event in ledger.get_event(event_name): + for production in event.productions: + if analysis_name is None or production.name == analysis_name: + statuses[f"{event.name}/{production.name}"] = production.status + + return statuses + + +def list_active_analyses() -> List[dict]: + """ + List all active analyses in the current project. + + Returns + ------- + list of dict + List of dictionaries with analysis information. Each dict contains: + - 'name': Analysis name + - 'type': 'project' or 'event' + - 'status': Current status + - 'event': Event name (for event analyses only) + - 'pipeline': Pipeline name + + Examples + -------- + >>> from asimov.monitor_api import list_active_analyses + >>> analyses = list_active_analyses() + >>> for analysis in analyses: + ... 
print(f"{analysis['name']}: {analysis['status']}") + + Use with pandas in Jupyter: + + >>> import pandas as pd + >>> analyses = list_active_analyses() + >>> df = pd.DataFrame(analyses) + >>> display(df) + """ + analyses = [] + + # Project analyses + for analysis in ledger.project_analyses: + if analysis.status.lower() in ACTIVE_STATES: + analyses.append({ + 'name': analysis.name, + 'type': 'project', + 'status': analysis.status, + 'pipeline': str(analysis.pipeline), + }) + + # Event analyses + for event in ledger.get_event(None): + for production in event.productions: + if production.status.lower() in ACTIVE_STATES: + analyses.append({ + 'name': production.name, + 'type': 'event', + 'status': production.status, + 'event': event.name, + 'pipeline': str(production.pipeline), + }) + + return analyses diff --git a/asimov/monitor_context.py b/asimov/monitor_context.py new file mode 100644 index 00000000..531e63d9 --- /dev/null +++ b/asimov/monitor_context.py @@ -0,0 +1,99 @@ +""" +Context management for asimov monitor loop. + +This module provides the MonitorContext class that coordinates state handling +and manages analysis monitoring. +""" + +from asimov import logger, LOGGER_LEVEL + +logger = logger.getChild("monitor_context") +logger.setLevel(LOGGER_LEVEL) + + +class MonitorContext: + """ + Context object for monitoring an analysis. + + This class encapsulates all the state and operations needed to monitor + a single analysis, including condor job lookups, ledger updates, and + state transitions. + + Parameters + ---------- + analysis : Analysis + The analysis to monitor. + job_list : CondorJobList + The condor job list for checking job status. + ledger : Ledger + The ledger for updating analysis state. + dry_run : bool, optional + If True, don't actually perform updates (default: False). + analysis_path : str, optional + Path to the analysis for logging (default: ""). 
+ """ + + def __init__(self, analysis, job_list, ledger, dry_run=False, analysis_path=""): + self.analysis = analysis + self.job_list = job_list + self.ledger = ledger + self.dry_run = dry_run + self.analysis_path = analysis_path + self._job = None + self._job_checked = False + + @property + def job_id(self): + """Get the condor job ID for this analysis.""" + try: + scheduler = self.analysis.meta.get("scheduler", {}) + if scheduler: + return scheduler.get("job id") + return None + except (AttributeError, TypeError): + return None + + @property + def job(self): + """ + Get the condor job object for this analysis. + + Returns None if the analysis has no job ID or if the job is not found + in the condor job list. + """ + if not self._job_checked: + job_id = self.job_id + if job_id and not self.dry_run: + self._job = self.job_list.jobs.get(job_id) + self._job_checked = True + return self._job + + def has_condor_job(self): + """Check if this analysis has a condor job ID.""" + return self.job_id is not None + + def clear_job_id(self): + """Clear the job ID from the analysis metadata.""" + if hasattr(self.analysis, 'meta') and self.analysis.meta: + if "scheduler" in self.analysis.meta: + self.analysis.meta["scheduler"]["job id"] = None + + def update_ledger(self): + """Update the analysis in the ledger.""" + if self.dry_run: + return + + # Determine if this is a project analysis or event analysis + if hasattr(self.analysis, 'event'): + # Event analysis (production) + self.ledger.update_event(self.analysis.event) + else: + # Project analysis + self.ledger.update_analysis_in_project_analysis(self.analysis) + + self.ledger.save() + + def refresh_job_list(self): + """Refresh the condor job list.""" + if not self.dry_run: + self.job_list.refresh() diff --git a/asimov/monitor_helpers.py b/asimov/monitor_helpers.py new file mode 100644 index 00000000..18e1d8a2 --- /dev/null +++ b/asimov/monitor_helpers.py @@ -0,0 +1,144 @@ +""" +Helper functions for the asimov monitor loop. 
def monitor_analysis(analysis, job_list, ledger, dry_run=False, analysis_path=None):
    """
    Monitor a single analysis and drive its state transitions.

    Shared implementation for both event analyses and project analyses,
    replacing the previously duplicated monitoring logic in the monitor
    command.

    Parameters
    ----------
    analysis : Analysis
        The analysis to monitor (SimpleAnalysis, ProjectAnalysis, etc.).
    job_list : CondorJobList
        Condor job list used to check scheduler job status.
    ledger : Ledger
        Ledger used to persist analysis state.
    dry_run : bool, optional
        When True, report only; perform no updates (default: False).
    analysis_path : str, optional
        Path to the analysis used in log messages; derived from the
        analysis itself when omitted (default: None).

    Returns
    -------
    bool
        True when monitoring succeeded, False otherwise.
    """
    # Derive a logging path when the caller did not supply one.
    if analysis_path is None:
        if hasattr(analysis, "event"):
            analysis_path = f"{analysis.event.name}/{analysis.name}"
        else:
            analysis_path = f"project_analyses/{analysis.name}"

    # Header line for this analysis.
    click.echo(
        "\t- "
        + click.style(f"{analysis.name}", bold=True)
        + click.style(f"[{analysis.pipeline}]", fg="green")
    )

    # Inactive analyses need no further attention.
    if analysis.status.lower() not in ACTIVE_STATES:
        logger.debug(f"Skipping inactive analysis: {analysis_path}")
        return True

    logger.debug(f"Monitoring analysis: {analysis_path}")

    context = MonitorContext(
        analysis=analysis,
        job_list=job_list,
        ledger=ledger,
        dry_run=dry_run,
        analysis_path=analysis_path,
    )

    # Pipeline-specific handlers (when defined) take precedence over defaults.
    handler = get_state_handler(
        analysis.status, pipeline=getattr(analysis, "pipeline", None)
    )

    if not handler:
        logger.warning(f"No state handler for status: {analysis.status}")
        click.echo(
            " \t "
            + click.style("●", "yellow")
            + f" Unknown status: {analysis.status}"
        )
        return False

    # State handlers are responsible for calling context.update_ledger()
    # when they make changes that need to be persisted.
    try:
        return handler.handle(context)
    except Exception as e:
        logger.exception(f"Error handling state {analysis.status} for {analysis_path}")
        click.echo(
            " \t "
            + click.style("●", "red")
            + f" Error processing {analysis.name}: {e}"
        )
        return False
+ dry_run : bool, optional + If True, don't actually perform updates (default: False). + label : str, optional + Label for the analyses being monitored (default: "analyses"). + + Returns + ------- + dict + Statistics about the monitored analyses (counts by status). + """ + + stats = { + "total": 0, + "running": 0, + "stuck": 0, + "finished": 0, + "ready": 0, + } + + for analysis in analyses: + if analysis.status.lower() in ACTIVE_STATES: + logger.debug(f"Available {label}: {analysis.name}") + monitor_analysis(analysis, job_list, ledger, dry_run) + + stats["total"] += 1 + status = analysis.status.lower() + if status in stats: + stats[status] += 1 + + return stats diff --git a/asimov/monitor_states.py b/asimov/monitor_states.py new file mode 100644 index 00000000..12c63ce2 --- /dev/null +++ b/asimov/monitor_states.py @@ -0,0 +1,494 @@ +""" +State machine implementation for asimov monitor loop. + +This module provides a clean state pattern implementation to replace the +hard-coded if-elif chains in the monitor loop. +""" + +from abc import ABC, abstractmethod +import configparser +import sys +import click +from asimov import logger, LOGGER_LEVEL, config, condor + +if sys.version_info < (3, 10): + from importlib_metadata import entry_points +else: + from importlib.metadata import entry_points + +logger = logger.getChild("monitor_states") +logger.setLevel(LOGGER_LEVEL) + + +class MonitorState(ABC): + """ + Abstract base class for monitor states. + + Each concrete state handles the monitoring logic for analyses in that state. + """ + + @abstractmethod + def handle(self, context): + """ + Handle the monitoring logic for an analysis in this state. + + Parameters + ---------- + context : MonitorContext + The monitoring context containing the analysis, job, and other info. + + Returns + ------- + bool + True if the state was handled successfully, False otherwise. 
class RunningState(MonitorState):
    """Handle analyses in 'running' state."""

    @property
    def state_name(self):
        return "running"

    def handle(self, context):
        """Dispatch on whether the analysis has a condor job recorded."""
        if context.has_condor_job():
            return self._handle_condor_job(context)
        return self._handle_no_condor_job(context)

    def _handle_condor_job(self, context):
        """Report/advance an analysis whose condor job ID is recorded."""
        analysis = context.analysis
        job = context.job

        if job is None:
            # The job is no longer in the queue; it may have completed or
            # been evicted, so fall back to the no-job path.
            return self._handle_no_condor_job(context)

        status = job.status.lower()

        if status == "idle":
            click.echo(
                " \t "
                + click.style("●", "green")
                + f" {analysis.name} is in the queue (condor id: {context.job_id})"
            )
            return True

        if status == "running":
            click.echo(
                " \t "
                + click.style("●", "green")
                + f" {analysis.name} is running (condor id: {context.job_id})"
            )
            if "profiling" not in analysis.meta:
                analysis.meta["profiling"] = {}
            # Give the pipeline a chance to do per-cycle work while running.
            if hasattr(analysis.pipeline, "while_running"):
                analysis.pipeline.while_running()
            analysis.status = "running"
            context.update_ledger()
            return True

        if status == "completed":
            analysis.pipeline.after_completion()
            click.echo(
                " \t "
                + click.style("●", "green")
                + f" {analysis.name} has finished and post-processing has been started"
            )
            context.refresh_job_list()
            return True

        if status == "held":
            click.echo(
                " \t "
                + click.style("●", "yellow")
                + f" {analysis.name} is held on the scheduler"
                + f" (condor id: {context.job_id})"
            )
            analysis.status = "stuck"
            context.update_ledger()
            return True

        # Any other condor status is unhandled.
        return False

    def _handle_no_condor_job(self, context):
        """Handle an analysis without a live condor job (completed or evicted)."""
        analysis = context.analysis
        pipe = analysis.pipeline

        if not pipe:
            return False

        if not pipe.detect_completion():
            # No completion products found: assume eviction and try a rescue.
            click.echo(
                " \t "
                + click.style("●", "yellow")
                + f" {analysis.name} is stuck; attempting a rescue"
            )
            try:
                pipe.resurrect()
            except Exception:
                analysis.status = "stuck"
                click.echo(
                    " \t "
                    + click.style("●", "red")
                    + f" {analysis.name} is stuck; automatic rescue was not possible"
                )
                context.update_ledger()
                return False
            return True

        # Completion detected: collect profiling data while the job ID is known.
        if "profiling" not in analysis.meta:
            analysis.meta["profiling"] = {}

        ident = context.job_id
        if ident:
            try:
                config.get("condor", "scheduler")
                analysis.meta["profiling"] = condor.collect_history(ident)
                context.clear_job_id()
                context.update_ledger()
            except (configparser.NoOptionError, configparser.NoSectionError):
                logger.warning(
                    "Could not collect condor profiling data as no "
                    + "scheduler was specified in the config file."
                )
            except ValueError as e:
                logger.error("Could not collect condor profiling data.")
                logger.exception(e)

        analysis.status = "finished"
        context.update_ledger()
        pipe.after_completion()
        click.secho(
            f" \t ● {analysis.name} - Completion detected",
            fg="green",
        )
        context.refresh_job_list()
        return True
post-processing is running" + ) + return True + else: + click.echo( + " \t " + + click.style("●", "green") + + f" {context.analysis.name} has finished and post-processing" + + f" is stuck ({context.job_id})" + ) + return False + + +class StuckState(MonitorState): + """Handle analyses in 'stuck' state.""" + + @property + def state_name(self): + return "stuck" + + def handle(self, context): + """Report that the analysis is stuck.""" + click.echo( + " \t " + + click.style("●", "yellow") + + f" {context.analysis.name} is stuck" + ) + return True + + +class StoppedState(MonitorState): + """Handle analyses in 'stopped' state.""" + + @property + def state_name(self): + return "stopped" + + def handle(self, context): + """Stopped analyses are not active, just report status.""" + click.echo( + " \t " + + click.style("●", "red") + + f" {context.analysis.name} is stopped" + ) + return True + + +# State registry for mapping status strings to state handlers +STATE_REGISTRY = { + "ready": ReadyState(), + "stop": StopState(), + "running": RunningState(), + "finished": FinishedState(), + "processing": ProcessingState(), + "stuck": StuckState(), + "stopped": StoppedState(), +} + + +def register_state(state_handler): + """ + Register a custom state handler. + + This function allows custom state handlers to be registered at runtime, + either programmatically or via entry points. + + Parameters + ---------- + state_handler : MonitorState + An instance of a MonitorState subclass to register. + + Examples + -------- + >>> class CustomState(MonitorState): + ... @property + ... def state_name(self): + ... return "custom" + ... def handle(self, context): + ... 
return True + >>> register_state(CustomState()) + """ + if not isinstance(state_handler, MonitorState): + raise TypeError( + f"State handler must be an instance of MonitorState, " + f"got {type(state_handler).__name__}" + ) + + state_name = state_handler.state_name + if state_name in STATE_REGISTRY: + logger.warning( + f"Overwriting existing state handler for '{state_name}'" + ) + + STATE_REGISTRY[state_name] = state_handler + logger.debug(f"Registered state handler for '{state_name}'") + + +def discover_custom_states(): + """ + Discover and register custom state handlers via entry points. + + This function looks for entry points in the 'asimov.monitor.states' group + and automatically registers any custom state handlers defined by plugins. + + Entry points should return an instance of a MonitorState subclass. + + Examples + -------- + In your package's setup.py or pyproject.toml: + + .. code-block:: python + + # setup.py + entry_points={ + 'asimov.monitor.states': [ + 'validation = mypackage.states:ValidationState', + ] + } + + Or in pyproject.toml: + + .. 
code-block:: toml + + [project.entry-points."asimov.monitor.states"] + validation = "mypackage.states:ValidationState" + """ + try: + discovered_states = entry_points(group="asimov.monitor.states") + + for state_entry in discovered_states: + try: + # Load the state handler class or instance + state_obj = state_entry.load() + + # If it's a class, instantiate it + if isinstance(state_obj, type): + state_handler = state_obj() + else: + state_handler = state_obj + + # Register the state + register_state(state_handler) + logger.info( + f"Discovered and registered custom state '{state_entry.name}' " + f"from {state_entry.value}" + ) + except Exception as e: + logger.warning( + f"Failed to load custom state '{state_entry.name}': {e}" + ) + except Exception as e: + logger.debug(f"No custom states discovered: {e}") + + +def get_state_handler(status, pipeline=None): + """ + Get the appropriate state handler for a given status. + + This function first checks for pipeline-specific state handlers if a + pipeline is provided, then falls back to the global state registry. + This allows pipelines to define custom behavior for specific states. + + Parameters + ---------- + status : str + The status string (e.g., "running", "finished"). + pipeline : Pipeline, optional + The pipeline instance. If provided, pipeline-specific state handlers + will be checked first before falling back to default handlers. + + Returns + ------- + MonitorState + The state handler for this status, or None if not found. 
class ProjectAwareGroup(click.Group):
    """
    Click Group that refuses to run most commands outside an asimov project.

    ``init``, ``clone`` and any registered plugin commands are exempt and
    may be invoked from any working directory.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Names of plugin-provided commands; they handle their own project
        # checks when they need one.
        self._plugin_commands = set()

    def invoke(self, ctx):
        """Verify the working directory is an asimov project before dispatching."""
        # A bare `asimov` (no subcommand) just prints help; let it through.
        if ctx.invoked_subcommand is None:
            return super().invoke(ctx)

        # Commands permitted outside a project, plus all plugin commands.
        exempt = {"init", "clone"} | self._plugin_commands

        if ctx.invoked_subcommand not in exempt and not os.path.exists(".asimov"):
            click.secho("This isn't an asimov project", fg="white", bg="red")
            sys.exit(1)

        return super().invoke(ctx)
+ return ctx # Project initialisation @@ -64,3 +99,21 @@ def olivaw(ctx): # Review commands olivaw.add_command(review.review) olivaw.add_command(application.apply) + +# Auto-discover plugin commands + +discovered_commands = entry_points(group="asimov.commands") +for ep in discovered_commands: + try: + command = ep.load() + olivaw.add_command(command) + olivaw._plugin_commands.add(ep.name) + except (ImportError, ModuleNotFoundError, AttributeError) as e: + # Log but don't fail if a plugin command can't load due to import/attribute issues + logger = logging.getLogger("asimov.olivaw") + logger.debug(f"Failed to load plugin command {ep.name}: {e}") + except Exception as e: + # For unexpected errors, log with full traceback and re-raise + logger = logging.getLogger("asimov.olivaw") + logger.exception(f"Unexpected error while loading plugin command {ep.name}: {e}") + raise diff --git a/asimov/pipeline.py b/asimov/pipeline.py index 52c6aa23..d8dad38e 100644 --- a/asimov/pipeline.py +++ b/asimov/pipeline.py @@ -1,6 +1,7 @@ """Defines the interface with generic analysis pipelines.""" import configparser + import os import subprocess import time @@ -8,8 +9,14 @@ import asimov.analysis -warnings.filterwarnings("ignore", module="htcondor") -import htcondor # NoQA +try: + warnings.filterwarnings("ignore", module="htcondor2") + import htcondor2 as htcondor # NoQA + import classad2 as classad # NoQA +except ImportError: + warnings.filterwarnings("ignore", module="htcondor") + import htcondor # NoQA + import classad # NoQA from asimov import utils # NoQA from asimov import config, logger, logging, LOGGER_LEVEL # NoQA @@ -97,6 +104,30 @@ def __init__(self, production, category=None): self.logger = logger.getChild(full_name) self.logger.setLevel(LOGGER_LEVEL) + + # Initialize scheduler instance (lazy-loaded via property) + self._scheduler = None + + # Initialize prior interface + self._prior_interface = None + + @property + def scheduler(self): + """ + Get the configured scheduler 
instance for this pipeline. + + The scheduler is lazy-loaded on first access and cached for reuse. + + Returns + ------- + Scheduler + A configured scheduler instance + """ + if self._scheduler is None: + from asimov.scheduler_utils import get_configured_scheduler + self._scheduler = get_configured_scheduler() + return self._scheduler + def __repr__(self): return self.name.lower() @@ -159,6 +190,10 @@ def store_results(self): """ Store the PE Summary results """ + # Prefer absolute webroot; if relative, join to project root + webroot = config.get("general", "webroot") + if not os.path.isabs(webroot): + webroot = os.path.join(config.get("project", "root"), webroot) files = [ f"{self.production.name}_pesummary.dat", @@ -168,32 +203,94 @@ def store_results(self): for filename in files: results = os.path.join( - config.get("general", "webroot"), + webroot, self.production.event.name, self.production.name, "pesummary", "samples", filename, ) - store = Store(root=config.get("storage", "directory")) - store.add_file( - self.production.event.name, self.production.name, file=results - ) + if os.path.exists(results): + try: + store = Store(root=config.get("storage", "directory")) + store.add_file( + self.production.event.name, self.production.name, file=results + ) + except (OSError, IOError) as e: + self.logger.warning("Failed to store result %s: %s", results, e) + else: + self.logger.debug("Result not found, skipping: %s", results) def detect_completion_processing(self): - files = f"{self.production.name}_pesummary.dat" - results = os.path.join( - config.get("general", "webroot"), - self.production.event.name, - self.production.name, - "pesummary", - "samples", - files, - ) - if os.path.exists(results): + """ + Detect that PESummary post-processing outputs exist and are valid. + + For SubjectAnalysis productions, validates that the HDF5 file contains + all expected analyses as datasets. For regular analyses, just checks + that the file exists and is readable. 
+ """ + webroot = config.get("general", "webroot") + if not os.path.isabs(webroot): + webroot = os.path.join(config.get("project", "root"), webroot) + + base = os.path.join(webroot, self.production.event.name, self.production.name, "pesummary") + + # Posterior file is the primary completion criterion + posterior = os.path.join(base, "samples", "posterior_samples.h5") + if os.path.exists(posterior): + # Validate HDF5 file is readable and contains expected content + try: + import h5py + with h5py.File(posterior, 'r') as f: + # For SubjectAnalysis, verify all expected analyses are present as datasets + from asimov.analysis import SubjectAnalysis + if isinstance(self.production, SubjectAnalysis): + # Get the list of analyses that should be in the file + # Use resolved_dependencies if available (what was actually processed) + # Otherwise fall back to current analyses list + expected_analyses = getattr(self.production, 'resolved_dependencies', None) + if not expected_analyses and hasattr(self.production, 'analyses'): + expected_analyses = [a.name for a in self.production.analyses] + + if expected_analyses: + # Check if all expected analyses have datasets in the HDF5 file + # PESummary stores each analysis as a top-level group + available_keys = list(f.keys()) + missing = [name for name in expected_analyses if name not in available_keys] + + if missing: + self.logger.warning( + f"HDF5 file exists but is missing expected analyses: {missing}. 
" + f"Available: {available_keys}" + ) + return False + + self.logger.debug(f"HDF5 file validated with all expected analyses: {expected_analyses}") + else: + # For regular analysis, just verify the file has some content + if len(f.keys()) == 0: + self.logger.warning("HDF5 file exists but is empty") + return False + + return True + + except (OSError, IOError) as e: + self.logger.warning(f"HDF5 file exists but is not readable: {e}") + return False + except ImportError: + # h5py not available, fall back to simple existence check + self.logger.warning("h5py not available, cannot validate HDF5 contents") + return True + except Exception as e: + self.logger.warning(f"Error validating HDF5 file: {e}") + return False + + # Legacy sentinel + legacy = os.path.join(base, "samples", f"{self.production.name}_pesummary.dat") + if os.path.exists(legacy): return True - else: - return False + + return False def after_processing(self): """ @@ -201,9 +298,29 @@ def after_processing(self): """ try: self.store_results() - self.production.status = "uploaded" except Exception as e: - raise ValueError(e) + # Do not block upload on storage failures; log and continue + self.logger.warning("Post-processing storage error: %s", e) + self.production.status = "uploaded" + + def get_prior_interface(self): + """ + Get the prior interface for this pipeline. + + This method should be overridden by pipeline-specific implementations + to return their custom prior interface. + + Returns + ------- + PriorInterface + The prior interface for this pipeline + """ + from asimov.priors import PriorInterface + + if self._prior_interface is None: + priors = self.production.priors + self._prior_interface = PriorInterface(priors) + return self._prior_interface def eject_job(self): """ @@ -241,6 +358,61 @@ def submit_dag(self): def resurrect(self): pass + def while_running(self): + """ + Define a hook to run while the job is running. 
+ + This method is called during each monitor cycle while the analysis + is in the 'running' state. It can be used to collect intermediate + results, update progress information, etc. + + Note, this method should take no arguments, and should be over-written + in the specific pipeline implementation if required. + """ + pass + + def get_state_handlers(self): + """ + Get pipeline-specific state handlers. + + This method allows pipelines to define their own custom state handlers + that override or extend the default state handlers. This enables + pipeline-specific behavior for different analysis states. + + Returns + ------- + dict or None + A dictionary mapping state names (str) to MonitorState instances, + or None to use only default state handlers. + + Examples + -------- + Override the running state handler: + + >>> from asimov.monitor_states import MonitorState + >>> + >>> class CustomRunningState(MonitorState): + ... @property + ... def state_name(self): + ... return "running" + ... def handle(self, context): + ... # Custom running logic for this pipeline + ... return True + >>> + >>> class MyPipeline(Pipeline): + ... def get_state_handlers(self): + ... return { + ... "running": CustomRunningState(), + ... } + + Note + ---- + Pipeline-specific handlers take precedence over default handlers. + If a state is not defined in the pipeline's handlers, the default + handler will be used. + """ + return None + @classmethod def read_ini(cls, filepath): """ @@ -350,6 +522,15 @@ def submit_dag(self, dryrun=False): configfile = self.production.event.repository.find_prods( self.production.name, self.category )[0] + + # Validate minimum frequency format + min_freq = self.production.meta["waveform"]["minimum frequency"] + if not isinstance(min_freq, dict) or not min_freq: + raise ValueError( + "Minimum frequency in 'waveform' section must be a non-empty dictionary " + "mapping interferometer names to frequency values." 
+ ) + command = [ "--webdir", os.path.join( @@ -365,7 +546,7 @@ def submit_dag(self, dryrun=False): "--approximant", self.production.meta["waveform"]["approximant"], "--f_low", - str(min(self.production.meta["quality"]["minimum frequency"].values())), + str(min(min_freq.values())), "--f_ref", str(self.production.meta["waveform"]["reference frequency"]), ] @@ -467,7 +648,7 @@ def submit_dag(self, dryrun=False): # "should_transfer_files": "YES", "request_disk": "8192MB", "+flock_local": "True", - "+DESIRED_Sites": htcondor.classad.quote("nogrid"), + "+DESIRED_Sites": classad.quote("nogrid"), } if "accounting group" in self.meta: @@ -498,11 +679,18 @@ def submit_dag(self, dryrun=False): htcondor.DaemonTypes.Schedd, config.get("condor", "scheduler") ) schedd = htcondor.Schedd(schedulers) - except: # NoQA + except ( + configparser.NoOptionError, + configparser.NoSectionError, + htcondor.HTCondorLocateError, + htcondor.HTCondorIOError, + ): # If you can't find a specified scheduler, use the first one you find - schedd = htcondor.Schedd() - with schedd.transaction() as txn: - cluster_id = hostname_job.queue(txn) + schedulers = htcondor.Collector().locate(htcondor.DaemonTypes.Schedd) + schedd = htcondor.Schedd(schedulers) + + result = schedd.submit(hostname_job) + cluster_id = result.cluster() else: cluster_id = 0 diff --git a/asimov/pipelines/bayeswave.py b/asimov/pipelines/bayeswave.py index 54e272e1..faf709ed 100644 --- a/asimov/pipelines/bayeswave.py +++ b/asimov/pipelines/bayeswave.py @@ -27,7 +27,7 @@ class BayesWave(Pipeline): The production object. category : str, optional The category of the job. - Defaults to "C01_offline". + Defaults to "analyses". 
""" name = "BayesWave" @@ -47,11 +47,11 @@ def __init__(self, production, category=None): try: self.category = config.get("general", "calibration_directory") except configparser.NoOptionError: - self.category = "C01_offline" - self.logger.info("Assuming C01_offline calibration.") + self.category = "analyses" + self.logger.info("Assuming analyses directory.") if not production.meta.get("quality", {}).get("lowest minimum frequency", None): - production.meta["quality"]["lowest minimum frequency"] = self.flow + production.meta.setdefault("quality", {})["lowest minimum frequency"] = self.flow def build_dag(self, user=None, dryrun=False): """ @@ -288,7 +288,20 @@ def flow(self): minimum frequency from the list of interferometer lower frequencies. """ - return min(self.production.meta["quality"]["minimum frequency"].values()) + if "waveform" not in self.production.meta or "minimum frequency" not in self.production.meta["waveform"]: + raise ValueError( + "Minimum frequency must be specified in the 'waveform' section. " + "Please update your blueprint to include 'minimum frequency' in 'waveform'." + ) + + min_freq = self.production.meta["waveform"]["minimum frequency"] + if not isinstance(min_freq, dict) or not min_freq: + raise ValueError( + "Minimum frequency in 'waveform' section must be a non-empty dictionary " + "mapping interferometer names to frequency values." + ) + + return min(min_freq.values()) def before_submit(self): """ @@ -314,7 +327,7 @@ def before_submit(self): def submit_dag(self, dryrun=False): """ - Submit a DAG file to the condor cluster. + Submit a DAG file to the scheduler. 
Parameters ---------- @@ -336,50 +349,41 @@ def submit_dag(self, dryrun=False): """ self.before_submit() - command = [ - "condor_submit_dag", - "-batch-name", - f"bwave/{self.production.event.name}/{self.production.name}", - f"{self.production.name}.dag", - ] + dag_filename = f"{self.production.name}.dag" + batch_name = f"bwave/{self.production.event.name}/{self.production.name}" - self.logger.info((" ".join(command))) + self.logger.info(f"Submitting DAG: {dag_filename} with batch name: {batch_name}") if dryrun: - print(" ".join(command)) + print(f"Would submit DAG: {dag_filename} with batch name: {batch_name}") else: with set_directory(self.production.rundir): try: - dagman = subprocess.Popen( - command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + # Use the scheduler API to submit the DAG + cluster_id = self.scheduler.submit_dag( + dag_file=dag_filename, + batch_name=batch_name + ) + + self.production.status = "running" + self.production.job_id = int(cluster_id) + self.logger.info( + f"Successfully submitted to cluster {self.production.job_id}" ) + return (int(cluster_id),) + except FileNotFoundError as e: self.logger.exception(e) raise PipelineException( - "It looks like condor isn't installed on this system.\n" - f"""I wanted to run {" ".join(command)}.""" + "It looks like the scheduler isn't properly configured.\n" + f"Failed to submit DAG file: {dag_filename}" + ) from e + except RuntimeError as e: + self.logger.exception(e) + raise PipelineException( + f"The DAG file could not be submitted: {e}", ) from e - - stdout, stderr = dagman.communicate() - - if "submitted to cluster" in str(stdout): - cluster = re.search( - r"submitted to cluster ([\d]+)", str(stdout) - ).groups()[0] - self.production.status = "running" - self.production.job_id = int(cluster) - self.logger.info( - f"Successfully submitted to cluster {self.production.job_id}" - ) - self.logger.debug(stdout) - return (int(cluster),) - else: - self.logger.info(stdout) - self.logger.error(stderr) 
- raise PipelineException( - f"The DAG file could not be submitted.\n\n{stdout}\n\n{stderr}", - ) def upload_assets(self): """ @@ -425,7 +429,7 @@ def collect_logs(self): messages = {} logfile = os.path.join( - config.get("logging", "directory"), + config.get("logging", "location"), self.production.event.name, self.production.name, "asimov.log", diff --git a/asimov/pipelines/bilby.py b/asimov/pipelines/bilby.py index 974d8286..527772ad 100644 --- a/asimov/pipelines/bilby.py +++ b/asimov/pipelines/bilby.py @@ -8,11 +8,276 @@ import subprocess import time +from typing import Dict, Any + from .. import config from ..pipeline import Pipeline, PipelineException, PipelineLogger from .. import auth from .pesummary import PESummary +from ..priors import PriorInterface + + +class BilbyPriorInterface(PriorInterface): + """ + Prior interface for the Bilby pipeline. + + Converts asimov prior specifications into bilby prior_dict format. + """ + + def convert(self) -> Dict[str, Any]: + """ + Convert asimov priors to bilby prior_dict format. + + Returns + ------- + dict + Dictionary suitable for bilby's prior-dict config option + """ + if self.prior_dict is None: + return {} + + # Return the dictionary representation + # The actual rendering to bilby format happens in the template + return self.prior_dict.to_dict() + + def get_default_prior(self) -> str: + """ + Get the default prior set for bilby. + + Returns + ------- + str + The default prior class name (e.g., "BBHPriorDict") + """ + if self.prior_dict is None or self.prior_dict.default is None: + return "BBHPriorDict" + return self.prior_dict.default + + def to_prior_dict_string(self) -> str: + """ + Generate a string representation of the prior_dict for bilby. + + This method creates a complete Python dictionary string that can be + directly inserted into the bilby configuration file, providing + maximum flexibility for prior specifications. 
+ + Returns + ------- + str + String representation of the prior dictionary for bilby + """ + if self.prior_dict is None: + # Return default priors if none specified + return self._get_default_prior_dict_string() + + priors = self.prior_dict.to_dict() + prior_lines = [] + + # Process each prior specification + for param_name, param_spec in priors.items(): + if param_name == 'default': + # Skip the default key as it's handled separately + continue + + if not isinstance(param_spec, dict): + # Skip non-dict values + continue + + # Generate the prior string for this parameter + prior_str = self._format_prior(param_name, param_spec) + if prior_str: + prior_lines.append(prior_str) + + # Add default fixed priors for sky location and polarization, + # but only if they have not been specified by the user. + default_sky_priors = { + "dec": "dec = Cosine(name='dec')", + "ra": "ra = Uniform(name='ra', minimum=0, maximum=2 * np.pi, boundary='periodic')", + "theta_jn": "theta_jn = Sine(name='theta_jn')", + "psi": "psi = Uniform(name='psi', minimum=0, maximum=np.pi, boundary='periodic')", + "phase": "phase = Uniform(name='phase', minimum=0, maximum=2 * np.pi, boundary='periodic')", + } + + # Determine which parameters have been explicitly specified in the prior dict + specified_params = {name for name in priors.keys() if name != "default"} + + # Only append defaults for parameters that are not explicitly specified + for param_name, prior_str in default_sky_priors.items(): + if param_name not in specified_params: + prior_lines.append(prior_str) + + # Join all lines with proper indentation + return "{\n " + ",\n ".join(prior_lines) + "}" + + def _format_prior(self, param_name: str, param_spec: Dict[str, Any]) -> str: + """ + Format a single prior specification as a string. 
+ + Parameters + ---------- + param_name : str + The parameter name + param_spec : dict + The prior specification + + Returns + ------- + str + Formatted prior string + """ + # Map parameter names to bilby parameter names + name_map = { + 'chirp mass': 'chirp_mass', + 'mass ratio': 'mass_ratio', + 'total mass': 'total_mass', + 'mass 1': 'mass_1', + 'mass 2': 'mass_2', + 'spin 1': 'a_1', + 'spin 2': 'a_2', + 'tilt 1': 'tilt_1', + 'tilt 2': 'tilt_2', + 'phi 12': 'phi_12', + 'phi jl': 'phi_jl', + 'lambda 1': 'lambda_1', + 'lambda 2': 'lambda_2', + 'luminosity distance': 'luminosity_distance', + 'geocentric time': 'geocent_time' + } + + bilby_name = name_map.get(param_name, param_name.replace(' ', '_')) + + # Get prior type and parameters + prior_type = param_spec.get('type') + minimum = param_spec.get('minimum') + maximum = param_spec.get('maximum') + boundary = param_spec.get('boundary') + + # Whitelist of allowed prior types to prevent code injection + allowed_prior_types = { + 'Uniform', 'LogUniform', 'PowerLaw', 'Gaussian', 'TruncatedGaussian', + 'Sine', 'Cosine', 'Interped', 'FromFile', + 'DeltaFunction', 'Constraint', + 'bilby.gw.prior.UniformInComponentsChirpMass', + 'bilby.gw.prior.UniformInComponentsMassRatio', + 'bilby.gw.prior.AlignedSpin', + 'bilby.gw.prior.UniformComovingVolume', + 'bilby.gw.prior.UniformSourceFrame', + 'bilby.core.prior.Uniform', + 'bilby.core.prior.LogUniform', + 'bilby.core.prior.PowerLaw', + 'bilby.core.prior.Gaussian', + 'bilby.core.prior.TruncatedGaussian', + 'bilby.core.prior.Sine', + 'bilby.core.prior.Cosine', + 'bilby.core.prior.Interped', + 'bilby.core.prior.FromFile', + 'bilby.core.prior.DeltaFunction', + 'bilby.core.prior.Constraint' + } + + # Default prior types for common parameters + default_types = { + 'chirp_mass': 'bilby.gw.prior.UniformInComponentsChirpMass', + 'mass_ratio': 'bilby.gw.prior.UniformInComponentsMassRatio', + 'mass_1': 'Constraint', + 'mass_2': 'Constraint', + 'total_mass': 'Constraint', + 'a_1': 
'Uniform', + 'a_2': 'Uniform', + 'tilt_1': 'Sine', + 'tilt_2': 'Sine', + 'phi_12': 'Uniform', + 'phi_jl': 'Uniform', + 'lambda_1': 'Uniform', + 'lambda_2': 'Uniform', + 'luminosity_distance': 'PowerLaw', + 'geocent_time': 'Uniform' + } + + if prior_type is None: + prior_type = default_types.get(bilby_name, 'Uniform') + else: + # Validate that the prior type is in the whitelist + if prior_type not in allowed_prior_types: + raise ValueError( + f"Prior type '{prior_type}' for parameter '{bilby_name}' is not in the " + f"allowed list. This prevents potential code injection. " + f"Allowed types: {sorted(allowed_prior_types)}" + ) + + # Build the prior string + parts = [f"name='{bilby_name}'"] + + # Add minimum and maximum if present + if minimum is not None: + parts.append(f"minimum={minimum}") + elif bilby_name in ['a_1', 'a_2', 'phi_12', 'phi_jl', 'lambda_1', 'lambda_2']: + parts.append("minimum=0") + elif bilby_name in ['mass_1', 'mass_2']: + parts.append("minimum=1") + + if maximum is not None: + parts.append(f"maximum={maximum}") + elif bilby_name in ['a_1', 'a_2']: + parts.append("maximum=0.99") + elif bilby_name in ['phi_12', 'phi_jl']: + parts.append("maximum=2 * np.pi") + elif bilby_name in ['lambda_1', 'lambda_2']: + parts.append("maximum=5000") + elif bilby_name in ['mass_1', 'mass_2']: + parts.append("maximum=1000") + + # Add boundary condition if present + if boundary: + parts.append(f"boundary='{boundary}'") + elif bilby_name in ['phi_12', 'phi_jl']: + parts.append("boundary='periodic'") + + # Add unit for mass parameters + if bilby_name == 'chirp_mass': + parts.append("unit='$M_{\\odot}$'") + elif bilby_name == 'luminosity_distance': + parts.append("unit='Mpc'") + + # Add any other parameters from the spec + for key, value in param_spec.items(): + if key not in ['type', 'minimum', 'maximum', 'boundary'] and value is not None: + key_name = key.replace(' ', '_') + if isinstance(value, str): + parts.append(f"{key_name}='{value}'") + else: + 
parts.append(f"{key_name}={value}") + + return f"{bilby_name} = {prior_type}({', '.join(parts)})" + + def _get_default_prior_dict_string(self) -> str: + """ + Get the default prior dictionary string when no priors are specified. + + Returns + ------- + str + Default prior dictionary string + """ + return """{ + chirp_mass = bilby.gw.prior.UniformInComponentsChirpMass(name='chirp_mass', minimum=1, maximum=100, unit='$M_{\\odot}$'), + mass_ratio = bilby.gw.prior.UniformInComponentsMassRatio(name='mass_ratio', minimum=0.05, maximum=1.0), + mass_1 = Constraint(name='mass_1', minimum=1, maximum=1000), + mass_2 = Constraint(name='mass_2', minimum=1, maximum=1000), + a_1 = Uniform(name='a_1', minimum=0, maximum=0.99), + a_2 = Uniform(name='a_2', minimum=0, maximum=0.99), + tilt_1 = Sine(name='tilt_1'), + tilt_2 = Sine(name='tilt_2'), + phi_12 = Uniform(name='phi_12', minimum=0, maximum=2 * np.pi, boundary='periodic'), + phi_jl = Uniform(name='phi_jl', minimum=0, maximum=2 * np.pi, boundary='periodic'), + luminosity_distance = PowerLaw(name='luminosity_distance', unit='Mpc'), + dec = Cosine(name='dec'), + ra = Uniform(name='ra', minimum=0, maximum=2 * np.pi, boundary='periodic'), + theta_jn = Sine(name='theta_jn'), + psi = Uniform(name='psi', minimum=0, maximum=np.pi, boundary='periodic'), + phase = Uniform(name='phase', minimum=0, maximum=2 * np.pi, boundary='periodic') +}""" class Bilby(Pipeline): @@ -25,7 +290,7 @@ class Bilby(Pipeline): The production object. category : str, optional The category of the job. - Defaults to "C01_offline". + Defaults to "analyses". """ name = "bilby" @@ -42,19 +307,33 @@ def __init__(self, production, category=None): if not production.pipeline.lower() == "bilby": raise PipelineException("Pipeline mismatch") + + def get_prior_interface(self): + """ + Get the bilby-specific prior interface. 
+ + Returns + ------- + BilbyPriorInterface + The prior interface for bilby + """ + if self._prior_interface is None: + priors = self.production.priors + self._prior_interface = BilbyPriorInterface(priors) + return self._prior_interface def detect_completion(self): """ Check for the production of the posterior file to signal that the job has completed. """ self.logger.info("Checking if the bilby job has completed") - results_dir = glob.glob(f"{self.production.rundir}/result") + results_dir = glob.glob(f"{self.production.rundir}/final_result") if len(results_dir) > 0: # dynesty_merge_result.json results_files = glob.glob( - os.path.join(results_dir[0], "*merge*_result.hdf5") + os.path.join(results_dir[0], "*.hdf5") ) results_files += glob.glob( - os.path.join(results_dir[0], "*merge*_result.json") + os.path.join(results_dir[0], "*.json") ) self.logger.debug(f"results files {results_files}") if len(results_files) > 0: @@ -74,6 +353,31 @@ def before_submit(self): """ pass + def get_sampler_kwargs(self): + defaults = self.production.meta.get("sampler", {}).get("sampler kwargs", {}) + if self.production.dependencies: + productions = {} + for production in self.production.event.productions: + productions[production.name] = production + for previous_job in self.production.dependencies: + if "samples" in productions[previous_job].pipeline.collect_assets(): + posterior_file = productions[previous_job].pipeline.collect_assets()['samples'] + defaults['initial_result_file'] = posterior_file[0] + return defaults + + def get_additional_files(self): + defaults = self.production.meta.get("scheduler", {}).get("additional files", []) + if self.production.dependencies: + productions = {} + for production in self.production.event.productions: + productions[production.name] = production + for previous_job in self.production.dependencies: + if "samples" in productions[previous_job].pipeline.collect_assets(): + posterior_file = 
productions[previous_job].pipeline.collect_assets()['samples'] + defaults.append(posterior_file[0]) + return defaults + + @auth.refresh_scitoken def build_dag(self, psds=None, user=None, clobber_psd=False, dryrun=False): """ @@ -126,19 +430,23 @@ def build_dag(self, psds=None, user=None, clobber_psd=False, dryrun=False): else: job_label = self.production.name - default_executable = os.path.join( - config.get("pipelines", "environment"), "bin", "bilby_pipe" - ) - executable = self.production.meta.get("executable", default_executable) - if (executable := shutil.which(executable)) is not None: - pass - elif (executable := shutil.which("bilby_pipe")) is not None: - pass - else: - raise PipelineException( - "Cannot find bilby_pipe executable", - production=self.production.name, + if not dryrun: + default_executable = os.path.join( + config.get("pipelines", "environment"), "bin", "bilby_pipe" ) + executable = self.production.meta.get("executable", default_executable) + if (executable := shutil.which(executable)) is not None: + pass + elif (executable := shutil.which("bilby_pipe")) is not None: + pass + else: + raise PipelineException( + "Cannot find bilby_pipe executable", + production=self.production.name, + ) + else: + executable = "bilby_pipe" + command = [ executable, ini, @@ -183,7 +491,7 @@ def build_dag(self, psds=None, user=None, clobber_psd=False, dryrun=False): def submit_dag(self, dryrun=False): """ - Submit a DAG file to the condor cluster. + Submit a DAG file to the scheduler. Parameters ---------- @@ -207,7 +515,10 @@ def submit_dag(self, dryrun=False): Notes ----- This overloads the default submission routine, as bilby seems to store - its DAG files in a different location + its DAG files in a different location. + + This method now uses the scheduler API for DAG submission, making it + scheduler-agnostic and easier to support multiple scheduling systems. 
""" cwd = os.getcwd() @@ -220,53 +531,51 @@ def submit_dag(self, dryrun=False): job_label = self.production.meta["job label"] else: job_label = self.production.name + dag_filename = f"dag_{job_label}.submit" - command = [ - # "ssh", f"{config.get('scheduler', 'server')}", - "condor_submit_dag", - "-batch-name", - f"bilby/{self.production.event.name}/{self.production.name}", - os.path.join(self.production.rundir, "submit", dag_filename), - ] + dag_path = os.path.join(self.production.rundir, "submit", dag_filename) + batch_name = f"bilby/{self.production.event.name}/{self.production.name}" if dryrun: - print(" ".join(command)) + print(f"Would submit DAG: {dag_path} with batch name: {batch_name}") else: - self.logger.info(f"Working in {os.getcwd()}") + self.logger.info(f"Submitting DAG: {dag_path}") - dagman = subprocess.Popen( - command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT - ) - - self.logger.info(" ".join(command)) - - stdout, stderr = dagman.communicate() - - if "submitted to cluster" in str(stdout): - cluster = re.search( - r"submitted to cluster ([\d]+)", str(stdout) - ).groups()[0] + try: + # Use the scheduler API to submit the DAG + cluster_id = self.scheduler.submit_dag( + dag_file=dag_path, + batch_name=batch_name + ) + self.logger.info( - f"Submitted successfully. Running with job id {int(cluster)}" + f"Submitted successfully. 
Running with job id {int(cluster_id)}" ) self.production.status = "running" - self.production.job_id = int(cluster) - return cluster, PipelineLogger(stdout) - else: - self.logger.error("Could not submit the job to the cluster") - self.logger.info(stdout) - self.logger.error(stderr) - + self.production.job_id = int(cluster_id) + + # Create a mock stdout message for compatibility + stdout_msg = f"DAG submitted to cluster {cluster_id}" + return cluster_id, PipelineLogger(stdout_msg) + + except FileNotFoundError as error: + self.logger.error(f"DAG file not found: {dag_path}") + raise PipelineException( + f"The DAG file could not be found at {dag_path}.", + ) from error + except RuntimeError as error: + self.logger.error("Could not submit the job to the scheduler") + self.logger.exception(error) raise PipelineException( "The DAG file could not be submitted.", - ) + ) from error except FileNotFoundError as error: self.logger.exception(error) raise PipelineException( - "It looks like condor isn't installed on this system.\n" - f"""I wanted to run {" ".join(command)}.""" + "It looks like the scheduler isn't properly configured.\n" + f"Failed to submit DAG file: {dag_path}" ) from error def collect_assets(self): @@ -291,8 +600,8 @@ def samples(self, absolute=False): rundir = self.production.rundir self.logger.info(f"Rundir for samples: {rundir}") return glob.glob( - os.path.join(rundir, "result", "*_merge*_result.hdf5") - ) + glob.glob(os.path.join(rundir, "result", "*_merge*_result.json")) + os.path.join(rundir, "final_result", "*.hdf5") + ) + glob.glob(os.path.join(rundir, "final_result", "*.json")) def after_completion(self): post_pipeline = PESummary(production=self.production) diff --git a/asimov/pipelines/lalinference.py b/asimov/pipelines/lalinference.py index 19cdf123..9ee09b16 100644 --- a/asimov/pipelines/lalinference.py +++ b/asimov/pipelines/lalinference.py @@ -6,11 +6,76 @@ import re import subprocess +from typing import Dict, Any from asimov import config, 
logger from asimov.utils import set_directory from ..pipeline import Pipeline, PipelineException, PipelineLogger +from ..priors import PriorInterface + + +class LALInferencePriorInterface(PriorInterface): + """ + Prior interface for the LALInference pipeline. + + Converts asimov prior specifications into LALInference format. + LALInference uses different naming conventions and expects priors + as ranges (min/max values) rather than distribution objects. + """ + + def convert(self) -> Dict[str, Any]: + """ + Convert asimov priors to LALInference format. + + Returns + ------- + dict + Dictionary with LALInference-specific prior format + """ + if self.prior_dict is None: + return {} + + # Convert to LALInference format + # LALInference uses [min, max] arrays for ranges + lalinf_priors = {} + original_priors = self.prior_dict.to_dict() + + for param_name, param_spec in original_priors.items(): + if param_name == 'default': + continue + + if isinstance(param_spec, dict): + # Convert to LALInference range format + if 'minimum' in param_spec and 'maximum' in param_spec: + lalinf_priors[param_name] = [param_spec['minimum'], param_spec['maximum']] + else: + # Pass through as-is if not a min/max prior + lalinf_priors[param_name] = param_spec + else: + lalinf_priors[param_name] = param_spec + + return lalinf_priors + + def get_amp_order(self) -> int: + """ + Get the amplitude order for LALInference. + + Returns + ------- + int + Amplitude order (default: 0) + + Notes + ----- + Prefers 'amp order' but falls back to 'amplitude order' for backward compatibility. + """ + if self.prior_dict is None: + return 0 + + original_priors = self.prior_dict.to_dict() + # Prefer 'amp order' as the canonical name + return original_priors.get('amp order', original_priors.get('amplitude order', 0)) class LALInference(Pipeline): @@ -23,7 +88,7 @@ class LALInference(Pipeline): The production object. category : str, optional The category of the job. - Defaults to "C01_offline". 
+ Defaults to "analyses". """ name = "lalinference" @@ -39,6 +104,20 @@ def __init__(self, production, category=None): ) if not production.pipeline.lower() == "lalinference": raise PipelineException("Pipeline mismatch") + + def get_prior_interface(self): + """ + Get the LALInference-specific prior interface. + + Returns + ------- + LALInferencePriorInterface + The prior interface for LALInference + """ + if self._prior_interface is None: + priors = self.production.priors + self._prior_interface = LALInferencePriorInterface(priors) + return self._prior_interface def detect_completion(self): """ @@ -171,13 +250,13 @@ def collect_logs(self): def submit_dag(self, dryrun=False): """ - Submit a DAG file to the condor cluster. + Submit a DAG file to the scheduler. Parameters ---------- category : str, optional The category of the job. - Defaults to "C01_offline". + Defaults to "analyses". production : str The production name. dryrun: bool @@ -204,40 +283,37 @@ def submit_dag(self, dryrun=False): self.before_submit(dryrun=dryrun) try: - command = [ - "condor_submit_dag", - "-batch-name", - f"lalinf/{self.production.event.name}/{self.production.name}", - os.path.join(self.production.rundir, "multidag.dag"), - ] + dag_path = os.path.join(self.production.rundir, "multidag.dag") + batch_name = f"lalinf/{self.production.event.name}/{self.production.name}" if dryrun: - print(" ".join(command)) + print(f"Would submit DAG: {dag_path} with batch name: {batch_name}") else: - dagman = subprocess.Popen( - command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT - ) - - stdout, stderr = dagman.communicate() - - if "submitted to cluster" in str(stdout): - cluster = re.search( - r"submitted to cluster ([\d]+)", str(stdout) - ).groups()[0] + try: + # Use the scheduler API to submit the DAG + cluster_id = self.scheduler.submit_dag( + dag_file=dag_path, + batch_name=batch_name + ) + self.production.status = "running" - self.production.job_id = cluster - return cluster, 
PipelineLogger(stdout) - else: + self.production.job_id = cluster_id + + # Create a mock stdout message for compatibility + stdout_msg = f"DAG submitted to cluster {cluster_id}" + return cluster_id, PipelineLogger(stdout_msg) + + except (FileNotFoundError, RuntimeError) as error: raise PipelineException( - f"The DAG file could not be submitted.\n\n{stdout}\n\n{stderr}", + f"The DAG file could not be submitted: {error}", issue=self.production.event.issue_object, production=self.production.name, - ) + ) from error except FileNotFoundError as error: raise PipelineException( - "It looks like condor isn't installed on this system.\n" - f"""I wanted to run {" ".join(command)}.""" + "It looks like the scheduler isn't properly configured.\n" + f"Failed to submit DAG file: {dag_path}" ) from error def after_completion(self): diff --git a/asimov/pipelines/pesummary.py b/asimov/pipelines/pesummary.py index d636d198..15f93e78 100644 --- a/asimov/pipelines/pesummary.py +++ b/asimov/pipelines/pesummary.py @@ -1,13 +1,15 @@ """Defines the interface with generic analysis pipelines.""" +import configparser import os import warnings -warnings.filterwarnings("ignore", module="htcondor") - - -import htcondor # NoQA - +try: + warnings.filterwarnings("ignore", module="htcondor2") + import htcondor2 as htcondor # NoQA +except ImportError: + warnings.filterwarnings("ignore", module="htcondor") + import htcondor # NoQA from asimov import utils # NoQA from asimov import config, logger, logging, LOGGER_LEVEL # NoQA @@ -19,6 +21,10 @@ class PESummary(Pipeline): """ A postprocessing pipeline add-in using PESummary. + + This pipeline can work in two modes: + 1. Post-processing hook: Called after a single analysis completes (legacy mode) + 2. 
SubjectAnalysis: Processes results from multiple analyses as dependencies """ executable = os.path.join( @@ -27,11 +33,112 @@ class PESummary(Pipeline): name = "PESummary" def __init__(self, production, category=None): - self.production = production + """ + Initialize PESummary pipeline. + + Parameters + ---------- + production : Analysis + The analysis this pipeline is attached to. Can be a SimpleAnalysis + (for post-processing hook mode) or SubjectAnalysis (for multi-analysis mode) + category : str, optional + The category for file locations + """ + # Call parent constructor + super().__init__(production, category) + # Resolve executable, prefer explicit [pesummary] executable if provided + try: + pes_exec = config.get("pesummary", "executable") + if pes_exec: + self.executable = pes_exec + except (configparser.NoSectionError, configparser.NoOptionError): + # Fall back to pipelines environment path + pass + + self.analysis = production + + # Get subject/event - handle different analysis types + if hasattr(production, 'subject'): + self.event = self.subject = production.subject + elif hasattr(production, 'event'): + self.event = self.subject = production.event + else: + raise PipelineException( + "Production must have either 'subject' or 'event' attribute" + ) + + # Set category appropriately + if category: + self.category = category + elif hasattr(production, 'category'): + self.category = production.category + else: + self.category = config.get("general", "calibration_directory") + + # Get metadata - check different locations based on analysis type + if "postprocessing" in production.meta and self.name.lower() in production.meta["postprocessing"]: + self.meta = production.meta["postprocessing"][self.name.lower()] + elif hasattr(production, 'subject') and "postprocessing" in production.subject.meta: + # For SimpleAnalysis, check subject metadata + if self.name.lower() in production.subject.meta["postprocessing"]: + self.meta = 
production.subject.meta["postprocessing"][self.name.lower()] + else: + self.meta = {} + else: + self.meta = production.meta + + def collect_assets(self): + """ + Gather all of the results assets for this job. - self.category = category if category else production.category - self.logger = logger - self.meta = self.production.meta["postprocessing"][self.name.lower()] + For PESummary SubjectAnalysis jobs, this returns the combined results. + For PESummary post-processing jobs, this returns the samples and config. + + Returns + ------- + dict + A dictionary of assets with keys like 'samples', 'config', etc. + """ + # For PESummary as a SubjectAnalysis, return the combined samples + webroot = config.get("general", "webroot") + if not os.path.isabs(webroot): + webroot = os.path.join(config.get("project", "root"), webroot) + + # Path to the combined posterior samples file + samples_file = os.path.join( + webroot, + self.subject.name, + self.production.name, + "pesummary", + "samples", + "posterior_samples.h5" + ) + + assets = {} + + # Add samples if they exist + if os.path.exists(samples_file): + assets["samples"] = samples_file + + # For post-processing mode, also include the config + from asimov.analysis import SubjectAnalysis + if not isinstance(self.production, SubjectAnalysis): + try: + config_file = self.event.repository.find_prods( + self.production.name, self.category + )[0] + assets["config"] = config_file + except (AttributeError, IndexError): + # If the event or repository is missing, or no production config + # is found, skip adding a config asset but continue without error. 
+ logger.debug( + "PESummary.collect_assets: no config found for production %s " + "in category %s", + getattr(self.production, "name", ""), + getattr(self, "category", ""), + ) + + return assets def results(self): """ @@ -55,51 +162,231 @@ def results(self): self.outputs = os.path.join( config.get("project", "root"), config.get("general", "webroot"), - self.subject.name, + self.name, ) - self.outputs = os.path.join(self.outputs, self.production.name) - self.outputs = os.path.join(self.outputs, "pesummary") + self.outputs = os.path.join(self.outputs, self.name, "pesummary") metafile = os.path.join(self.outputs, "samples", "posterior_samples.h5") return dict(metafile=metafile) + def build_dag(self, user=None, dryrun=False): + """ + Prepare the PESummary job for submission. + + For PESummary, there's no DAG file to build since it runs as a single job. + This method exists to satisfy the Pipeline interface. + + Parameters + ---------- + user : str, optional + The user accounting tag (not used by PESummary) + dryrun : bool, optional + If True, don't actually build anything + """ + # PESummary doesn't need a DAG file - it runs as a single condor job + # The actual job configuration is done in submit_dag + pass + def submit_dag(self, dryrun=False): """ Run PESummary on the results of this job. + + Supports two modes: + 1. Post-processing a single analysis (SimpleAnalysis) + 2. 
Combining multiple analyses (SubjectAnalysis) """ + # Determine if this is a SubjectAnalysis or SimpleAnalysis + from asimov.analysis import SubjectAnalysis + is_subject_analysis = isinstance(self.production, SubjectAnalysis) + + # Get config file(s) + # For SubjectAnalysis, configs are collected from dependencies above + # For SimpleAnalysis (post-processing hook), get the config from the production + configfile = None # Initialize to avoid unbound variable + if not is_subject_analysis: + try: + configfile = self.event.repository.find_prods( + self.production.name, self.category + )[0] + except (AttributeError, IndexError): # pragma: no cover + raise PipelineException( + "Could not find PESummary configuration file." + ) - configfile = self.production.event.repository.find_prods( - self.production.name, self.category - )[0] - label = str(self.production.name) + # Prefer assets from the current production; fall back to dependency assets + current_assets = {} + if not is_subject_analysis: + try: + current_assets = self.production.pipeline.collect_assets() + except (AttributeError, PipelineException): + # If the production has no pipeline or the pipeline fails in an + # expected way, fall back to using no current assets. + current_assets = {} + + # Determine labels and samples for PESummary + if is_subject_analysis: + # Multiple analyses - get labels and samples from dependencies + labels = [] + samples_list = [] + config_list = [] + approximants = [] + f_lows = [] + f_refs = [] + + # Get the analyses that are dependencies + # Prefer the current analyses list; fall back to productions if needed + if hasattr(self.production, 'analyses') and self.production.analyses: + source_analyses = self.production.analyses + elif hasattr(self.production, 'productions') and self.production.productions: + source_analyses = self.production.productions + else: + raise PipelineException( + "SubjectAnalysis PESummary has no source analyses to process." 
+ ) + + for dep_analysis in source_analyses: + # Get samples and config directly from this analysis + dep_assets = dep_analysis.pipeline.collect_assets() + if not isinstance(dep_assets, dict): + self.logger.warning( + f"collect_assets for {dep_analysis.name} returned " + f"{type(dep_assets).__name__}, expected dict; skipping this analysis." + ) + continue + dep_samples = dep_assets.get("samples", None) + dep_config = dep_assets.get("config", None) + if dep_samples: + labels.append(dep_analysis.name) + samples_list.append(dep_samples) + + # Collect waveform parameters for this analysis + if "waveform" in dep_analysis.meta: + if "approximant" in dep_analysis.meta["waveform"]: + approximants.append(dep_analysis.meta["waveform"]["approximant"]) + if "reference frequency" in dep_analysis.meta["waveform"]: + f_refs.append(str(dep_analysis.meta["waveform"]["reference frequency"])) + + if "waveform" in dep_analysis.meta: + if "minimum frequency" in dep_analysis.meta["waveform"]: + min_freq = dep_analysis.meta["waveform"]["minimum frequency"] + if isinstance(min_freq, dict) and min_freq: + f_lows.append(str(min(min_freq.values()))) + else: + self.logger.warning( + f"Invalid minimum frequency format in {dep_analysis.name}, skipping" + ) + + # Config file should be added for each analysis that has samples + if dep_config: + # Convert to absolute path if needed + if isinstance(dep_config, str): + config_path = os.path.join( + self.event.repository.directory, + dep_analysis.category if hasattr(dep_analysis, 'category') else self.category, + dep_config + ) + config_list.append(config_path) + elif isinstance(dep_config, list): + # If it's a list, handle each config file + for cfg in dep_config: + config_path = os.path.join( + self.event.repository.directory, + dep_analysis.category if hasattr(dep_analysis, 'category') else self.category, + cfg + ) + config_list.append(config_path) + else: + config_list.append(dep_config) + else: + self.logger.warning(f"No config found for 
{dep_analysis.name}") + else: + self.logger.warning(f"No samples found for {dep_analysis.name}") + + if not samples_list: + raise PipelineException( + "No samples found from any dependency analyses." + ) + + # Persist resolved dependencies so we can detect staleness later + try: + self.production.resolved_dependencies = labels + self.logger.info(f"Stored resolved dependencies: {labels}") + except Exception as e: + self.logger.error(f"Failed to store resolved_dependencies: {e}") + raise PipelineException(f"Could not store resolved dependencies: {e}") from e + + # Ensure that the run directory exists (race-free) + os.makedirs(self.production.rundir, exist_ok=True) + + # For SubjectAnalysis, use metadata from the production itself + # Individual analysis waveform settings are collected above + waveform_meta = self.production.meta.get("waveform", {}) + quality_meta = self.production.meta.get("quality", {}) + else: + # Single analysis mode (post-processing) + labels = [self.production.name] + if "samples" in current_assets and current_assets["samples"]: + samples_list = [current_assets["samples"]] + else: + samples_list = [self.production._previous_assets().get("samples", {})] + waveform_meta = self.production.meta.get("waveform", {}) + quality_meta = self.production.meta.get("quality", {}) command = [ "--webdir", os.path.join( config.get("project", "root"), config.get("general", "webroot"), - self.production.event.name, + self.subject.name, self.production.name, "pesummary", ), "--labels", - label, ] + command.extend(labels) command += ["--gw"] - command += [ - "--approximant", - self.production.meta["waveform"]["approximant"], - ] - - command += [ - "--f_low", - str(min(self.production.meta["quality"]["minimum frequency"].values())), - "--f_ref", - str(self.production.meta["waveform"]["reference frequency"]), - ] + + # Add waveform settings if available + # For SubjectAnalysis with multiple approximants, pass them as a list + if is_subject_analysis and approximants: + 
command += ["--approximant"] + command.extend(approximants) + elif "approximant" in waveform_meta: + command += [ + "--approximant", + waveform_meta["approximant"], + ] + + # f_low - use per-analysis values if available, otherwise use global + if is_subject_analysis and f_lows: + # If we have per-analysis f_low values, use them + command += ["--f_low"] + command.extend(f_lows) + elif "minimum frequency" in waveform_meta: + min_freq = waveform_meta["minimum frequency"] + if isinstance(min_freq, dict) and min_freq: + command += [ + "--f_low", + str(min(min_freq.values())), + ] + else: + raise ValueError( + "Minimum frequency in 'waveform' section must be a non-empty dictionary " + "mapping interferometer names to frequency values." + ) + + # f_ref - use per-analysis values if available, otherwise use global + if is_subject_analysis and f_refs: + command += ["--f_ref"] + command.extend(f_refs) + elif "reference frequency" in waveform_meta: + command += [ + "--f_ref", + str(waveform_meta["reference frequency"]), + ] if "cosmology" in self.meta: command += [ @@ -120,7 +407,7 @@ def submit_dag(self, dryrun=False): if "backwards" in self.meta["evolve spins"]: command += ["--evolve_spins_backwards", "precession_averaged"] - if "nrsur" in self.production.meta["waveform"]["approximant"].lower(): + if "nrsur" in waveform_meta.get("approximant", "").lower(): command += ["--NRSur_fits"] if "multiprocess" in self.meta: @@ -133,47 +420,141 @@ def submit_dag(self, dryrun=False): if "precessing snr" in self.meta["calculate"]: command += ["--calculate_precessing_snr"] - # Config file - command += [ - "--config", - os.path.join( - self.production.event.repository.directory, self.category, configfile - ), - ] - # Samples + # Handle additional arguments - supports both flags and options with values + # This allows passing arbitrary PESummary command-line arguments via the blueprint. + # + # Supported formats in the blueprint: + # + # 1. 
Dictionary format (for options with values): + # additional arguments: + # nsamples: 1000 + # seed: 42 + # custom_option: "some_value" + # Result: --nsamples 1000 --seed 42 --custom_option some_value + # + # 2. List format (for flags without values): + # additional arguments: ["disable_prior_sampling", "no_ligo_skymap"] + # Result: --disable_prior_sampling --no_ligo_skymap + # + # 3. Mixed list format (flags and options): + # additional arguments: + # - "disable_prior_sampling" # flag + # - {nsamples: 1000} # option with value + # - "no_ligo_skymap" # flag + # - {seed: 42} # option with value + # Result: --disable_prior_sampling --nsamples 1000 --no_ligo_skymap --seed 42 + # + # Note: Options with None or empty string values will only add the flag without a value. + if "additional arguments" in self.meta: + additional_args = self.meta["additional arguments"] + + # If it's a dictionary, treat keys as options and values as their arguments + if isinstance(additional_args, dict): + for key, value in additional_args.items(): + command += [f"--{key}"] + if value is not None and value != "": + # Only add the value if it's not None or empty string + command += [str(value)] + + # If it's a list, each item can be a flag (string) or option (dict) + elif isinstance(additional_args, list): + for arg in additional_args: + if isinstance(arg, str): + # Simple flag + command += [f"--{arg}"] + elif isinstance(arg, dict): + # Option with value(s) + for key, value in arg.items(): + command += [f"--{key}"] + if value is not None and value != "": + command += [str(value)] + + # Samples - handle both single and multiple analyses command += ["--samples"] - command += [self.production._previous_assets().get("samples", {})] + if is_subject_analysis: + # Multiple samples files + for samples in samples_list: + if isinstance(samples, dict): + # If samples is a dict, log a warning and skip + self.logger.warning( + f"Unexpected dict format for samples in SubjectAnalysis: {samples}" + ) + 
continue + elif isinstance(samples, list): + command.extend(samples) + else: + command.append(samples) + else: + # Single samples file + samples = samples_list[0] + if isinstance(samples, list): + command.extend(samples) + elif isinstance(samples, str): + command.append(samples) + else: + # Dict or other - try to convert to string + self.logger.warning( + f"Unexpected format for samples: {type(samples)}, converting to string" + ) + command.append(str(samples)) + + # Config files - handle both single and multiple analyses + command += ["--config"] + if is_subject_analysis: + # Multiple config files from dependencies + if config_list: + command.extend(config_list) + else: + self.logger.warning("No config files found from dependency analyses") + else: + # Single config file for post-processing mode + if configfile is not None: + command.append( + os.path.join( + self.event.repository.directory, self.category, configfile + ) + ) + else: + raise PipelineException("No config file available for PESummary") - # PSDs + # PSDs - get from first analysis in SubjectAnalysis mode or from this production + if is_subject_analysis and source_analyses: + psds = source_analyses[0].pipeline.collect_assets().get("psds", {}) + else: + psds = current_assets.get("psds", {}) or self.production._previous_assets().get("psds", {}) + psds = { ifo: os.path.abspath(psd) - for ifo, psd in self.production._previous_assets().get("psds", {}).items() + for ifo, psd in psds.items() } if len(psds) > 0: command += ["--psds"] for key, value in psds.items(): command += [f"{key}:{value}"] - # Calibration envelopes + # Calibration envelopes - get from first analysis in SubjectAnalysis mode or from this production + if is_subject_analysis and source_analyses: + cals = source_analyses[0].pipeline.collect_assets().get("calibration", {}) + else: + cals = current_assets.get("calibration", {}) or self.production._previous_assets().get("calibration", {}) + cals = { - ifo: os.path.abspath(psd) - for ifo, psd in 
self.production._previous_assets() - .get("calibration", {}) - .items() + ifo: os.path.abspath(cal) + for ifo, cal in cals.items() } if len(cals) > 0: command += ["--calibration"] for key, value in cals.items(): command += [f"{key}:{value}"] - with utils.set_directory(self.subject.work_dir): + with utils.set_directory(self.production.rundir): with open("pesummary.sh", "w") as bash_file: bash_file.write(f"{self.executable} " + " ".join(command)) self.logger.info( f"PE summary command: {self.executable} {' '.join(command)}", ) - + if dryrun: print("PESUMMARY COMMAND") print("-----------------") @@ -182,9 +563,9 @@ def submit_dag(self, dryrun=False): submit_description = { "executable": self.executable, "arguments": " ".join(command), - "output": f"{self.subject.work_dir}/pesummary.out", - "error": f"{self.subject.work_dir}/pesummary.err", - "log": f"{self.subject.work_dir}/pesummary.log", + "output": f"{self.production.rundir}/pesummary.out", + "error": f"{self.production.rundir}/pesummary.err", + "log": f"{self.production.rundir}/pesummary.log", "request_cpus": self.meta["multiprocess"], "getenv": "true", "batch_name": f"Summary Pages/{self.subject.name}/{self.production.name}", @@ -202,19 +583,20 @@ def submit_dag(self, dryrun=False): print(submit_description) if not dryrun: - hostname_job = htcondor.Submit(submit_description) + job = htcondor.Submit(submit_description) try: - # There should really be a specified submit node, and if there is, use it. 
schedulers = htcondor.Collector().locate( htcondor.DaemonTypes.Schedd, config.get("condor", "scheduler") ) - schedd = htcondor.Schedd(schedulers) - except: # NoQA - # If you can't find a specified scheduler, use the first one you find - schedd = htcondor.Schedd() - with schedd.transaction() as txn: - cluster_id = hostname_job.queue(txn) + except (configparser.NoOptionError, configparser.NoSectionError): + schedulers = htcondor.Collector().locate(htcondor.DaemonTypes.Schedd) + + schedd = htcondor.Schedd(schedulers) + + result = schedd.submit(job) + cluster_id = result.cluster() + self.logger.info(f"Submitted {cluster_id} to htcondor job queue.") else: cluster_id = 0 diff --git a/asimov/pipelines/rift.py b/asimov/pipelines/rift.py index 7c2d6dbd..192cc8ac 100644 --- a/asimov/pipelines/rift.py +++ b/asimov/pipelines/rift.py @@ -25,7 +25,7 @@ class Rift(Pipeline): The production object. category : str, optional The category of the job. - Defaults to "C01_offline". + Defaults to "analyses". """ name = "RIFT" @@ -281,7 +281,7 @@ def build_dag(self, user=None, dryrun=False): if self.production.event.repository: bootstrap_file = os.path.join( self.production.event.repository.directory, - "C01_offline", + "analyses", f"{self.production.name}_bootstrap.xml.gz", ) else: @@ -350,14 +350,14 @@ def build_dag(self, user=None, dryrun=False): def submit_dag(self, dryrun=False): """ - Submit a DAG file to the condor cluster (using the RIFT dag name). + Submit a DAG file to the scheduler (using the RIFT dag name). This is an overwrite of the near identical parent function submit_dag() Parameters ---------- category : str, optional The category of the job. - Defaults to "C01_offline". + Defaults to "analyses". production : str The production name. 
@@ -378,18 +378,14 @@ def submit_dag(self, dryrun=False): ifo = psdfile.split("/")[-1].split("-")[1].split(".")[0] os.system(f"cp {psdfile} {ifo}-psd.xml.gz") - command = [ - "condor_submit_dag", - "-batch-name", - f"rift/{self.production.event.name}/{self.production.name}", - "marginalize_intrinsic_parameters_BasicIterationWorkflow.dag", - ] + dag_filename = "marginalize_intrinsic_parameters_BasicIterationWorkflow.dag" + batch_name = f"rift/{self.production.event.name}/{self.production.name}" if dryrun: for psdfile in self.production.get_psds("xml"): print(f"cp {psdfile} {self.production.rundir}/{psdfile.split('/')[-1]}") print("") - print(" ".join(command)) + print(f"Would submit DAG: {dag_filename} with batch name: {batch_name}") else: for psdfile in self.production.get_psds("xml"): os.system( @@ -398,32 +394,31 @@ def submit_dag(self, dryrun=False): try: with set_directory(self.production.rundir): - - dagman = subprocess.Popen( - command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + # Use the scheduler API to submit the DAG + cluster_id = self.scheduler.submit_dag( + dag_file=dag_filename, + batch_name=batch_name ) - self.logger.info(command, production=self.production) + + self.logger.info(f"Submitted DAG to cluster {cluster_id}", production=self.production) + self.production.status = "running" + self.production.job_id = int(cluster_id) + + # Create a mock stdout message for compatibility + stdout_msg = f"DAG submitted to cluster {cluster_id}" + return cluster_id, PipelineLogger(stdout_msg) + except FileNotFoundError as exception: raise PipelineException( - "It looks like condor isn't installed on this system.\n" - f"""I wanted to run {" ".join(command)}.""" + "It looks like the scheduler isn't properly configured.\n" + f"Failed to submit DAG file: {dag_filename}" ) from exception - - stdout, stderr = dagman.communicate() - - if "submitted to cluster" in str(stdout): - cluster = re.search( - r"submitted to cluster ([\d]+)", str(stdout) - ).groups()[0] - 
self.production.status = "running" - self.production.job_id = int(cluster) - return cluster, PipelineLogger(stdout) - else: + except RuntimeError as exception: raise PipelineException( - f"The DAG file could not be submitted.\n\n{stdout}\n\n{stderr}", + f"The DAG file could not be submitted: {exception}", issue=self.production.event.issue_object, production=self.production.name, - ) + ) from exception def resurrect(self): """ diff --git a/asimov/pipelines/testing/README.md b/asimov/pipelines/testing/README.md new file mode 100644 index 00000000..36d41e83 --- /dev/null +++ b/asimov/pipelines/testing/README.md @@ -0,0 +1,248 @@ +# Testing Pipelines for Asimov + +This directory contains minimal testing pipelines for asimov's three analysis types. These pipelines are designed for testing asimov's infrastructure without requiring real gravitational wave analysis pipelines, and also serve as templates for pipeline developers. + +## Overview + +The testing pipelines provide minimal implementations for: + +1. **SimpleAnalysis** (`simple.py`) - Single-event, single-pipeline analysis +2. **SubjectAnalysis** (`subject.py`) - Single-event, multi-pipeline analysis +3. **ProjectAnalysis** (`project.py`) - Multi-event, multi-pipeline analysis + +## Purpose + +These pipelines serve two important purposes: + +### 1. Testing Infrastructure +- Enable end-to-end testing of asimov without real analysis pipelines +- Complete quickly (seconds instead of hours) +- Create dummy output files that mimic real pipeline outputs +- Allow testing of workflow management, job submission, and monitoring + +### 2. 
Developer Templates +- Provide well-documented examples of pipeline implementation +- Show the minimum required methods and their signatures +- Demonstrate proper use of the Pipeline base class +- Illustrate best practices for each analysis type + +## Usage + +### In Tests + +The testing pipelines are used in asimov's test suite and CI/CD pipelines: + +```yaml +# Example test configuration +kind: analysis +name: test-simple +pipeline: simpletestpipeline +status: ready +``` + +See `tests/test_blueprints/` for complete examples. + +### As Templates + +Pipeline developers can use these as starting points: + +```python +from asimov.pipeline import Pipeline + +class MyPipeline(Pipeline): + """My new pipeline implementation.""" + + name = "MyPipeline" + STATUS = {"wait", "stuck", "stopped", "running", "finished"} + + def __init__(self, production, category=None): + super(MyPipeline, self).__init__(production, category) + # Initialize your pipeline + + def submit_dag(self, dryrun=False): + # Submit your job to the cluster + # Return the job ID + pass + + def detect_completion(self): + # Check if the job has finished + # Return True/False + pass + + # Implement other required methods... +``` + +## Pipeline Descriptions + +### SimpleTestPipeline + +The simplest testing pipeline for basic analyses on single events. + +**Features:** +- Creates a basic job script +- Generates dummy output files (results.dat, posterior_samples.dat) +- Completes immediately (no actual computation) +- Returns a fixed job ID for testing (12345) + +**Use cases:** +- Testing basic job submission and monitoring +- Template for parameter estimation pipelines +- Integration testing of asimov core functionality + +### SubjectTestPipeline + +Testing pipeline for analyses that combine multiple simple analyses. 
+ +**Features:** +- Depends on SimpleAnalysis results +- Creates combined output files +- Logs information about dependent analyses +- Returns job ID 23456 + +**Use cases:** +- Testing dependency management +- Template for meta-analysis pipelines +- Testing multi-analysis workflows + +### ProjectTestPipeline + +Testing pipeline for population/catalog analyses across multiple events. + +**Features:** +- Operates on multiple subjects (events) +- Can filter and combine analyses across events +- Creates population-level outputs +- Returns job ID 34567 + +**Use cases:** +- Testing project-level analyses +- Template for population studies +- Testing multi-event workflows + +## Implementation Details + +### Required Methods + +All pipelines must implement: + +- `__init__(production, category=None)` - Initialize the pipeline +- `submit_dag(dryrun=False)` - Submit the job, return job ID +- `detect_completion()` - Check if job finished, return bool + +### Optional Methods + +Pipelines may override: + +- `before_submit(dryrun=False)` - Pre-submission setup +- `before_build(dryrun=False)` - Pre-build setup +- `after_completion()` - Post-processing after job finishes +- `samples(absolute=False)` - Return paths to output samples +- `collect_assets()` - Return dict of output files for version control +- `collect_logs()` - Return log information + +### Output Files + +Each pipeline creates specific output files: + +**SimpleTestPipeline:** +- `test_job.sh` - Job script + +- `results.dat` - Analysis results +- `posterior_samples.dat` - Sample outputs + +**SubjectTestPipeline:** +- `test_subject_job.sh` - Job script +- `combined_results.dat` - Combined results +- `combined_samples.dat` - Combined samples + +**ProjectTestPipeline:** +- `test_project_job.sh` - Job script +- `population_results.dat` - Population results +- `population_samples.dat` - Population samples + +## Installation + +The testing pipelines are registered via entry points and are available when asimov is installed: 
+ +```bash +pip install asimov[testing] +``` + +They are automatically discovered by asimov's pipeline discovery mechanism. + +## Testing + +Run the testing pipeline tests: + +```bash +python -m unittest tests.test_pipelines.test_testing_pipelines +``` + +Or use the GitHub Actions workflow that tests all three pipelines with HTCondor: + +```bash +# See .github/workflows/testing-pipelines.yml +``` + +## Examples + +### Creating a Simple Analysis + +```python +from asimov.analysis import SimpleAnalysis +from asimov.event import Event + +event = Event("GW150914_095045", ledger=ledger) +analysis = SimpleAnalysis( + subject=event, + name="test-run", + pipeline="simpletestpipeline", + status="ready" +) + +# Submit the job +job_id = analysis.pipeline.submit_dag() + +# Check completion +if analysis.pipeline.detect_completion(): + analysis.pipeline.after_completion() +``` + +### Creating a Subject Analysis + +```yaml +kind: subject_analysis +name: combine-results +pipeline: subjecttestpipeline +needs: + - analysis1 + - analysis2 +``` + +### Creating a Project Analysis + +```yaml +kind: project_analysis +name: population-study +pipeline: projecttestpipeline +subjects: + - GW150914_095045 + - GW151226_033853 +analyses: + - status: finished +``` + +## Contributing + +When adding new features to asimov that affect pipelines: + +1. Update the testing pipelines to support the feature +2. Add tests using the testing pipelines +3. 
Update this documentation + +## See Also + +- [asimov.pipeline.Pipeline](../../asimov/pipeline.py) - Base pipeline class +- [asimov.analysis](../../asimov/analysis.py) - Analysis type definitions +- [tests/test_pipelines/](../../tests/test_pipelines/) - Pipeline tests +- [asimov documentation](https://asimov.docs.ligo.org/asimov) - Full documentation diff --git a/asimov/pipelines/testing/__init__.py b/asimov/pipelines/testing/__init__.py new file mode 100644 index 00000000..b846a9dc --- /dev/null +++ b/asimov/pipelines/testing/__init__.py @@ -0,0 +1,63 @@ +""" +Testing pipelines for asimov. + +This module provides minimal testing pipelines for each of the three +analysis types supported by asimov: + +- SimpleAnalysis: Single-event, single-pipeline analysis +- SubjectAnalysis: Single-event, multi-pipeline analysis +- ProjectAnalysis: Multi-event, multi-pipeline analysis + +These pipelines are designed for: + +1. **Testing Infrastructure**: Running end-to-end tests of asimov without + requiring real gravitational wave analysis pipelines. + +2. **Template/Examples**: Serving as documented examples for pipeline + developers to use as starting points for new pipeline implementations. + +The testing pipelines complete quickly, create dummy output files, and +implement all required pipeline methods without performing actual analyses. + +Usage +----- +These pipelines are discoverable via asimov's standard pipeline discovery +mechanism using entry points. To use them, specify the pipeline name in +your ledger configuration: + +.. code-block:: yaml + + # For SimpleAnalysis + kind: analysis + pipeline: simpletestpipeline + + # For SubjectAnalysis + kind: subject_analysis + pipeline: subjecttestpipeline + + # For ProjectAnalysis + kind: project_analysis + pipeline: projecttestpipeline + +Installation +------------ +The testing pipelines are only installed when asimov is installed with +the testing optional dependency: + +.. 
code-block:: bash + + pip install asimov[testing] + +This ensures they don't add unnecessary dependencies for production use. + +See Also +-------- +asimov.pipeline.Pipeline : Base pipeline class +asimov.analysis : Analysis type definitions +""" + +from .simple import SimpleTestPipeline +from .subject import SubjectTestPipeline +from .project import ProjectTestPipeline + +__all__ = ['SimpleTestPipeline', 'SubjectTestPipeline', 'ProjectTestPipeline'] diff --git a/asimov/pipelines/testing/project.py b/asimov/pipelines/testing/project.py new file mode 100644 index 00000000..2cf8c488 --- /dev/null +++ b/asimov/pipelines/testing/project.py @@ -0,0 +1,357 @@ +""" +Minimal testing pipeline for ProjectAnalysis. + +This pipeline is designed to test asimov's ProjectAnalysis infrastructure, +which operates across multiple events/subjects. It provides a minimal +implementation ideal for testing and as a template for population analyses. +""" + +import os + +from pathlib import Path + +from ...pipeline import Pipeline + + +class ProjectTestPipeline(Pipeline): + """ + A minimal testing pipeline for ProjectAnalysis. + + This pipeline implements the minimum required functionality for testing + asimov's ProjectAnalysis infrastructure. ProjectAnalyses operate across + multiple subjects (events), making them suitable for population analyses, + catalog studies, or any analysis requiring data from multiple events. + + This pipeline serves two purposes: + 1. Testing asimov's ProjectAnalysis infrastructure + 2. Providing a template for developers creating population or catalog + analysis pipelines + + Parameters + ---------- + production : :class:`asimov.analysis.ProjectAnalysis` + The project analysis object. + category : str, optional + The category of the job. + + Examples + -------- + To use this pipeline in a ledger configuration: + + .. 
code-block:: yaml + + kind: project_analysis + name: test-population + pipeline: projecttestpipeline + status: ready + subjects: + - Event1 + - Event2 + analyses: + - status: finished + + Notes + ----- + This pipeline creates a combined output file that references all + subjects and their analyses, simulating a population study. + """ + + name = "ProjectTestPipeline" + STATUS = {"wait", "stuck", "stopped", "running", "finished"} + + def __init__(self, production, category=None): + """ + Initialize the ProjectTestPipeline. + + Parameters + ---------- + production : :class:`asimov.analysis.ProjectAnalysis` + The project analysis object this pipeline will run for. + category : str, optional + The category of the job. + """ + super().__init__(production, category) + self.logger.info("Using the ProjectTestPipeline for testing") + + def _ensure_rundir(self): + """ + Ensure the run directory exists. + + Returns + ------- + bool + True if rundir exists or was created, False if no rundir is configured. + """ + if not self.production.rundir: + return False + Path(self.production.rundir).mkdir(parents=True, exist_ok=True) + return True + + def build_dag(self, user=None, dryrun=False): + """ + Build the DAG for this project analysis pipeline. + + Creates a HTCondor submit file and DAG file that will run a simple + test job on the scheduler. + + Parameters + ---------- + user : str, optional + The user account for job submission. + dryrun : bool, optional + If True, only simulate the build without creating files.
+ + Returns + ------- + None + """ + if not dryrun: + if self._ensure_rundir(): + # Create a simple job script that will create results + job_script = os.path.join(self.production.rundir, "test_project_job.sh") + results_file = os.path.join(self.production.rundir, "population_results.dat") + with open(job_script, "w") as f: + f.write("#!/bin/bash\n") + f.write("# Project analysis test pipeline job\n") + f.write("set -e\n") + f.write("echo 'Processing analyses across multiple subjects'\n") + f.write(f"echo 'Working directory: {self.production.rundir}'\n") + f.write("echo 'Current directory:' $(pwd)\n") + f.write("sleep 2\n") + f.write("# Create the results file with absolute path\n") + f.write(f"cat > {results_file} << 'EOF'\n") + f.write("# Project analysis test pipeline results\n") + f.write("# Population/catalog analysis\n") + f.write("population_rate: 10.5\n") + f.write("rate_uncertainty: 2.3\n") + f.write("selection_effects: 0.85\n") + f.write("EOF\n") + f.write(f"echo 'Project analysis complete - {results_file} created'\n") + f.write("ls -la\n") + + # Make script executable + os.chmod(job_script, 0o755) + + # Create HTCondor submit file + submit_file = os.path.join(self.production.rundir, "test_project_job.sub") + with open(submit_file, "w") as f: + f.write("# HTCondor submit file for ProjectTestPipeline\n") + f.write("universe = vanilla\n") + f.write(f"executable = {job_script}\n") + f.write(f"initialdir = {self.production.rundir}\n") + f.write("output = test_project_job.out\n") + f.write("error = test_project_job.err\n") + f.write("log = test_project_job.log\n") + f.write("getenv = True\n") + f.write("queue 1\n") + + # Create a minimal DAG file + dag_file = os.path.join(self.production.rundir, "test_project.dag") + with open(dag_file, "w") as f: + f.write("# Project test pipeline DAG\n") + f.write("JOB test_project_job test_project_job.sub\n") + + self.logger.info(f"Built project test DAG in {self.production.rundir}") + else: + self.logger.warning("No run 
directory specified, cannot build DAG") + else: + self.logger.info("Dry run: would build project test DAG") + + def submit_dag(self, dryrun=False): + """ + Submit the pipeline job to HTCondor. + + This submits the DAG file to HTCondor so the job actually runs + on the scheduler and creates the results file. + + Parameters + ---------- + dryrun : bool, optional + If True, only simulate the submission. + + Returns + ------- + int + The HTCondor cluster ID. + """ + import subprocess + import re + + if not self.production.rundir: + self.logger.warning("No run directory specified") + return None + + # Build the DAG first + self.build_dag(dryrun=dryrun) + + self.before_submit(dryrun=dryrun) + + dag_file = "test_project.dag" + + command = [ + "condor_submit_dag", + "-batch-name", + f"test-project/{self.production.name}", + dag_file + ] + + self.logger.info(f"Submitting project DAG: {' '.join(command)}") + + if dryrun: + print(f"Would run: {' '.join(command)}") + return 34567 + else: + # Change to run directory before submitting + original_dir = os.getcwd() + os.chdir(self.production.rundir) + + try: + result = subprocess.run( + command, + capture_output=True, + text=True, + check=True + ) + + self.logger.info(f"Project DAG submitted successfully") + self.logger.debug(f"Output: {result.stdout}") + + # Extract cluster ID from output + match = re.search(r'submitted to cluster (\d+)', result.stdout) + if match: + cluster_id = int(match.group(1)) + self.logger.info(f"Cluster ID: {cluster_id}") + return cluster_id + else: + self.logger.warning("Could not extract cluster ID from condor_submit_dag output") + return None + + except subprocess.CalledProcessError as e: + self.logger.error(f"Failed to submit project DAG: {e}") + self.logger.error(f"stderr: {e.stderr}") + raise + finally: + os.chdir(original_dir) + + def detect_completion(self): + """ + Check if the project analysis has completed. + + Returns + ------- + bool + True if the job has completed, False otherwise. 
+ """ + if not self.production.rundir: + return False + + completion_file = os.path.join(self.production.rundir, "population_results.dat") + return os.path.exists(completion_file) + + def before_submit(self, dryrun=False): + """ + Prepare the job before submission. + + This checks that required subjects and analyses are available + and creates the run directory. + + Parameters + ---------- + dryrun : bool, optional + If True, only simulate the preparation. + """ + if not dryrun and self._ensure_rundir(): + # Log information about subjects and analyses + if hasattr(self.production, '_subjects'): + self.logger.info( + f"Project analysis across {len(self.production._subjects)} subjects" + ) + for subject in self.production._subjects: + self.logger.info(f" - Subject: {subject}") + + if hasattr(self.production, 'analyses'): + self.logger.info( + f"Combining {len(self.production.analyses)} total analyses" + ) + + self.logger.info(f"Prepared run directory: {self.production.rundir}") + + def after_completion(self): + """ + Post-processing after job completion. + + This creates a population/catalog results file referencing all + input subjects and analyses. 
+ """ + if self.production.rundir: + # Create a population results file + results_file = os.path.join(self.production.rundir, "population_results.dat") + if not os.path.exists(results_file): + with open(results_file, "w") as f: + f.write("# Project analysis test pipeline results\n") + f.write("# Population/catalog analysis\n") + + if hasattr(self.production, '_subjects'): + f.write(f"# Number of subjects: {len(self.production._subjects)}\n") + for i, subject in enumerate(self.production._subjects): + f.write(f"# Subject {i+1}: {subject}\n") + + if hasattr(self.production, 'analyses'): + f.write(f"# Total analyses: {len(self.production.analyses)}\n") + + f.write("population_rate: 10.5\n") + f.write("rate_uncertainty: 2.3\n") + f.write("selection_effects: 0.85\n") + + super().after_completion() + + def samples(self, absolute=False): + """ + Return the location of population samples. + + Parameters + ---------- + absolute : bool, optional + If True, return absolute paths. + + Returns + ------- + list + List of paths to population sample files. + """ + if not self.production.rundir: + return [] + + # Ensure directory exists + self._ensure_rundir() + + samples_file = os.path.join(self.production.rundir, "population_samples.dat") + + # Create dummy population samples file + if not os.path.exists(samples_file): + with open(samples_file, "w") as f: + f.write("# rate mass_distribution\n") + f.write("10.5 1.0\n") + f.write("11.2 1.1\n") + f.write("9.8 0.9\n") + + if absolute: + return [os.path.abspath(samples_file)] + else: + return [samples_file] + + def collect_assets(self): + """ + Collect analysis assets for version control. + + Returns + ------- + dict + Dictionary of assets produced by this pipeline. 
+ """ + assets = {} + if self.production.rundir: + results = os.path.join(self.production.rundir, "population_results.dat") + if os.path.exists(results): + assets['population_results'] = results + return assets diff --git a/asimov/pipelines/testing/simple.py b/asimov/pipelines/testing/simple.py new file mode 100644 index 00000000..dcbf2219 --- /dev/null +++ b/asimov/pipelines/testing/simple.py @@ -0,0 +1,327 @@ +""" +Minimal testing pipeline for SimpleAnalysis. + +This pipeline is designed to be used for testing asimov's infrastructure +without requiring a real gravitational wave analysis pipeline. +It provides a minimal implementation that completes quickly, making it +ideal for end-to-end testing and as a template for pipeline developers. +""" + +import os + +from pathlib import Path + +from ...pipeline import Pipeline + + +class SimpleTestPipeline(Pipeline): + """ + A minimal testing pipeline for SimpleAnalysis. + + This pipeline implements the minimum required functionality for testing + asimov's infrastructure. It creates dummy output files and completes + quickly without performing any actual analysis. + + This pipeline serves two purposes: + 1. Testing asimov's infrastructure without running real analyses + 2. Providing a template for developers creating new pipelines + + Parameters + ---------- + production : :class:`asimov.analysis.SimpleAnalysis` + The production/analysis object. + category : str, optional + The category of the job. + + Examples + -------- + To use this pipeline in a ledger configuration: + + .. code-block:: yaml + + kind: analysis + name: test-simple + pipeline: simpletestpipeline + status: ready + + Notes + ----- + This pipeline creates a simple output file in the run directory + to simulate a completed analysis. + """ + + name = "SimpleTestPipeline" + STATUS = {"wait", "stuck", "stopped", "running", "finished"} + + def __init__(self, production, category=None): + """ + Initialize the SimpleTestPipeline. 
+ + Parameters + ---------- + production : :class:`asimov.analysis.SimpleAnalysis` + The production object this pipeline will run for. + category : str, optional + The category of the job (e.g., calibration version). + """ + super().__init__(production, category) + self.logger.info("Using the SimpleTestPipeline for testing") + + def _ensure_rundir(self): + """ + Ensure the run directory exists. + + Returns + ------- + bool + True if rundir exists or was created, False if no rundir is configured. + """ + if not self.production.rundir: + return False + Path(self.production.rundir).mkdir(parents=True, exist_ok=True) + return True + + def build_dag(self, user=None, dryrun=False): + """ + Build the DAG for this pipeline. + + Creates a HTCondor submit file and DAG file that will run a simple + test job on the scheduler. + + Parameters + ---------- + user : str, optional + The user account for job submission. + dryrun : bool, optional + If True, only simulate the build without creating files. + + Returns + ------- + None + """ + if not dryrun: + if self._ensure_rundir(): + # Create a simple job script that will create results + job_script = os.path.join(self.production.rundir, "test_job.sh") + results_file = os.path.join(self.production.rundir, "results.dat") + with open(job_script, "w") as f: + f.write("#!/bin/bash\n") + f.write("# Simple test pipeline job\n") + f.write("set -e\n") + f.write("echo 'Test job running'\n") + f.write(f"echo 'Working directory: {self.production.rundir}'\n") + f.write("echo 'Current directory:' $(pwd)\n") + f.write("sleep 2\n") + f.write("# Create the results file with absolute path\n") + f.write(f"cat > {results_file} << 'EOF'\n") + f.write("# Test pipeline results\n") + f.write("test_parameter: 1.0\n") + f.write("test_error: 0.1\n") + f.write("EOF\n") + f.write(f"echo 'Test job complete - {results_file} created'\n") + f.write("ls -la\n") + + # Make script executable + os.chmod(job_script, 0o755) + + # Create HTCondor submit file + 
submit_file = os.path.join(self.production.rundir, "test_job.sub") + with open(submit_file, "w") as f: + f.write("# HTCondor submit file for SimpleTestPipeline\n") + f.write("universe = vanilla\n") + f.write(f"executable = {job_script}\n") + f.write(f"initialdir = {self.production.rundir}\n") + f.write("output = test_job.out\n") + f.write("error = test_job.err\n") + f.write("log = test_job.log\n") + f.write("getenv = True\n") + f.write("queue 1\n") + + # Create a minimal DAG file + dag_file = os.path.join(self.production.rundir, "test.dag") + with open(dag_file, "w") as f: + f.write("# Simple test pipeline DAG\n") + f.write("JOB test_job test_job.sub\n") + + self.logger.info(f"Built test DAG in {self.production.rundir}") + else: + self.logger.warning("No run directory specified, cannot build DAG") + else: + self.logger.info("Dry run: would build test DAG") + + def submit_dag(self, dryrun=False): + """ + Submit the pipeline job to HTCondor. + + This submits the DAG file to HTCondor so the job actually runs + on the scheduler and creates the results file. + + Parameters + ---------- + dryrun : bool, optional + If True, only simulate the submission without actually submitting. + Default is False. + + Returns + ------- + int + The HTCondor cluster ID. 
+ """ + import subprocess + import re + + if not self.production.rundir: + self.logger.warning("No run directory specified, cannot submit job") + return None + + # Build the DAG first + self.build_dag(dryrun=dryrun) + + self.before_submit(dryrun=dryrun) + + dag_file = "test.dag" + + command = [ + "condor_submit_dag", + "-batch-name", + f"test/{self.production.event.name}/{self.production.name}", + dag_file + ] + + self.logger.info(f"Submitting DAG: {' '.join(command)}") + + if dryrun: + print(f"Would run: {' '.join(command)}") + return 12345 + else: + # Change to run directory before submitting + original_dir = os.getcwd() + os.chdir(self.production.rundir) + + try: + result = subprocess.run( + command, + capture_output=True, + text=True, + check=True + ) + + self.logger.info(f"DAG submitted successfully") + self.logger.debug(f"Output: {result.stdout}") + + # Extract cluster ID from output + match = re.search(r'submitted to cluster (\d+)', result.stdout) + if match: + cluster_id = int(match.group(1)) + self.logger.info(f"Cluster ID: {cluster_id}") + return cluster_id + else: + self.logger.warning("Could not extract cluster ID from condor_submit_dag output") + return None + + except subprocess.CalledProcessError as e: + self.logger.error(f"Failed to submit DAG: {e}") + self.logger.error(f"stderr: {e.stderr}") + raise + finally: + os.chdir(original_dir) + + def detect_completion(self): + """ + Check if the pipeline has completed. + + This checks for the existence of a results file that would be + created by a completed job. + + Returns + ------- + bool + True if the job has completed, False otherwise. + """ + if not self.production.rundir: + return False + + # Check for a completion marker file + completion_file = os.path.join(self.production.rundir, "results.dat") + return os.path.exists(completion_file) + + def before_submit(self, dryrun=False): + """ + Prepare the job before submission. + + This creates the run directory and any necessary setup files. 
+ + Parameters + ---------- + dryrun : bool, optional + If True, only simulate the preparation. + """ + if not dryrun and self._ensure_rundir(): + self.logger.info(f"Prepared run directory: {self.production.rundir}") + + def after_completion(self): + """ + Post-processing after job completion. + + This creates a simple results file and updates the status. + """ + if self.production.rundir: + # Create a dummy results file + results_file = os.path.join(self.production.rundir, "results.dat") + if not os.path.exists(results_file): + with open(results_file, "w") as f: + f.write("# Test pipeline results\n") + f.write("test_parameter: 1.0\n") + f.write("test_error: 0.1\n") + + super().after_completion() + + def samples(self, absolute=False): + """ + Return the location of output samples. + + Parameters + ---------- + absolute : bool, optional + If True, return absolute paths. + + Returns + ------- + list + List of paths to sample files (dummy file for testing). + """ + if not self.production.rundir: + return [] + + # Ensure directory exists + self._ensure_rundir() + + samples_file = os.path.join(self.production.rundir, "posterior_samples.dat") + + # Create dummy samples file if it doesn't exist + if not os.path.exists(samples_file): + with open(samples_file, "w") as f: + f.write("# parameter1 parameter2\n") + f.write("1.0 2.0\n") + f.write("1.1 2.1\n") + + if absolute: + return [os.path.abspath(samples_file)] + else: + return [samples_file] + + def collect_assets(self): + """ + Collect analysis assets for version control. + + Returns + ------- + dict + Dictionary of assets produced by this pipeline. 
+ """ + assets = {} + if self.production.rundir: + results = os.path.join(self.production.rundir, "results.dat") + if os.path.exists(results): + assets['results'] = results + return assets diff --git a/asimov/pipelines/testing/subject.py b/asimov/pipelines/testing/subject.py new file mode 100644 index 00000000..6a98cbc0 --- /dev/null +++ b/asimov/pipelines/testing/subject.py @@ -0,0 +1,341 @@ +""" +Minimal testing pipeline for SubjectAnalysis. + +This pipeline is designed to test asimov's SubjectAnalysis infrastructure, +which operates on multiple SimpleAnalysis results for a single event/subject. +It provides a minimal implementation ideal for testing and as a template. +""" + +import os + +from pathlib import Path + +from ...pipeline import Pipeline + + +class SubjectTestPipeline(Pipeline): + """ + A minimal testing pipeline for SubjectAnalysis. + + This pipeline implements the minimum required functionality for testing + asimov's SubjectAnalysis infrastructure. SubjectAnalyses operate on the + results of multiple SimpleAnalysis runs for a single subject (event). + + This pipeline serves two purposes: + 1. Testing asimov's SubjectAnalysis infrastructure + 2. Providing a template for developers creating pipelines that combine + results from multiple analyses + + Parameters + ---------- + production : :class:`asimov.analysis.SubjectAnalysis` + The subject analysis object. + category : str, optional + The category of the job. + + Examples + -------- + To use this pipeline in a ledger configuration: + + .. code-block:: yaml + + kind: subject_analysis + name: test-subject + pipeline: subjecttestpipeline + status: ready + needs: + - status:finished + + Notes + ----- + This pipeline creates a combined output file that references the + analyses it depends on, simulating a meta-analysis or comparison. 
+ """ + + name = "SubjectTestPipeline" + STATUS = {"wait", "stuck", "stopped", "running", "finished"} + + def __init__(self, production, category=None): + """ + Initialize the SubjectTestPipeline. + + Parameters + ---------- + production : :class:`asimov.analysis.SubjectAnalysis` + The subject analysis object this pipeline will run for. + category : str, optional + The category of the job. + """ + super().__init__(production, category) + self.logger.info("Using the SubjectTestPipeline for testing") + + def _ensure_rundir(self): + """ + Ensure the run directory exists. + + Returns + ------- + bool + True if rundir exists or was created, False if no rundir is configured. + """ + if not self.production.rundir: + return False + Path(self.production.rundir).mkdir(parents=True, exist_ok=True) + return True + + def build_dag(self, user=None, dryrun=False): + """ + Build the DAG for this subject analysis pipeline. + + Creates a HTCondor submit file and DAG file that will run a simple + test job on the scheduler. + + Parameters + ---------- + user : str, optional + The user account for job submission. + dryrun : bool, optional + If True, only simulate the build without creating files. 
+ + Returns + ------- + None + """ + if not dryrun: + if self._ensure_rundir(): + # Create a simple job script that will create results + job_script = os.path.join(self.production.rundir, "test_subject_job.sh") + results_file = os.path.join(self.production.rundir, "combined_results.dat") + with open(job_script, "w") as f: + f.write("#!/bin/bash\n") + f.write("# Subject analysis test pipeline job\n") + f.write("set -e\n") + f.write("echo 'Processing multiple analyses for subject'\n") + f.write(f"echo 'Working directory: {self.production.rundir}'\n") + f.write("echo 'Current directory:' $(pwd)\n") + f.write("sleep 2\n") + f.write("# Create the results file with absolute path\n") + f.write(f"cat > {results_file} << 'EOF'\n") + f.write("# Subject analysis test pipeline results\n") + f.write("# Combined analysis for subject\n") + f.write("combined_metric: 1.5\n") + f.write("uncertainty: 0.2\n") + f.write("EOF\n") + f.write(f"echo 'Subject analysis complete - {results_file} created'\n") + f.write("ls -la\n") + + # Make script executable + os.chmod(job_script, 0o755) + + # Create HTCondor submit file + submit_file = os.path.join(self.production.rundir, "test_subject_job.sub") + with open(submit_file, "w") as f: + f.write("# HTCondor submit file for SubjectTestPipeline\n") + f.write("universe = vanilla\n") + f.write(f"executable = {job_script}\n") + f.write(f"initialdir = {self.production.rundir}\n") + f.write("output = test_subject_job.out\n") + f.write("error = test_subject_job.err\n") + f.write("log = test_subject_job.log\n") + f.write("getenv = True\n") + f.write("queue 1\n") + + # Create a minimal DAG file + dag_file = os.path.join(self.production.rundir, "test_subject.dag") + with open(dag_file, "w") as f: + f.write("# Subject test pipeline DAG\n") + f.write("JOB test_subject_job test_subject_job.sub\n") + + self.logger.info(f"Built subject test DAG in {self.production.rundir}") + else: + self.logger.warning("No run directory specified, cannot build DAG") + else: + 
self.logger.info("Dry run: would build subject test DAG") + + def submit_dag(self, dryrun=False): + """ + Submit the pipeline job to HTCondor. + + This submits the DAG file to HTCondor so the job actually runs + on the scheduler and creates the results file. + + Parameters + ---------- + dryrun : bool, optional + If True, only simulate the submission. + + Returns + ------- + int + The HTCondor cluster ID. + """ + import subprocess + import re + + if not self.production.rundir: + self.logger.warning("No run directory specified") + return None + + # Build the DAG first + self.build_dag(dryrun=dryrun) + + self.before_submit(dryrun=dryrun) + + dag_file = "test_subject.dag" + + command = [ + "condor_submit_dag", + "-batch-name", + f"test-subject/{self.production.event.name}/{self.production.name}", + dag_file + ] + + self.logger.info(f"Submitting subject DAG: {' '.join(command)}") + + if dryrun: + print(f"Would run: {' '.join(command)}") + return 23456 + else: + # Change to run directory before submitting + original_dir = os.getcwd() + os.chdir(self.production.rundir) + + try: + result = subprocess.run( + command, + capture_output=True, + text=True, + check=True + ) + + self.logger.info(f"Subject DAG submitted successfully") + self.logger.debug(f"Output: {result.stdout}") + + # Extract cluster ID from output + match = re.search(r'submitted to cluster (\d+)', result.stdout) + if match: + cluster_id = int(match.group(1)) + self.logger.info(f"Cluster ID: {cluster_id}") + return cluster_id + else: + self.logger.warning("Could not extract cluster ID from condor_submit_dag output") + return None + + except subprocess.CalledProcessError as e: + self.logger.error(f"Failed to submit subject DAG: {e}") + self.logger.error(f"stderr: {e.stderr}") + raise + finally: + os.chdir(original_dir) + + def detect_completion(self): + """ + Check if the subject analysis has completed. + + Returns + ------- + bool + True if the job has completed, False otherwise. 
+ """ + if not self.production.rundir: + return False + + completion_file = os.path.join(self.production.rundir, "combined_results.dat") + return os.path.exists(completion_file) + + def before_submit(self, dryrun=False): + """ + Prepare the job before submission. + + This checks that required analyses are available and creates + the run directory. + + Parameters + ---------- + dryrun : bool, optional + If True, only simulate the preparation. + """ + if not dryrun and self._ensure_rundir(): + # Log information about dependent analyses + if hasattr(self.production, 'analyses'): + self.logger.info( + f"Subject analysis will combine {len(self.production.analyses)} analyses" + ) + for analysis in self.production.analyses: + self.logger.info(f" - {analysis.name}") + + self.logger.info(f"Prepared run directory: {self.production.rundir}") + + def after_completion(self): + """ + Post-processing after job completion. + + This creates a combined results file referencing all input analyses. + """ + if self.production.rundir: + # Create a combined results file + results_file = os.path.join(self.production.rundir, "combined_results.dat") + if not os.path.exists(results_file): + with open(results_file, "w") as f: + f.write("# Subject analysis test pipeline results\n") + f.write("# Combined analysis for subject\n") + + if hasattr(self.production, 'analyses'): + f.write(f"# Number of analyses combined: {len(self.production.analyses)}\n") + for i, analysis in enumerate(self.production.analyses): + f.write(f"# Analysis {i+1}: {analysis.name}\n") + + f.write("combined_metric: 1.5\n") + f.write("uncertainty: 0.2\n") + + super().after_completion() + + def samples(self, absolute=False): + """ + Return the location of combined output samples. + + Parameters + ---------- + absolute : bool, optional + If True, return absolute paths. + + Returns + ------- + list + List of paths to combined sample files. 
+ """ + if not self.production.rundir: + return [] + + # Ensure directory exists + self._ensure_rundir() + + samples_file = os.path.join(self.production.rundir, "combined_samples.dat") + + # Create dummy combined samples file + if not os.path.exists(samples_file): + with open(samples_file, "w") as f: + f.write("# combined_parameter1 combined_parameter2\n") + f.write("1.5 2.5\n") + f.write("1.6 2.6\n") + + if absolute: + return [os.path.abspath(samples_file)] + else: + return [samples_file] + + def collect_assets(self): + """ + Collect analysis assets for version control. + + Returns + ------- + dict + Dictionary of assets produced by this pipeline. + """ + assets = {} + if self.production.rundir: + results = os.path.join(self.production.rundir, "combined_results.dat") + if os.path.exists(results): + assets['combined_results'] = results + return assets diff --git a/asimov/priors.py b/asimov/priors.py new file mode 100644 index 00000000..44d08099 --- /dev/null +++ b/asimov/priors.py @@ -0,0 +1,234 @@ +""" +Prior specification and interface system for asimov. + +This module provides a flexible prior specification system that: +1. Validates prior specifications using pydantic +2. Allows pipeline-specific conversion of priors +3. Supports both simple priors and reparameterizations +""" + +from typing import Any, Dict, Optional, Union +from pydantic import BaseModel, field_validator, model_validator + + +class PriorSpecification(BaseModel): + """ + Specification for a single prior distribution. + + This model validates a prior specification from a blueprint. + Pipelines can then convert this to their own prior format. 
+ + Attributes + ---------- + minimum : float, optional + The minimum value for the prior + maximum : float, optional + The maximum value for the prior + type : str, optional + The type/class of the prior distribution + boundary : str, optional + The boundary condition for the prior + alpha : float, optional + Power law index (for PowerLaw priors) + mu : float, optional + Mean (for Gaussian priors) + sigma : float, optional + Standard deviation (for Gaussian priors) + """ + minimum: Optional[float] = None + maximum: Optional[float] = None + type: Optional[str] = None + boundary: Optional[str] = None + # Additional parameters for different prior types + alpha: Optional[float] = None + mu: Optional[float] = None + sigma: Optional[float] = None + + # Allow any additional fields for pipeline-specific settings + model_config = {"extra": "allow"} + + @model_validator(mode='after') + def validate_min_max(self): + """Validate that minimum is less than maximum when both are specified.""" + if self.minimum is not None and self.maximum is not None: + if self.minimum >= self.maximum: + raise ValueError(f"minimum ({self.minimum}) must be less than maximum ({self.maximum})") + return self + + @field_validator('boundary') + @classmethod + def validate_boundary(cls, v): + """Validate boundary conditions.""" + if v is not None: + allowed = ['periodic', 'reflective', None, 'None'] + if v not in allowed: + raise ValueError(f"boundary must be one of {allowed}, got '{v}'") + return v + + +class PriorDict(BaseModel): + """ + A dictionary of prior specifications. + + This model validates a complete set of priors from a blueprint. + It supports both standard parameter priors and special settings. 
+ + Attributes + ---------- + default : str, optional + The default prior set to use (e.g., "BBHPriorDict") + """ + default: Optional[str] = None + + # Allow arbitrary prior specifications as additional fields + model_config = {"extra": "allow"} + + def get_prior(self, parameter_name: str) -> Optional[PriorSpecification]: + """ + Get a prior specification for a parameter. + + Parameters + ---------- + parameter_name : str + The name of the parameter + + Returns + ------- + PriorSpecification or dict + The prior specification, or None if not found + """ + # Get the field value directly from model fields + value = getattr(self, parameter_name, None) + if value is None: + # Check in extra fields + if hasattr(self, '__pydantic_extra__'): + value = self.__pydantic_extra__.get(parameter_name) + + if value is None: + return None + elif isinstance(value, dict): + return PriorSpecification(**value) + elif isinstance(value, PriorSpecification): + return value + else: + return value + + def to_dict(self) -> Dict[str, Any]: + """ + Convert to a plain dictionary. + + Returns + ------- + dict + Dictionary representation of all priors + """ + result = {} + if self.default is not None: + result['default'] = self.default + + # Add all extra fields + if hasattr(self, '__pydantic_extra__'): + for key, value in self.__pydantic_extra__.items(): + if isinstance(value, PriorSpecification): + result[key] = value.model_dump(exclude_none=True) + else: + result[key] = value + + return result + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "PriorDict": + """ + Create a PriorDict from a plain dictionary. + + Parameters + ---------- + data : dict + Dictionary of prior specifications + + Returns + ------- + PriorDict + Validated prior dictionary + """ + return cls(**data) + + +class Reparameterization(BaseModel): + """ + Specification for a parameter reparameterization. + + This allows pipelines like pycbc to specify alternative + parameterizations of the signal. 
+ + Attributes + ---------- + from_parameters : list of str + The original parameters + to_parameters : list of str + The new parameters after transformation + transform : str, optional + The name of the transformation function + """ + from_parameters: list[str] + to_parameters: list[str] + transform: Optional[str] = None + + # Allow any additional fields for pipeline-specific settings + model_config = {"extra": "allow"} + + +class PriorInterface: + """ + Base class for pipeline-specific prior interfaces. + + Each pipeline should subclass this and implement the + conversion methods to transform asimov priors into + pipeline-specific formats. + """ + + def __init__(self, prior_dict: Optional[Union[Dict, PriorDict]] = None): + """ + Initialize the prior interface. + + Parameters + ---------- + prior_dict : dict or PriorDict, optional + The prior specification from the blueprint + """ + if prior_dict is None: + self.prior_dict = None + elif isinstance(prior_dict, PriorDict): + self.prior_dict = prior_dict + elif isinstance(prior_dict, dict): + self.prior_dict = PriorDict.from_dict(prior_dict) + else: + raise TypeError(f"prior_dict must be dict or PriorDict, got {type(prior_dict)}") + + def convert(self) -> Any: + """ + Convert asimov priors to pipeline-specific format. + + This method should be overridden by pipeline-specific interfaces. + + Returns + ------- + Any + Pipeline-specific prior representation + """ + raise NotImplementedError("Subclasses must implement convert()") + + def validate(self) -> bool: + """ + Validate the prior specification for this pipeline. + + Returns + ------- + bool + True if valid, raises exception otherwise + """ + # Base validation is handled by pydantic + return True + + + diff --git a/asimov/project.py b/asimov/project.py new file mode 100644 index 00000000..7eee6709 --- /dev/null +++ b/asimov/project.py @@ -0,0 +1,367 @@ +""" +Project management and Python API interface. 
+ +This module provides a Python API for creating and managing asimov projects. +""" + +import os + +try: + import ConfigParser as configparser +except ImportError: + import configparser + +from asimov import config as global_config, logger, LOGGER_LEVEL +from asimov.ledger import YAMLLedger +from asimov.event import Event +from asimov.cli.project import make_project + +logger = logger.getChild("project") +logger.setLevel(LOGGER_LEVEL) + + +class Project: + """ + A class representing an asimov project. + + This class provides a Python API for creating and managing asimov projects, + including creating new projects, adding subjects/events, and managing analyses. + + Examples + -------- + Create a new project:: + + from asimov.project import Project + + project = Project("My Project", "/path/to/project") + + with project: + subject = project.add_subject(name="GW150914", ...) + subject.add_production(name="prod_1", pipeline="bilby", ...) + + Load an existing project:: + + project = Project.load("/path/to/project") + + with project: + # Make changes + pass + """ + + def __init__(self, name, location=None, working="working", checkouts="checkouts", + results="results", logs="logs", user=None): + """ + Initialize a new asimov project. + + Parameters + ---------- + name : str + The name of the project. + location : str, optional + The root directory for the project. If None, uses the current directory. + working : str, optional + The location to store working directories. Default is "working". + checkouts : str, optional + The location to store cloned git repositories. Default is "checkouts". + results : str, optional + The location where the results store should be created. Default is "results". + logs : str, optional + The location to store log files. Default is "logs". + user : str, optional + The user account to be used for accounting purposes. + Defaults to the current user if not set. 
+ """ + self.name = name + self.location = location if location else os.getcwd() + self.working = working + self.checkouts = checkouts + self.results = results + self.logs = logs + self.user = user + + # Store the original directory to restore later + self._original_dir = None + self._ledger = None + self._in_context = False + + # Prevent accidental re-initialization of an existing project directory. + # If the target location already exists and contains a project, refuse to + # create a new project there, as this may overwrite an existing project. + config_path = os.path.join(self.location, ".asimov", "asimov.conf") + if os.path.exists(config_path): + raise RuntimeError( + f"Project directory '{self.location}' already contains an asimov project. " + "If you meant to open an existing project, use Project.load(...)." + ) + + # Initialize the project structure + self._initialize_project() + + def _initialize_project(self): + """ + Initialize the project structure by calling the make_project function. + """ + # Store current directory + original_dir = os.getcwd() + + try: + # Create the project + make_project( + name=self.name, + root=self.location, + working=self.working, + checkouts=self.checkouts, + results=self.results, + logs=self.logs, + user=self.user + ) + + logger.info(f"Created new project '{self.name}' at {self.location}") + + finally: + # Return to original directory + os.chdir(original_dir) + + @classmethod + def load(cls, location): + """ + Load an existing project from a directory. + + Parameters + ---------- + location : str + The root directory of the existing project. + + Returns + ------- + Project + A Project instance loaded from the specified location. + + Raises + ------ + FileNotFoundError + If the project directory or configuration file does not exist. + """ + config_path = os.path.join(location, ".asimov", "asimov.conf") + if not os.path.exists(config_path): + raise FileNotFoundError( + f"No project found at {location}. 
" + f"Missing configuration file at {config_path}" + ) + + # Read the project configuration + config = configparser.ConfigParser() + config.read(config_path) + + # Create a Project instance without initializing + project = cls.__new__(cls) + + try: + project.name = config.get("project", "name") + project.location = location + project.working = config.get("general", "rundir_default") + project.checkouts = config.get("general", "git_default") + project.results = config.get("storage", "directory") + project.logs = config.get("logging", "location") + project.user = config.get("condor", "user") + except (configparser.NoSectionError, configparser.NoOptionError) as e: + raise ValueError( + f"Project configuration at {config_path} is incomplete or malformed. " + f"Missing configuration: {e}" + ) + + project._original_dir = None + project._ledger = None + project._in_context = False + + logger.info(f"Loaded existing project '{project.name}' from {location}") + + return project + + @property + def ledger(self): + """ + Get the project ledger. + + Returns + ------- + Ledger + The project's ledger instance. + """ + if self._ledger is None: + # Change to project directory to load the ledger + # This is required because Event initialization needs the correct working directory + original_dir = os.getcwd() + try: + os.chdir(self.location) + ledger_path = os.path.join(".asimov", "ledger.yml") + self._ledger = YAMLLedger(location=ledger_path) + finally: + os.chdir(original_dir) + + return self._ledger + + def __enter__(self): + """ + Enter the context manager, enabling transactional updates. + + Returns + ------- + Project + The project instance. 
+ """ + self._original_dir = os.getcwd() + os.chdir(self.location) + self._in_context = True + + # Preserve the existing global project root so it can be restored on exit + try: + self._previous_project_root = global_config.get("project", "root") + except (configparser.NoSectionError, configparser.NoOptionError): + self._previous_project_root = None + + # Update the global config with the project location + # This is needed for ledger.save() to work correctly + global_config.set("project", "root", self.location) + + # Load the ledger in the project directory if it hasn't been loaded yet + if self._ledger is None: + ledger_path = os.path.join(".asimov", "ledger.yml") + self._ledger = YAMLLedger(location=ledger_path) + + # Ensure pipelines section exists in ledger data + # This is needed for production.to_dict() to work correctly + if "pipelines" not in self._ledger.data: + self._ledger.data["pipelines"] = {} + + logger.debug(f"Entered context for project '{self.name}'") + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """ + Exit the context manager, saving changes to the ledger. + + Parameters + ---------- + exc_type : type + The exception type, if an exception was raised. + exc_val : Exception + The exception value, if an exception was raised. + exc_tb : traceback + The exception traceback, if an exception was raised. 
+ """ + try: + # Only save if no exception occurred + if exc_type is None and self._ledger is not None: + self._ledger.save() + logger.debug(f"Saved ledger for project '{self.name}'") + # Invalidate the ledger cache so it will be reloaded on next access + self._ledger = None + + # Restore the previous global project root + if self._previous_project_root is not None: + global_config.set("project", "root", self._previous_project_root) + else: + # There was no previous project.root; remove the option we added in __enter__ + try: + global_config.remove_option("project", "root") + except (configparser.NoSectionError, configparser.NoOptionError): + # If the section/option is missing, there's nothing to restore + pass + finally: + self._in_context = False + if self._original_dir: + os.chdir(self._original_dir) + logger.debug(f"Exited context for project '{self.name}'") + + def add_subject(self, name, **kwargs): + """ + Add a new subject (event) to the project. + + Parameters + ---------- + name : str + The name of the subject/event. + **kwargs + Additional keyword arguments to pass to the Event constructor. + + Returns + ------- + Event + The created event/subject. + + Raises + ------ + RuntimeError + If called outside of a context manager. + """ + if not self._in_context: + raise RuntimeError( + "add_subject must be called within a context manager. " + "Use 'with project:' to enter a transactional context." 
+ ) + + # Create the event + event = Event(name=name, ledger=self._ledger, **kwargs) + + # Add to ledger without saving (save happens on context exit) + # Temporarily disable auto-save during add_event to avoid redundant I/O + original_save = self._ledger.save + try: + # Replace save with a no-op during add_event + self._ledger.save = lambda: None + self._ledger.add_event(event) + finally: + # Restore the original save method + self._ledger.save = original_save + + logger.info(f"Added subject '{name}' to project '{self.name}'") + + return event + + def add_event(self, name, **kwargs): + """ + Add a new event to the project. + + This is an alias for add_subject for backward compatibility. + + Parameters + ---------- + name : str + The name of the event. + **kwargs + Additional keyword arguments to pass to the Event constructor. + + Returns + ------- + Event + The created event. + """ + return self.add_subject(name=name, **kwargs) + + def get_event(self, name=None): + """ + Get one or all events from the project. + + Parameters + ---------- + name : str, optional + The name of the event to retrieve. If None, returns all events. + + Returns + ------- + Event or list of Event + The requested event(s). + """ + return self.ledger.get_event(event=name) + + def __repr__(self): + """ + Return a string representation of the project. + + Returns + ------- + str + A string representation of the project. + """ + return f"" diff --git a/asimov/scheduler.py b/asimov/scheduler.py new file mode 100644 index 00000000..eb7f891f --- /dev/null +++ b/asimov/scheduler.py @@ -0,0 +1,740 @@ +""" +This module contains logic for interacting with a scheduling system. 
+ +Supported Schedulers are: + +- HTCondor +- Slurm (planned) + +""" + +import os +import datetime +import yaml +import warnings +from abc import ABC, abstractmethod + +try: + warnings.filterwarnings("ignore", module="htcondor2") + import htcondor2 as htcondor # NoQA + import classad2 as classad # NoQA +except ImportError: + warnings.filterwarnings("ignore", module="htcondor") + import htcondor # NoQA + import classad # NoQA + + +class Scheduler(ABC): + """ + The base class which represents all supported schedulers. + """ + + @abstractmethod + def submit(self, job_description): + """ + Submit a job to the scheduler. + + Parameters + ---------- + job_description : JobDescription or dict + The job description to submit. + + Returns + ------- + str or int + The job ID returned by the scheduler. + """ + raise NotImplementedError + + @abstractmethod + def delete(self, job_id): + """ + Delete a job from the scheduler. + + Parameters + ---------- + job_id : str or int + The job ID to delete. + """ + raise NotImplementedError + + @abstractmethod + def query(self, job_id=None): + """ + Query the scheduler for job status. + + Parameters + ---------- + job_id : str or int, optional + The job ID to query. If None, query all jobs. + + Returns + ------- + dict or list + Job status information. + """ + raise NotImplementedError + + @abstractmethod + def submit_dag(self, dag_file, batch_name=None, **kwargs): + """ + Submit a DAG (Directed Acyclic Graph) workflow to the scheduler. + + Parameters + ---------- + dag_file : str + Path to the DAG file to submit. + batch_name : str, optional + A name for the batch of jobs. + **kwargs + Additional scheduler-specific parameters. + + Returns + ------- + int + The job ID (cluster ID) returned by the scheduler. + """ + raise NotImplementedError + + @abstractmethod + def query_all_jobs(self): + """ + Query all jobs from the scheduler. 
+ + This method is used to get a list of all jobs currently in the scheduler + queue, which is useful for monitoring and status checking. + + Returns + ------- + list of dict + A list of dictionaries, each containing job information with keys: + - id: Job ID + - command: Command being executed + - hosts: Number of hosts + - status: Job status (integer code or string) + - name: Job name (optional) + - dag id: Parent DAG ID if this is a subjob (optional) + """ + raise NotImplementedError + + +class HTCondor(Scheduler): + """ + Scheduler implementation for HTCondor. + """ + + def __init__(self, schedd_name=None): + """ + Initialize the HTCondor scheduler. + + Parameters + ---------- + schedd_name : str, optional + The name of the schedd to use. If None, will try to find one automatically. + """ + self.schedd_name = schedd_name + self._schedd = None + + @property + def schedd(self): + """Get or create the schedd connection.""" + if self._schedd is None: + if self.schedd_name: + try: + schedulers = htcondor.Collector().locate( + htcondor.DaemonTypes.Schedd, self.schedd_name + ) + self._schedd = htcondor.Schedd(schedulers) + except (htcondor.HTCondorLocateError, htcondor.HTCondorIOError): + # Fall back to default schedd if we can't locate the named one + self._schedd = htcondor.Schedd() + else: + self._schedd = htcondor.Schedd() + return self._schedd + + def submit(self, job_description): + """ + Submit a job to the condor schedd. + + Parameters + ---------- + job_description : JobDescription or dict + The job description to submit. + + Returns + ------- + int + The cluster ID of the submitted job. 
+ """ + # Convert JobDescription to dict if needed + if isinstance(job_description, JobDescription): + submit_dict = job_description.to_htcondor() + else: + submit_dict = job_description + + # Create HTCondor Submit object + submit_obj = htcondor.Submit(submit_dict) + + # Submit the job + try: + result = self.schedd.submit(submit_obj) + cluster_id = result.cluster() + return cluster_id + except htcondor.HTCondorIOError as e: + raise RuntimeError(f"Failed to submit job to HTCondor: {e}") + + def delete(self, job_id): + """ + Delete a job from the HTCondor scheduler. + + Parameters + ---------- + job_id : int + The cluster ID to delete. + """ + self.schedd.act(htcondor.JobAction.Remove, f"ClusterId == {job_id}") + + def query(self, job_id=None, projection=None): + """ + Query the HTCondor scheduler for job status. + + Parameters + ---------- + job_id : int, optional + The cluster ID to query. If None, query all jobs. + projection : list, optional + List of attributes to retrieve. + + Returns + ------- + list + List of job ClassAds. + """ + if job_id is not None: + constraint = f"ClusterId == {job_id}" + else: + constraint = None + + if projection: + return list(self.schedd.query(constraint=constraint, projection=projection)) + else: + return list(self.schedd.query(constraint=constraint)) + + def submit_dag(self, dag_file, batch_name=None, **kwargs): + """ + Submit a DAG file to the HTCondor scheduler. + + Parameters + ---------- + dag_file : str + Path to the DAG submit file. + batch_name : str, optional + A name for the batch of jobs. + **kwargs + Additional HTCondor-specific parameters. + + Returns + ------- + int + The cluster ID of the submitted DAG. + + Raises + ------ + RuntimeError + If the DAG submission fails. + FileNotFoundError + If the DAG file does not exist. 
+ """ + if not os.path.exists(dag_file): + raise FileNotFoundError(f"DAG file not found: {dag_file}") + + try: + # Use HTCondor's Submit.from_dag to create a submit description from the DAG file + submit_obj = htcondor.Submit.from_dag(dag_file, options={}) + + # Add batch name if provided + if batch_name: + # Set the batch name in the submit description + submit_obj['JobBatchName'] = batch_name + + # Add any additional kwargs to the submit description + for key, value in kwargs.items(): + submit_obj[key] = value + + # Submit the DAG + result = self.schedd.submit(submit_obj) + cluster_id = result.cluster() + + return cluster_id + + except htcondor.HTCondorIOError as e: + raise RuntimeError(f"Failed to submit DAG to HTCondor: {e}") + except Exception as e: + raise RuntimeError(f"Unexpected error submitting DAG: {e}") + + def query_all_jobs(self): + """ + Query all jobs from HTCondor schedulers. + + This method queries all available HTCondor schedulers to get a complete + list of jobs. It's used by the JobList class for monitoring. + + Returns + ------- + list of dict + A list of dictionaries containing job information. 
+ """ + data = [] + + try: + collectors = htcondor.Collector().locateAll(htcondor.DaemonTypes.Schedd) + except htcondor.HTCondorLocateError as e: + raise RuntimeError(f"Could not find a valid HTCondor scheduler: {e}") + + for schedd_ad in collectors: + try: + schedd = htcondor.Schedd(schedd_ad) + jobs = schedd.query( + opts=htcondor.QueryOpts.DefaultMyJobsOnly, + projection=[ + "ClusterId", + "Cmd", + "CurrentHosts", + "HoldReason", + "JobStatus", + "DAG_Status", + "JobBatchName", + "DAGManJobId", + ], + ) + + # Convert HTCondor ClassAds to dictionaries + for job_ad in jobs: + if "ClusterId" in job_ad: + job_dict = { + "id": int(float(job_ad["ClusterId"])), + "command": job_ad.get("Cmd", ""), + "hosts": job_ad.get("CurrentHosts", 0), + "status": job_ad.get("JobStatus", 0), + } + + if "HoldReason" in job_ad: + job_dict["hold"] = job_ad["HoldReason"] + if "JobBatchName" in job_ad: + job_dict["name"] = job_ad["JobBatchName"] + if "DAG_Status" not in job_ad and "DAGManJobId" in job_ad: + job_dict["dag id"] = int(float(job_ad["DAGManJobId"])) + + data.append(job_dict) + + except Exception: + # Skip problematic schedulers + pass + + return data + + +class Slurm(Scheduler): + """ + Scheduler implementation for Slurm. + + Note: This is a placeholder implementation for future Slurm support. 
+ """ + + def __init__(self): + """Initialize the Slurm scheduler.""" + raise NotImplementedError("Slurm scheduler is not yet implemented") + + def submit(self, job_description): + """Submit a job to Slurm.""" + raise NotImplementedError("Slurm scheduler is not yet implemented") + + def delete(self, job_id): + """Delete a job from Slurm.""" + raise NotImplementedError("Slurm scheduler is not yet implemented") + + def query(self, job_id=None): + """Query Slurm for job status.""" + raise NotImplementedError("Slurm scheduler is not yet implemented") + + def submit_dag(self, dag_file, batch_name=None, **kwargs): + """Submit a DAG to Slurm.""" + raise NotImplementedError("Slurm scheduler is not yet implemented") + + def query_all_jobs(self): + """Query all jobs from Slurm.""" + raise NotImplementedError("Slurm scheduler is not yet implemented") + + +class Job: + """ + Scheduler-agnostic representation of a job. + + This class provides a common interface for job information across + different schedulers. + """ + + def __init__(self, job_id, command, hosts, status, name=None, dag_id=None, **kwargs): + """ + Create a Job object. + + Parameters + ---------- + job_id : int + The job ID or cluster ID. + command : str + The command being run. + hosts : int + The number of hosts currently processing the job. + status : int or str + The status of the job. + name : str, optional + The name or batch name of the job. + dag_id : int, optional + The DAG ID if this is a subjob. + **kwargs + Additional scheduler-specific attributes. + """ + self.job_id = job_id + self.command = command + self.hosts = hosts + self._status = status + self.name = name or "asimov job" + self.dag_id = dag_id + self.subjobs = [] + + # Store any additional attributes + for key, value in kwargs.items(): + setattr(self, key, value) + + def add_subjob(self, job): + """ + Add a subjob to this job. + + Parameters + ---------- + job : Job + The subjob to add. 
+ """ + self.subjobs.append(job) + + @property + def status(self): + """ + Get the status of the job as a string. + + Returns + ------- + str + A description of the status of the job. + """ + # Handle both integer status codes and string status + if isinstance(self._status, int): + # HTCondor status codes + statuses = { + 0: "Unexplained", + 1: "Idle", + 2: "Running", + 3: "Removed", + 4: "Completed", + 5: "Held", + 6: "Submission error", + } + return statuses.get(self._status, "Unknown") + else: + return str(self._status) + + def __repr__(self): + return f"" + + def __str__(self): + return repr(self) + + def to_dict(self): + """ + Convert the job to a dictionary representation. + + Returns + ------- + dict + Dictionary representation of the job. + """ + output = { + "name": self.name, + "id": self.job_id, + "hosts": self.hosts, + "status": self._status, + "command": self.command, + } + + if self.dag_id: + output["dag_id"] = self.dag_id + + return output + + +class JobList: + """ + Scheduler-agnostic list of running jobs. + + This class queries the scheduler and caches the results for performance. + """ + + def __init__(self, scheduler, cache_file=None, cache_time=900): + """ + Initialize the job list. + + Parameters + ---------- + scheduler : Scheduler + The scheduler instance to query. + cache_file : str, optional + Path to the cache file. If None, uses ".asimov/_cache_jobs.yaml" + cache_time : int, optional + Maximum age of cache in seconds. Default is 900 (15 minutes). 
+ """ + self.scheduler = scheduler + self.jobs = {} + self.cache_file = cache_file or os.path.join(".asimov", "_cache_jobs.yaml") + self.cache_time = cache_time + + # Try to load from cache + if os.path.exists(self.cache_file): + age = -os.stat(self.cache_file).st_mtime + datetime.datetime.now().timestamp() + if float(age) < float(self.cache_time): + try: + with open(self.cache_file, "r") as f: + cached_data = yaml.safe_load(f) + except yaml.constructor.ConstructorError: + cached_data = None + if cached_data is not None: + # Only use the cached data if it appears to be a mapping of + # job-like objects (i.e., dictionaries with the keys + # that JobList relies on). Otherwise, fall back to a refresh. + if isinstance(cached_data, dict) and cached_data: + valid_cache = True + for job_obj in cached_data.values(): + # Cached jobs are stored as dictionaries produced by + # Job.to_dict(), so we validate based on required keys. + if not isinstance(job_obj, dict): + valid_cache = False + break + if "id" not in job_obj: + valid_cache = False + break + if valid_cache: + self.jobs = cached_data + return + + # Cache is stale, invalid, or doesn't exist, refresh from scheduler + self.refresh() + + def refresh(self): + """ + Poll the scheduler to get the list of running jobs and update the cache. 
+ """ + # Query all jobs from the scheduler + try: + raw_jobs = self.scheduler.query_all_jobs() + except Exception as e: + raise RuntimeError(f"Failed to query jobs from scheduler: {e}") + + # Process the raw jobs into Job objects + self.jobs = {} + all_jobs = [] + + for job_data in raw_jobs: + job = self._create_job_from_data(job_data) + all_jobs.append(job) + + # Organize jobs by main jobs and subjobs + for job in all_jobs: + if not job.dag_id: + self.jobs[job.job_id] = job + + # Add subjobs to their parent jobs + for job in all_jobs: + if job.dag_id: + if job.dag_id in self.jobs: + self.jobs[job.dag_id].add_subjob(job) + else: + # If DAG parent doesn't exist, store this job as a standalone job + self.jobs[job.job_id] = job + + # Save to cache as plain dicts so yaml.safe_load can read them back. + os.makedirs(os.path.dirname(self.cache_file), exist_ok=True) + with open(self.cache_file, "w") as f: + f.write(yaml.dump({k: v.to_dict() if isinstance(v, Job) else v for k, v in self.jobs.items()})) + + def _create_job_from_data(self, job_data): + """ + Create a Job object from scheduler-specific data. + + Parameters + ---------- + job_data : dict + Scheduler-specific job data. + + Returns + ------- + Job + A Job object. + """ + # This method can be overridden by scheduler-specific implementations + # For now, we assume the data is already in a compatible format + return Job( + job_id=job_data.get("id", job_data.get("job_id")), + command=job_data.get("command", ""), + hosts=job_data.get("hosts", 0), + status=job_data.get("status", 0), + name=job_data.get("name"), + dag_id=job_data.get("dag_id", job_data.get("dag id")), + **{k: v for k, v in job_data.items() if k not in ["id", "job_id", "command", "hosts", "status", "name", "dag id", "dag_id"]} + ) + + +def get_scheduler(scheduler_type="htcondor", **kwargs): + """ + Factory function to get the appropriate scheduler instance. + + Parameters + ---------- + scheduler_type : str + The type of scheduler to create. 
Options: "htcondor", "slurm" + **kwargs + Additional keyword arguments to pass to the scheduler constructor. + + Returns + ------- + Scheduler + An instance of the requested scheduler. + + Raises + ------ + ValueError + If an unknown scheduler type is requested. + """ + scheduler_type = scheduler_type.lower() + + if scheduler_type == "htcondor": + return HTCondor(**kwargs) + elif scheduler_type == "slurm": + return Slurm(**kwargs) + else: + raise ValueError(f"Unknown scheduler type: {scheduler_type}") + +class JobDescription: + """ + A class which represents the description of a job to be submitted to a scheduler. + + This will allow jobs to be easily described in a scheduler-agnostic way. + """ + + # Mapping of generic resource parameters to HTCondor-specific parameters + HTCONDOR_RESOURCE_MAPPING = { + "cpus": "request_cpus", + "memory": "request_memory", + "disk": "request_disk", + } + + def __init__(self, + executable, + output, + error, + log, + **kwargs, + ): + """ + Create a job description object. + + Parameters + ---------- + executable : str, path + The path to the executable to be used to run this job. + output : str, path + The location where stdout from the program should be written. + error : str, path + The location where the stderr from the program should be written. + log : str, path + The location where log messages from the scheduler should be written for this job. + **kwargs + Additional scheduler-specific parameters. + + """ + self.executable = executable + self.output = output + self.error = error + self.log = log + self.kwargs = kwargs + + + def to_htcondor(self): + """ + Create a submit description for the htcondor scheduler. + + Returns + ------- + dict + A dictionary containing the HTCondor submit description. 
+ """ + description = {} + description["executable"] = self.executable + description["output"] = self.output + description["error"] = self.error + description["log"] = self.log + + # Map generic resource parameters to HTCondor-specific ones using the mapping + for generic_key, htcondor_key in self.HTCONDOR_RESOURCE_MAPPING.items(): + if generic_key in self.kwargs: + description[htcondor_key] = self.kwargs[generic_key] + + # Set defaults for resource parameters if not provided + description.setdefault("request_cpus", 1) + description.setdefault("request_memory", "1GB") + description.setdefault("request_disk", "1GB") + + # Add any additional kwargs to the description + # Skip the generic resource parameters as they've already been mapped + for key, value in self.kwargs.items(): + if key not in self.HTCONDOR_RESOURCE_MAPPING: + description[key] = value + + return description + + def to_slurm(self): + """ + Create a submit description for the Slurm scheduler. + + Returns + ------- + dict + A dictionary containing the Slurm submit description. + + Note + ---- + This is a placeholder for future Slurm support. + """ + raise NotImplementedError("Slurm conversion is not yet implemented") + + def to_dict(self, scheduler_type="htcondor"): + """ + Convert the job description to a scheduler-specific dictionary. + + Parameters + ---------- + scheduler_type : str + The type of scheduler. Options: "htcondor", "slurm" + + Returns + ------- + dict + The scheduler-specific job description. 
+ """ + scheduler_type = scheduler_type.lower() + + if scheduler_type == "htcondor": + return self.to_htcondor() + elif scheduler_type == "slurm": + return self.to_slurm() + else: + raise ValueError(f"Unknown scheduler type: {scheduler_type}") \ No newline at end of file diff --git a/asimov/scheduler_utils.py b/asimov/scheduler_utils.py new file mode 100644 index 00000000..48edc143 --- /dev/null +++ b/asimov/scheduler_utils.py @@ -0,0 +1,187 @@ +""" +Helper utilities for scheduler integration in asimov. + +This module provides convenience functions and decorators for using +the scheduler API in pipelines and other parts of asimov. +""" + +import configparser +import functools +from asimov import config, logger +from asimov.scheduler import get_scheduler, JobDescription, JobList + +logger = logger.getChild("scheduler_utils") + + +def get_configured_scheduler(): + """ + Get a scheduler instance based on the asimov configuration. + + This function reads the scheduler configuration from asimov.conf + and returns an appropriate scheduler instance. + + Returns + ------- + Scheduler + A configured scheduler instance (default: HTCondor) + + Examples + -------- + >>> scheduler = get_configured_scheduler() + >>> job = JobDescription(executable="/bin/echo", output="out.log", + ... error="err.log", log="job.log") + >>> cluster_id = scheduler.submit(job) + """ + try: + scheduler_type = config.get("scheduler", "type") + except (configparser.NoOptionError, configparser.NoSectionError, KeyError): + scheduler_type = "htcondor" + + # Get scheduler-specific configuration + kwargs = {} + if scheduler_type == "htcondor": + try: + schedd_name = config.get("condor", "scheduler") + kwargs["schedd_name"] = schedd_name + except (configparser.NoOptionError, configparser.NoSectionError, KeyError) as exc: + logger.debug( + "No specific Condor scheduler configured; using default schedd. 
(%s)", + exc, + ) + + return get_scheduler(scheduler_type, **kwargs) + + +def create_job_from_dict(job_dict): + """ + Create a JobDescription from a dictionary. + + This is a convenience function to convert existing HTCondor-style + job dictionaries to JobDescription objects. The input dictionary + is not modified. + + Parameters + ---------- + job_dict : dict + A dictionary containing job parameters. Should have at least: + - executable: path to the executable + - output: path for stdout + - error: path for stderr + - log: path for job log + + Returns + ------- + JobDescription + A JobDescription object created from the dictionary + + Examples + -------- + >>> job_dict = { + ... "executable": "/bin/echo", + ... "output": "out.log", + ... "error": "err.log", + ... "log": "job.log", + ... "request_cpus": "4", + ... "request_memory": "8GB" + ... } + >>> job = create_job_from_dict(job_dict) + >>> # job_dict is unchanged after the call + """ + # Make a copy to avoid modifying the original dictionary + job_dict_copy = job_dict.copy() + + # Extract required parameters + executable = job_dict_copy.pop("executable") + output = job_dict_copy.pop("output") + error = job_dict_copy.pop("error") + log = job_dict_copy.pop("log") + + # Convert HTCondor-specific resource parameters to generic ones + kwargs = job_dict_copy + + # Map HTCondor resource parameters to generic ones + if "request_cpus" in kwargs: + kwargs["cpus"] = kwargs.pop("request_cpus") + if "request_memory" in kwargs: + kwargs["memory"] = kwargs.pop("request_memory") + if "request_disk" in kwargs: + kwargs["disk"] = kwargs.pop("request_disk") + + return JobDescription( + executable=executable, + output=output, + error=error, + log=log, + **kwargs + ) + + +def scheduler_aware(func): + """ + Decorator to make pipeline methods scheduler-aware. + + This decorator wraps pipeline methods (like submit_dag) to provide + access to the configured scheduler instance via self.scheduler. 
+ + Parameters + ---------- + func : callable + The method to decorate + + Returns + ------- + callable + The wrapped method + + Examples + -------- + >>> class MyPipeline: + ... @scheduler_aware + ... def submit_dag(self): + ... # self.scheduler is now available + ... cluster_id = self.scheduler.submit(job) + ... return cluster_id + """ + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + # Add scheduler instance to the pipeline object if not already present + if not hasattr(self, 'scheduler'): + self.scheduler = get_configured_scheduler() + return func(self, *args, **kwargs) + return wrapper + + +def get_job_list(cache_time=None): + """ + Get a JobList instance for monitoring running jobs. + + This function creates a JobList that queries the configured scheduler + and caches the results for performance. + + Parameters + ---------- + cache_time : int, optional + Maximum age of cache in seconds. If None, uses the value from + config or defaults to 900 (15 minutes). + + Returns + ------- + JobList + A JobList instance containing all running jobs. + + Examples + -------- + >>> job_list = get_job_list() + >>> if 12345 in job_list.jobs: + ... job = job_list.jobs[12345] + ... print(f"Job status: {job.status}") + """ + scheduler = get_configured_scheduler() + + if cache_time is None: + try: + cache_time = float(config.get("condor", "cache_time")) + except (configparser.NoOptionError, configparser.NoSectionError, KeyError): + cache_time = 900 # Default to 15 minutes + + return JobList(scheduler, cache_time=cache_time) diff --git a/asimov/strategies.py b/asimov/strategies.py new file mode 100644 index 00000000..bee15dbb --- /dev/null +++ b/asimov/strategies.py @@ -0,0 +1,169 @@ +""" +Strategy expansion for asimov blueprints. + +This module provides functionality to expand strategy definitions in blueprints +into multiple analyses, similar to GitHub Actions matrix strategies. 
+""" + +from copy import deepcopy +from typing import Any, Dict, List +import itertools + + +def set_nested_value(dictionary: Dict[str, Any], path: str, value: Any) -> None: + """ + Set a value in a nested dictionary using dot notation. + + Parameters + ---------- + dictionary : dict + The dictionary to modify + path : str + The path to the value using dot notation (e.g., "waveform.approximant") + value : Any + The value to set + + Examples + -------- + >>> d = {} + >>> set_nested_value(d, "waveform.approximant", "IMRPhenomXPHM") + >>> d + {'waveform': {'approximant': 'IMRPhenomXPHM'}} + + Raises + ------ + TypeError + If an intermediate key exists but is not a dictionary + """ + keys = path.split(".") + current = dictionary + + for key in keys[:-1]: + if key not in current: + current[key] = {} + elif not isinstance(current[key], dict): + raise TypeError( + f"Cannot set nested value for path '{path}': " + f"intermediate key '{key}' is of type " + f"{type(current[key]).__name__}, expected dict." + ) + current = current[key] + + current[keys[-1]] = value + + +def expand_strategy(blueprint: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Expand a blueprint with a strategy into multiple blueprints. + + A strategy allows you to create multiple similar analyses by specifying + parameter variations. This is similar to GitHub Actions matrix strategies. + + Parameters + ---------- + blueprint : dict + The blueprint document, which may contain a 'strategy' field + + Returns + ------- + list + A list of expanded blueprint documents. If no strategy is present, + returns a list containing only the original blueprint. + + Examples + -------- + A blueprint with a strategy: + + >>> blueprint = { + ... 'kind': 'analysis', + ... 'name': 'bilby-{waveform.approximant}', + ... 'pipeline': 'bilby', + ... 'strategy': { + ... 'waveform.approximant': ['IMRPhenomXPHM', 'SEOBNRv4PHM'] + ... } + ... 
} + >>> expanded = expand_strategy(blueprint) + >>> len(expanded) + 2 + >>> expanded[0]['waveform']['approximant'] + 'IMRPhenomXPHM' + >>> expanded[1]['waveform']['approximant'] + 'SEOBNRv4PHM' + + Notes + ----- + - The 'strategy' field is removed from the expanded blueprints + - Parameter names can use dot notation for nested values + - Name templates can reference strategy parameters using {parameter_name} + where parameter_name is the full parameter path (e.g., {waveform.approximant}) + - Multiple strategy parameters create a cross-product (matrix) + - If multiple parameters have the same final component (e.g., + waveform.frequency and sampler.frequency), the behavior is undefined + and should be avoided + """ + if "strategy" not in blueprint: + return [blueprint] + + # Create a copy to avoid modifying the original + blueprint = deepcopy(blueprint) + strategy = blueprint.pop("strategy") + + # Validate strategy parameters + if not strategy: + raise ValueError("Strategy is defined but empty") + + # Get all parameter combinations + param_names = list(strategy.keys()) + param_values = list(strategy.values()) + + # Validate that all strategy values are lists or iterables + for param_name, values in zip(param_names, param_values): + if not isinstance(values, (list, tuple)): + raise TypeError( + f"Strategy parameter '{param_name}' must be a list, " + f"got {type(values).__name__}. " + f"Did you mean: {param_name}: [{values}]?" + ) + if len(values) == 0: + raise ValueError( + f"Strategy parameter '{param_name}' has an empty list. " + f"Each parameter must have at least one value." 
+ ) + + # Create all combinations (cross product) + combinations = list(itertools.product(*param_values)) + + expanded_blueprints = [] + + for combination in combinations: + # Create a copy of the blueprint for this combination + new_blueprint = deepcopy(blueprint) + + # Build a context for name formatting + # The context uses the fully qualified parameter name as the key + context = {} + for param_name, value in zip(param_names, combination): + # Use the full parameter path for name templates + context[param_name] = value + + # Apply the parameter values to the blueprint + for param_name, value in zip(param_names, combination): + set_nested_value(new_blueprint, param_name, value) + + # Expand the name template if it contains placeholders + if "name" in new_blueprint and isinstance(new_blueprint["name"], str): + new_name = new_blueprint["name"] + # Replace each parameter placeholder with its value + for param_name, value in context.items(): + placeholder = "{" + param_name + "}" + # Convert booleans to lowercase strings for YAML convention + if isinstance(value, bool): + value_str = str(value).lower() + else: + value_str = str(value) + new_name = new_name.replace(placeholder, value_str) + new_blueprint["name"] = new_name + + expanded_blueprints.append(new_blueprint) + + return expanded_blueprints diff --git a/conda/environment.yaml b/conda/environment.yaml new file mode 100644 index 00000000..522c1eaf --- /dev/null +++ b/conda/environment.yaml @@ -0,0 +1,5 @@ +conda-forge::bayeswave +conda-forge::bayeswaveutils +conda-forge::bilby_pipe +conda-forge::asimov-gwdata +pip \ No newline at end of file diff --git a/docs/source/analyses.rst b/docs/source/analyses.rst index 19782e4d..f082ce3e 100644 --- a/docs/source/analyses.rst +++ b/docs/source/analyses.rst @@ -129,6 +129,71 @@ For example: - "pipeline:giskard" - "waveform.approximant:impecableOstritchv56PHMX" +Optional Dependencies +^^^^^^^^^^^^^^^^^^^^^ + +By default, all dependencies specified in the ``needs`` list 
are required. +This means that if a dependency is not present in the ledger, the analysis will not run. +However, you can mark dependencies as optional, which allows the analysis to run even if the dependency is missing. +This is useful for creating reusable blueprints that can adapt to different situations. + +To mark a dependency as optional, use the dict format with an ``optional: true`` key: + +.. code-block:: yaml + + kind: analysis + name: flexible-analysis + pipeline: example + needs: + - pipeline: bilby # Required + - optional: true # Optional + pipeline: rift + +In this example, the analysis will only run if at least one ``bilby`` analysis is present. +However, if a ``rift`` analysis is also available, it will be included as a dependency. + +A Blueprint for a subject analysis +----------------------------------- + +Subject analyses (also called "event analyses") can access the results of multiple simple analyses on a single event. +A common use case is combining results from different parameter estimation runs into a single summary. + +For example, PESummary can be used as a subject analysis to combine results from multiple PE runs: + +.. code-block:: yaml + + kind: analysis + name: CombinedPESummary + pipeline: pesummary + analyses: + - pipeline: bilby # Combine all bilby analyses + refreshable: true # Auto-update when new analyses finish + +The ``analyses`` field works similarly to ``needs``, but is used specifically for subject analyses to specify which simple analyses to include. +You can also use optional dependencies in subject analyses: + +.. code-block:: yaml + + kind: analysis + name: FlexiblePESummary + pipeline: pesummary + analyses: + - pipeline: bilby # Required + - optional: true # Optional + pipeline: rift + refreshable: true + +Refreshable Analyses +^^^^^^^^^^^^^^^^^^^^ + +Subject analyses can be marked as ``refreshable: true``. +When an analysis is refreshable, asimov will automatically re-run it when: + +1. 
New analyses matching the dependencies complete +2. The list of matching analyses changes + +This is particularly useful for PESummary subject analyses, which can automatically regenerate summary pages as new parameter estimation runs complete. + A Blueprint for a project analysis ---------------------------------- diff --git a/docs/source/api/project.rst b/docs/source/api/project.rst new file mode 100644 index 00000000..16a59f55 --- /dev/null +++ b/docs/source/api/project.rst @@ -0,0 +1,9 @@ +Project +======= + +.. currentmodule:: asimov.project + +.. autoclass:: Project + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/schedulers.rst b/docs/source/api/schedulers.rst new file mode 100644 index 00000000..9d9b9d33 --- /dev/null +++ b/docs/source/api/schedulers.rst @@ -0,0 +1,12 @@ +The Schedulers module +===================== + +This module contains the logic for interacting with schedulers, for example, ``HTCondor``. + +The scheduler module provides a unified interface for submitting and managing jobs across different scheduling systems. +Currently supported schedulers include HTCondor, with Slurm support planned for the future. + +.. automodule:: asimov.scheduler + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/source/blueprints.rst b/docs/source/blueprints.rst index ff81ca9e..e305b12d 100644 --- a/docs/source/blueprints.rst +++ b/docs/source/blueprints.rst @@ -161,7 +161,12 @@ Defining analysis requirements Asimov will determine the required computation order of analyses in a project automatically, but in order to do this it needs to be given details of which analyses require the results of a previous analysis. It will then compute a directed acyclic graph (DAG) of all the analyses. -Requirements can be specified in the ``needs`` setting of an analysis. 
For example, in order to define a job which uses the ``bilby`` pipeline, but requires results from an analysis using the ``bayeswave`` pipeline you should specify the name of the ``bayeswave`` analysis in the ``needs`` section of the ``bilby`` analysis. For example:: +Requirements can be specified in the ``needs`` setting of an analysis using a flexible syntax that supports complex dependency conditions. + +Simple name-based dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The simplest form of dependency is to specify the name of a required analysis:: kind: analysis name: generate-psds @@ -173,7 +178,223 @@ Requirements can be specified in the ``needs`` setting of an analysis. For examp needs: - generate-psds -In asimov 0.5 you need to explicitly specify the ``name`` of analyses which provide job dependencies, but in future versions this will be made more flexible so that results can be automatically gathered based, for example, on the pipeline which generated them. +Property-based dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Dependencies can also be specified using properties of analyses. Any property can be used, including nested properties accessed with dot notation:: + + kind: analysis + name: parameter-estimation + pipeline: bilby + needs: + - pipeline: bayeswave + - waveform.approximant: IMRPhenomXPHM + +This will match all analyses that use the ``bayeswave`` pipeline OR have ``IMRPhenomXPHM`` as their waveform approximant. 
+ +Review status dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The review status is a commonly used dependency criterion:: + + kind: analysis + name: combiner + pipeline: bilby + needs: + - review.status: approved + +Negated dependencies +^^^^^^^^^^^^^^^^^^^^ + +You can specify that an analysis should depend on analyses that do NOT match a criterion by prefixing the value with ``!``:: + + kind: analysis + name: non-bayeswave-analyses + pipeline: bilby + needs: + - pipeline: "!bayeswave" + +This will match all analyses except those using the bayeswave pipeline. + +OR logic (multiple dependencies) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, multiple items in the ``needs`` list are combined with OR logic. An analysis will depend on any analysis matching ANY of the conditions:: + + kind: analysis + name: combiner + pipeline: bilby + needs: + - waveform.approximant: IMRPhenomXPHM + - waveform.approximant: SEOBNRv5PHM + +This will match analyses using either ``IMRPhenomXPHM`` OR ``SEOBNRv5PHM``. + +AND logic (all conditions must match) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To specify that ALL conditions must match (AND logic), use a nested list:: + + kind: analysis + name: specific-analysis + pipeline: bilby + needs: + - - review.status: approved + - waveform.approximant: IMRPhenomXPHM + +This will only match analyses that are both approved AND use IMRPhenomXPHM. + +Complex dependency specifications +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can combine AND and OR logic for complex dependency specifications:: + + kind: analysis + name: complex-combiner + pipeline: bilby + needs: + - - review.status: approved + - pipeline: bayeswave + - waveform.approximant: IMRPhenomXPHM + +This will match analyses that are (approved AND use bayeswave) OR use IMRPhenomXPHM. + +Dependency tracking and staleness +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When an analysis runs, asimov records which analyses were its dependencies at that time. 
If the set of matching analyses changes later (for example, if new analyses are added that match the dependency criteria), the original analysis is marked as **stale**. + +Stale analyses are indicated in the HTML report. You can mark an analysis as **refreshable** to indicate it should be automatically re-run when it becomes stale:: + + kind: analysis + name: auto-refresh-analysis + pipeline: bilby + refreshable: true + needs: + - review.status: approved + +The resolved dependencies (those that were actually used when the analysis ran) are stored in the ledger and displayed in the HTML report alongside the current matching dependencies. + +Strategies +========== + +Strategies allow you to create multiple similar analyses with parameter variations from a single blueprint. This is similar to GitHub Actions matrix strategies and is useful for: + +- Testing multiple waveform approximants +- Comparing different samplers +- Running parameter-parameter (p-p) tests +- Performing systematic studies + +Basic Strategy Syntax +--------------------- + +A strategy is defined using the ``strategy`` keyword in an analysis blueprint. The strategy specifies parameters and the values they should take:: + + kind: analysis + name: bilby-{waveform.approximant} + event: GW150914 + pipeline: bilby + strategy: + waveform.approximant: + - IMRPhenomXPHM + - SEOBNRv4PHM + - IMRPhenomD + +This will create three separate analyses: +- ``bilby-IMRPhenomXPHM`` with ``waveform.approximant: IMRPhenomXPHM`` +- ``bilby-SEOBNRv4PHM`` with ``waveform.approximant: SEOBNRv4PHM`` +- ``bilby-IMRPhenomD`` with ``waveform.approximant: IMRPhenomD`` + +Name Templates +-------------- + +The ``name`` field can include placeholders in curly braces (``{}``) that will be replaced with strategy parameter values. 
The placeholder name should match the full parameter path:: + + kind: analysis + name: bilby-{waveform.approximant}-analysis + pipeline: bilby + strategy: + waveform.approximant: + - IMRPhenomXPHM + - SEOBNRv4PHM + +This creates: +- ``bilby-IMRPhenomXPHM-analysis`` +- ``bilby-SEOBNRv4PHM-analysis`` + +If no placeholder is used, all generated analyses will have the same name, which may cause conflicts. + +Matrix Strategies (Multiple Parameters) +---------------------------------------- + +You can specify multiple parameters in a strategy to create all combinations (cross-product):: + + kind: analysis + name: bilby-{waveform.approximant}-{sampler.sampler} + event: GW150914 + pipeline: bilby + strategy: + waveform.approximant: + - IMRPhenomXPHM + - SEOBNRv4PHM + sampler.sampler: + - dynesty + - emcee + +This creates 4 analyses (2 × 2 combinations): +- ``bilby-IMRPhenomXPHM-dynesty`` +- ``bilby-IMRPhenomXPHM-emcee`` +- ``bilby-SEOBNRv4PHM-dynesty`` +- ``bilby-SEOBNRv4PHM-emcee`` + +Nested Parameters +----------------- + +Strategy parameters can use dot notation to set deeply nested values:: + + kind: analysis + name: bilby-margdist-{likelihood.marginalisation.distance} + pipeline: bilby + strategy: + likelihood.marginalisation.distance: + - true + - false + +This sets ``likelihood.marginalisation.distance`` in the generated analyses. + +.. note:: + + Special value handling: + + - Boolean values (``True``/``False``) are converted to lowercase strings (``true``/``false``) when used in name templates to match YAML conventions. + - Each strategy parameter must be a list with at least one value. + - Strategy parameters cannot be empty lists or non-list values. 
+ +Complete Strategy Example +------------------------- + +Here's a complete example combining multiple features:: + + kind: analysis + name: pe-{waveform.approximant}-{sampler.sampler} + event: GW150914 + pipeline: bilby + comment: Systematic waveform and sampler comparison + needs: + - generate-psd + likelihood: + sample rate: 4096 + psd length: 4 + strategy: + waveform.approximant: + - IMRPhenomXPHM + - SEOBNRv4PHM + - IMRPhenomD + sampler.sampler: + - dynesty + - emcee + +This creates 6 analyses (3 waveforms × 2 samplers), each inheriting the ``needs``, ``likelihood``, and ``comment`` settings while varying the waveform and sampler. Waveform ======== @@ -278,13 +499,13 @@ General likelihood settings - See individual pipeline documentation. - The likelihood function to use. * - ``likelihood:kwargs`` - - - - + - ``dict`` + - Additional keyword arguments to be passed to the likelihood function. * - ``likelihood:frequency domain source model`` - See individual pipeline documentation. - * - ``likelihood:time domain source model`` - - + - See individual pipeline documentation. - Calibration settings diff --git a/docs/source/build-process.rst b/docs/source/build-process.rst index 74c3a26d..8acf141f 100644 --- a/docs/source/build-process.rst +++ b/docs/source/build-process.rst @@ -18,8 +18,8 @@ An example of an event ledger might look something like this, which is for ``GW1 .. 
code-block:: yaml calibration: - H1: C01_offline/calibration/H1.dat - L1: C01_offline/calibration/L1.dat + H1: analyses/calibration/H1.dat + L1: analyses/calibration/L1.dat data: channels: H1: H1:DCS-CALIB_STRAIN_C02 @@ -68,8 +68,8 @@ An example of an event ledger might look something like this, which is for ``GW1 status: ready psds: 2048: - H1: C01_offline/psds/2048/H1-psd.dat - L1: C01_offline/psds/2048/L1-psd.dat + H1: analyses/psds/2048/H1-psd.dat + L1: analyses/psds/2048/L1-psd.dat quality: lower-frequency: H1: 20 diff --git a/docs/source/conf.py b/docs/source/conf.py index 8b81e78a..1de6fa13 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -43,6 +43,7 @@ 'sphinxcontrib.httpexample', 'sphinxcontrib.autohttp.flask', "sphinx_multiversion", + "sphinxcontrib.autodoc_pydantic", ] html_logo = "textmark.png" diff --git a/docs/source/config.rst b/docs/source/config.rst index 3b86aa2b..2b749190 100644 --- a/docs/source/config.rst +++ b/docs/source/config.rst @@ -16,9 +16,9 @@ Details of the metadata stored in the production ledger can be found on the :ref .. 
code-block:: yaml calibration: - H1: C01_offline/calibration/H1.dat - L1: C01_offline/calibration/L1.dat - V1: C01_offline/calibration/V1.dat + H1: analyses/calibration/H1.dat + L1: analyses/calibration/L1.dat + V1: analyses/calibration/V1.dat data: channels: H1: H1:DCS-CALIB_STRAIN_CLEAN_SUB60HZ_C01 @@ -59,9 +59,9 @@ Details of the metadata stored in the production ledger can be found on the :ref status: ready psds: 1024: - H1: /home/daniel.williams/events/O3/event_repos/S200224a/C01_offline/psds/1024/H1-psd.dat - L1: /home/daniel.williams/events/O3/event_repos/S200224a/C01_offline/psds/1024/L1-psd.dat - V1: /home/daniel.williams/events/O3/event_repos/S200224a/C01_offline/psds/1024/V1-psd.dat + H1: /home/daniel.williams/events/O3/event_repos/S200224a/analyses/psds/1024/H1-psd.dat + L1: /home/daniel.williams/events/O3/event_repos/S200224a/analyses/psds/1024/L1-psd.dat + V1: /home/daniel.williams/events/O3/event_repos/S200224a/analyses/psds/1024/V1-psd.dat quality: lower-frequency: H1: 20 diff --git a/docs/source/index.rst b/docs/source/index.rst index e408c080..047166ff 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -159,7 +159,9 @@ Advanced topics analyses build-process - + scheduler-integration + monitor-state-machine + monitor-api configuration test-interface clusters @@ -176,8 +178,6 @@ Developers' Guide asimov-repository - - code-overview ledger pipelines-dev @@ -214,6 +214,8 @@ Python API :maxdepth: 1 :caption: Python API + python-api + api/project api/asimov api/condor api/event @@ -226,6 +228,7 @@ Python API api/mattermost api/olivaw api/pipeline + api/schedulers config LIGO Analysis Cookbook diff --git a/docs/source/monitor-api.rst b/docs/source/monitor-api.rst new file mode 100644 index 00000000..eb450b8c --- /dev/null +++ b/docs/source/monitor-api.rst @@ -0,0 +1,303 @@ +Programmatic Monitor API +======================== + +Overview +-------- + +The asimov monitor can be run programmatically from Python scripts or Jupyter notebooks 
using the ``asimov.monitor_api`` module. This is useful for: + +* Custom automation workflows +* Interactive analysis in Jupyter notebooks +* Integration with other Python tools +* Building custom dashboards or monitoring systems + +Quick Start +----------- + +Basic Usage +^^^^^^^^^^^ + +Run the monitor from Python: + +.. code-block:: python + + from asimov.monitor_api import run_monitor + + # Run monitor on all analyses + results = run_monitor(verbose=True) + print(f"Monitored {results['total']} analyses") + +Jupyter Notebook Example +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: python + + # In a Jupyter notebook cell + from asimov.monitor_api import run_monitor, list_active_analyses + import pandas as pd + + # Get list of active analyses + analyses = list_active_analyses() + df = pd.DataFrame(analyses) + display(df) + + # Run monitor + results = run_monitor(verbose=True) + + # Show results + pd.DataFrame([results]) + +API Functions +------------- + +run_monitor() +^^^^^^^^^^^^^ + +Run the complete monitoring cycle programmatically: + +.. code-block:: python + + from asimov.monitor_api import run_monitor + + # Monitor all analyses (keyword-only arguments) + results = run_monitor() + + # Monitor specific event + results = run_monitor(event_filter="GW150914") + + # Dry run (no changes) + results = run_monitor(dry_run=True, verbose=True) + + # Results dictionary contains: + # - 'total': Total number of analyses + # - 'project_analyses': Number of project analyses + # - 'event_analyses': Number of event analyses + # - 'active': Number currently active + # - 'complete': Number completed + # - 'stuck': Number stuck + +get_analysis_status() +^^^^^^^^^^^^^^^^^^^^ + +Query the current status of analyses: + +.. 
code-block:: python + + from asimov.monitor_api import get_analysis_status + + # Get all statuses + statuses = get_analysis_status() + for name, status in statuses.items(): + print(f"{name}: {status}") + + # Get status for specific event (keyword-only arguments) + statuses = get_analysis_status(event_name="GW150914") + + # Get status for specific analysis + status = get_analysis_status(analysis_name="bilby_analysis") + +list_active_analyses() +^^^^^^^^^^^^^^^^^^^^^^ + +List all currently active analyses: + +.. code-block:: python + + from asimov.monitor_api import list_active_analyses + + analyses = list_active_analyses() + for analysis in analyses: + print(f"{analysis['name']}: {analysis['status']}") + +Use Cases +--------- + +Custom Monitoring Script +^^^^^^^^^^^^^^^^^^^^^^^ + +Create a custom monitoring script: + +.. code-block:: python + + #!/usr/bin/env python + """Custom monitoring script for asimov.""" + + import time + from asimov.monitor_api import run_monitor + + def monitor_loop(interval=300): + """Run monitor in a loop.""" + while True: + print(f"Running monitor at {time.strftime('%Y-%m-%d %H:%M:%S')}") + try: + results = run_monitor(verbose=True) + + # Custom logic based on results + if results['stuck'] > 0: + print(f"WARNING: {results['stuck']} analyses are stuck!") + + except Exception as e: + print(f"Error during monitoring: {e}") + + print(f"Waiting {interval} seconds...") + time.sleep(interval) + + if __name__ == "__main__": + monitor_loop(interval=300) # Run every 5 minutes + +Jupyter Dashboard +^^^^^^^^^^^^^^^^^ + +Create an interactive dashboard in Jupyter: + +.. code-block:: python + + # Jupyter notebook cells + + # Cell 1: Imports + from asimov.monitor_api import run_monitor, list_active_analyses, get_analysis_status + import pandas as pd + import matplotlib.pyplot as plt + from IPython.display import display, HTML + + # Cell 2: Show active analyses + analyses = list_active_analyses() + df = pd.DataFrame(analyses) + display(HTML("

<h3>Active Analyses</h3>

")) + display(df) + + # Cell 3: Status distribution + status_counts = df['status'].value_counts() + status_counts.plot(kind='bar', title='Analysis Status Distribution') + plt.ylabel('Count') + plt.show() + + # Cell 4: Run monitor + display(HTML("

<h3>Running Monitor</h3>

")) + results = run_monitor(verbose=True) + + # Cell 5: Show results + display(HTML("

<h3>Monitor Results</h3>

")) + results_df = pd.DataFrame([results]) + display(results_df) + +Integration with Analysis Workflow +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Integrate monitoring into your analysis workflow: + +.. code-block:: python + + from asimov.monitor_api import run_monitor, get_analysis_status + + # Submit your analysis + # ... your code to submit analysis ... + + # Monitor until complete + analysis_name = "my_bilby_analysis" + + while True: + status = get_analysis_status(analysis_name=analysis_name) + + if status.get(f"GW150914/{analysis_name}") == "finished": + print("Analysis complete!") + break + elif status.get(f"GW150914/{analysis_name}") == "stuck": + print("Analysis is stuck, needs attention") + break + + # Run monitor + run_monitor() + + # Wait before checking again + time.sleep(300) # 5 minutes + +Scheduled Monitoring +^^^^^^^^^^^^^^^^^^^^ + +Use with task schedulers like cron or systemd: + +.. code-block:: python + + #!/usr/bin/env python + """ + Scheduled monitor script. + Run with: python monitor_scheduled.py + Or schedule with cron: */15 * * * * /path/to/python monitor_scheduled.py + """ + + import logging + from asimov.monitor_api import run_monitor + from datetime import datetime + + # Set up logging + logging.basicConfig( + filename='/path/to/monitor.log', + level=logging.INFO, + format='%(asctime)s - %(message)s' + ) + + def main(): + logging.info("Starting scheduled monitor run") + + try: + results = run_monitor() + logging.info( + f"Monitor complete: {results['total']} analyses, " + f"{results['active']} active, {results['stuck']} stuck" + ) + except Exception as e: + logging.error(f"Monitor failed: {e}", exc_info=True) + + if __name__ == "__main__": + main() + +Error Handling +-------------- + +The monitor API raises exceptions for errors: + +.. 
code-block:: python + + from asimov.monitor_api import run_monitor + + try: + results = run_monitor() + except RuntimeError as e: + print(f"Monitor error: {e}") + # Handle error (e.g., condor not available) + except Exception as e: + print(f"Unexpected error: {e}") + # Handle other errors + +Comparison with CLI +------------------- + +The programmatic API provides the same functionality as the CLI but with Python interfaces: + ++----------------------------------+------------------------------------------+ +| CLI Command | Programmatic Equivalent | ++==================================+==========================================+ +| ``asimov monitor`` | ``run_monitor()`` | ++----------------------------------+------------------------------------------+ +| ``asimov monitor --dry-run`` | ``run_monitor(dry_run=True)`` | ++----------------------------------+------------------------------------------+ +| ``asimov monitor GW150914`` | ``run_monitor(event_filter="GW150914")`` | ++----------------------------------+------------------------------------------+ + +Best Practices +-------------- + +1. **Use dry runs for testing**: Always test with ``dry_run=True`` first +2. **Handle exceptions**: Wrap monitor calls in try-except blocks +3. **Log results**: Keep logs of monitoring runs for debugging +4. **Limit frequency**: Don't run too frequently (recommended: 5-15 minutes minimum) +5. **Check stuck analyses**: Monitor the 'stuck' count and investigate issues +6. 
**Use filters**: Filter by event when working with specific analyses + +See Also +-------- + +* :doc:`monitor-state-machine` - State machine architecture +* :doc:`user-guide/monitoring` - CLI monitoring guide +* :doc:`api/asimov` - Full API reference diff --git a/docs/source/monitor-state-machine.rst b/docs/source/monitor-state-machine.rst new file mode 100644 index 00000000..3a9fd2fc --- /dev/null +++ b/docs/source/monitor-state-machine.rst @@ -0,0 +1,513 @@ +Monitor State Machine Architecture +=================================== + +Overview +-------- + +The asimov monitor loop has been refactored to use a state machine pattern, replacing the previous hard-coded if-elif chains. This new architecture provides better maintainability, extensibility, and clarity in how analyses transition between states. + +Architecture Components +---------------------- + +The refactored monitor system consists of three main components: + +1. **MonitorState**: Abstract state handlers for each analysis state +2. **MonitorContext**: Context object managing analysis monitoring +3. **monitor_helpers**: Reusable functions for monitoring analyses + +MonitorState Classes +------------------- + +Each analysis state is handled by a dedicated state class that implements the ``MonitorState`` abstract base class: + +.. 
code-block:: python + + from asimov.monitor_states import MonitorState + + class CustomState(MonitorState): + @property + def state_name(self): + return "custom" + + def handle(self, context): + # Implement state-specific logic here + return True + +Built-in State Handlers +^^^^^^^^^^^^^^^^^^^^^^^ + +The following state handlers are provided: + +* **ReadyState**: Handles analyses in 'ready' state (not yet started) +* **StopState**: Handles analyses that need to be stopped +* **RunningState**: Handles analyses currently running on the scheduler +* **FinishedState**: Handles analyses that have completed execution +* **ProcessingState**: Handles analyses in post-processing phase +* **StuckState**: Handles analyses that are stuck and need intervention +* **StoppedState**: Handles analyses that have been stopped + +State Transitions +^^^^^^^^^^^^^^^^ + +The state machine enforces the following transitions: + +.. code-block:: + + ready → running → finished → processing → uploaded + ↓ ↓ + stop → stopped stuck (error state) + ↓ + restart + +Each state handler is responsible for: + +* Checking the current status of the analysis +* Performing any necessary actions (e.g., calling pipeline hooks) +* Updating the analysis status for transitions +* Updating the ledger through the context + +MonitorContext +------------- + +The ``MonitorContext`` class encapsulates all the state and operations needed to monitor a single analysis: + +.. 
code-block:: python + + from asimov.monitor_context import MonitorContext + + context = MonitorContext( + analysis=analysis, + job_list=job_list, + ledger=ledger, + dry_run=False, + analysis_path="GW150914/analysis_name" + ) + +Key Features +^^^^^^^^^^^ + +* **Job Management**: Retrieves condor job information +* **Ledger Updates**: Handles both event and project analysis updates +* **Dry Run Support**: Allows testing without actual updates +* **Job List Refresh**: Coordinates with condor job list + +Helper Functions +--------------- + +monitor_analysis +^^^^^^^^^^^^^^^ + +The ``monitor_analysis`` function provides a unified interface for monitoring both event and project analyses: + +.. code-block:: python + + from asimov.monitor_helpers import monitor_analysis + + success = monitor_analysis( + analysis=analysis, + job_list=job_list, + ledger=ledger, + dry_run=False, + analysis_path="GW150914/bilby_analysis" + ) + +This function: + +1. Creates a ``MonitorContext`` +2. Gets the appropriate state handler for the analysis +3. Delegates to the state handler +4. Updates the ledger if successful + +monitor_analyses_list +^^^^^^^^^^^^^^^^^^^^ + +For monitoring multiple analyses, use ``monitor_analyses_list``: + +.. code-block:: python + + from asimov.monitor_helpers import monitor_analyses_list + + stats = monitor_analyses_list( + analyses=event.productions, + job_list=job_list, + ledger=ledger, + label="productions" + ) + + print(f"Total: {stats['total']}, Running: {stats['running']}") + +Extending the State Machine +--------------------------- + +The monitor state machine supports a plugin architecture that allows you to add +custom states without modifying asimov's core code. + +Adding Custom States via Entry Points +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The recommended way to add custom states is through Python's entry points system. +This allows your custom states to be automatically discovered and registered when +your package is installed. 
+ +**Step 1: Define your custom state** + +.. code-block:: python + + # In mypackage/states.py + from asimov.monitor_states import MonitorState + + class ValidationState(MonitorState): + @property + def state_name(self): + return "validation" + + def handle(self, context): + analysis = context.analysis + # Custom validation logic + if self.validate_analysis(analysis): + analysis.status = "validated" + context.update_ledger() + return True + else: + analysis.status = "validation_failed" + context.update_ledger() + return False + + def validate_analysis(self, analysis): + # Your validation logic here + return True + +**Step 2: Register via entry points** + +In your package's ``setup.py``: + +.. code-block:: python + + from setuptools import setup + + setup( + name="mypackage", + # ... other setup parameters ... + entry_points={ + 'asimov.monitor.states': [ + 'validation = mypackage.states:ValidationState', + ] + } + ) + +Or in ``pyproject.toml``: + +.. code-block:: toml + + [project.entry-points."asimov.monitor.states"] + validation = "mypackage.states:ValidationState" + +**Step 3: Install your package** + +Once installed, asimov will automatically discover and register your custom state: + +.. code-block:: bash + + pip install mypackage + +Your custom state is now available for use. When an analysis has ``status = "validation"``, +the ``ValidationState`` handler will be invoked automatically. + +Programmatic Registration +^^^^^^^^^^^^^^^^^^^^^^^^^ + +For runtime or dynamic state registration, use the ``register_state()`` function: + +.. 
code-block:: python + + from asimov.monitor_states import MonitorState, register_state + + class CustomState(MonitorState): + @property + def state_name(self): + return "custom" + + def handle(self, context): + # Custom logic + return True + + # Register the state + register_state(CustomState()) + +This approach is useful for: + +* Testing custom states before creating a plugin +* Dynamic state registration based on runtime conditions +* Temporary state handlers + +**Note:** States registered programmatically must be registered before the monitor +loop runs. Consider registering them in your application's initialization code. + +Legacy Registration (Not Recommended) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direct modification of ``STATE_REGISTRY`` still works but is not recommended: + +.. code-block:: python + + from asimov.monitor_states import STATE_REGISTRY, CustomState + + # Not recommended - use register_state() instead + STATE_REGISTRY["custom"] = CustomState() + +Custom Pipeline Hooks +^^^^^^^^^^^^^^^^^^^^^ + +Pipeline classes can now define custom hooks that are called during monitoring: + +.. code-block:: python + + from asimov.pipeline import Pipeline + + class CustomPipeline(Pipeline): + def while_running(self): + """Called each monitor cycle while analysis is running.""" + # Collect intermediate results + self.check_convergence() + + def detect_completion(self): + """Check if the analysis has completed.""" + return os.path.exists(self.results_file) + + def after_completion(self): + """Called when analysis completes.""" + self.production.status = "finished" + self.collect_results() + +All pipeline hook methods now have default implementations in the base ``Pipeline`` class, so pipelines only need to override the ones they use. + +Pipeline-Specific States +^^^^^^^^^^^^^^^^^^^^^^^^ + +Pipelines can define their own state handlers that override or extend the default state handlers. 
This enables pipeline-specific behavior for different analysis states: + +.. code-block:: python + + from asimov.pipeline import Pipeline + from asimov.monitor_states import MonitorState + import click + + class BilbyRunningState(MonitorState): + """Custom running state for Bilby pipeline.""" + + @property + def state_name(self): + return "running" + + def handle(self, context): + analysis = context.analysis + # Bilby-specific running logic + if self.check_bilby_progress(analysis): + click.echo(f" \t ● Bilby progress: 75%") + # Call default behavior + from asimov.monitor_states import RunningState + return RunningState().handle(context) + + def check_bilby_progress(self, analysis): + # Check bilby-specific progress indicators + return True + + class Bilby(Pipeline): + def get_state_handlers(self): + """Define Bilby-specific state handlers.""" + return { + "running": BilbyRunningState(), + } + +**How it works:** + +1. When monitoring an analysis, the monitor checks if the pipeline defines custom state handlers via ``get_state_handlers()`` +2. If a custom handler exists for the current state, it's used +3. If no custom handler exists, the default handler is used +4. This allows pipelines to customize behavior without modifying core code + +**Use cases:** + +* Pipeline-specific progress monitoring +* Custom completion detection +* Special handling for pipeline-specific error states +* Integration with pipeline-specific tools or services + +Migration Guide +-------------- + +Updating Existing Code +^^^^^^^^^^^^^^^^^^^^^ + +The refactored monitor is backward compatible. Existing code will continue to work without changes. However, to take advantage of the new architecture: + +**Old approach (deprecated):** + +.. code-block:: python + + if analysis.status.lower() == "running": + if job.status.lower() == "completed": + pipe.after_completion() + analysis.status = "finished" + ledger.update() + +**New approach:** + +.. 
code-block:: python + + from asimov.monitor_helpers import monitor_analysis + + monitor_analysis(analysis, job_list, ledger) + +The new approach automatically handles all state transitions. + +Custom Analysis Types +^^^^^^^^^^^^^^^^^^^^ + +For custom analysis types, define monitoring behavior by creating custom state handlers: + +.. code-block:: python + + class PopulationAnalysisState(ProcessingState): + def handle(self, context): + # Custom logic for population analyses + if self.all_events_complete(context.analysis): + return super().handle(context) + else: + click.echo("Waiting for all events to complete") + return True + +Testing +------- + +The state machine components are fully unit tested. See ``tests/test_monitor_states.py`` and ``tests/test_monitor_helpers.py`` for examples of how to test custom states and monitor logic. + +Example test: + +.. code-block:: python + + import unittest + from unittest.mock import Mock + from asimov.monitor_states import RunningState + from asimov.monitor_context import MonitorContext + + class TestCustomState(unittest.TestCase): + def test_running_state(self): + state = RunningState() + analysis = Mock() + analysis.status = "running" + context = MonitorContext(analysis, job_list, ledger) + + result = state.handle(context) + self.assertTrue(result) + +Best Practices +------------- + +1. **Use entry points for production**: Entry points provide automatic discovery and clean separation +2. **Keep state handlers focused**: Each state should handle only its specific concerns +3. **Use context methods**: Always use ``context.update_ledger()`` rather than direct ledger calls +4. **Handle errors gracefully**: State handlers should catch exceptions and report them appropriately +5. **Test state transitions**: Write unit tests for any custom state handlers +6. **Document custom states**: Add documentation for any new states you introduce +7. 
**Version your plugins**: If distributing custom states as plugins, use semantic versioning + +Complete Plugin Example +^^^^^^^^^^^^^^^^^^^^^^^ + +Here's a complete example of creating a plugin package with custom states: + +**Directory structure:** + +.. code-block:: + + my-asimov-plugin/ + ├── pyproject.toml + ├── README.md + └── my_asimov_plugin/ + ├── __init__.py + └── states.py + +**pyproject.toml:** + +.. code-block:: toml + + [build-system] + requires = ["setuptools>=61.0"] + build-backend = "setuptools.build_meta" + + [project] + name = "my-asimov-plugin" + version = "0.1.0" + description = "Custom analysis states for asimov" + dependencies = [ + "asimov>=0.7.0", + ] + + [project.entry-points."asimov.monitor.states"] + validation = "my_asimov_plugin.states:ValidationState" + calibration = "my_asimov_plugin.states:CalibrationState" + +**states.py:** + +.. code-block:: python + + from asimov.monitor_states import MonitorState + import click + + class ValidationState(MonitorState): + """Validate analysis results before marking as complete.""" + + @property + def state_name(self): + return "validation" + + def handle(self, context): + analysis = context.analysis + click.echo(f" \t ● Validating {analysis.name}") + + # Run validation checks + if self.validate_results(analysis): + analysis.status = "validated" + click.echo(f" \t ✓ Validation passed", fg="green") + else: + analysis.status = "validation_failed" + click.echo(f" \t ✗ Validation failed", fg="red") + + context.update_ledger() + return True + + def validate_results(self, analysis): + # Your validation logic here + return True + + class CalibrationState(MonitorState): + """Handle calibration-specific processing.""" + + @property + def state_name(self): + return "calibration" + + def handle(self, context): + analysis = context.analysis + # Calibration logic here + analysis.status = "calibrated" + context.update_ledger() + return True + +**Installation and usage:** + +.. 
code-block:: bash + + # Install the plugin + pip install my-asimov-plugin + + # Now your custom states are available in asimov + # Set analysis.status = "validation" to trigger ValidationState + +See Also +-------- + +* :doc:`code-overview` - General asimov architecture +* :doc:`hooks` - Post-monitor hooks +* :doc:`api/asimov` - API reference diff --git a/docs/source/priors.md b/docs/source/priors.md new file mode 100644 index 00000000..4a95b894 --- /dev/null +++ b/docs/source/priors.md @@ -0,0 +1,253 @@ +# Prior Specification and Interface System + +## Overview + +As of version 0.6, asimov includes a refactored prior handling system that provides: + +1. **Validation**: Priors are validated using pydantic models when blueprints are applied +2. **Pipeline Interfaces**: Each pipeline can define how to convert asimov priors to pipeline-specific formats +3. **Reparameterizations**: Support for both standard priors and parameter reparameterizations (useful for pipelines like pycbc) +4. **Backward Compatibility**: Existing blueprints continue to work without modification + +## For Users: Specifying Priors in Blueprints + +### Basic Prior Specification + +Priors are specified in blueprints using the `priors` section. 
The format remains the same as before: + +```yaml +kind: event +name: GW150914_095045 +priors: + luminosity distance: + minimum: 10 + maximum: 1000 + mass ratio: + minimum: 0.1 + maximum: 1.0 +``` + +### Advanced Prior Specification + +You can now specify additional parameters that will be validated: + +```yaml +priors: + default: BBHPriorDict # The default prior set to use + luminosity distance: + minimum: 10 + maximum: 1000 + type: PowerLaw + alpha: 2 + geocentric time: + minimum: -0.1 + maximum: 0.1 + type: Uniform + boundary: reflective +``` + +### Supported Prior Parameters + +The following parameters are recognized by the validation system: + +- `minimum`: Minimum value for the prior +- `maximum`: Maximum value for the prior +- `type`: The prior distribution type/class name +- `boundary`: Boundary condition ('periodic', 'reflective', or None) +- `alpha`: Power law index (for PowerLaw priors) +- `mu`: Mean (for Gaussian priors) +- `sigma`: Standard deviation (for Gaussian priors) + +Additional pipeline-specific parameters are also allowed and will be passed through to the pipeline interface. + +## For Pipeline Developers: Creating Prior Interfaces + +### Creating a New Prior Interface + +To add prior handling for a new pipeline, create a class that inherits from `PriorInterface`: + +```python +from asimov.priors import PriorInterface, PriorDict + +class MyPipelinePriorInterface(PriorInterface): + """Prior interface for MyPipeline.""" + + def convert(self): + """ + Convert asimov priors to pipeline-specific format. 
+ + Returns + ------- + dict or str or Any + The prior specification in the format required by your pipeline + """ + if self.prior_dict is None: + return {} + + # Convert to your pipeline's format + result = {} + for param_name in ['mass_1', 'mass_2', 'luminosity_distance']: + prior_spec = self.prior_dict.get_prior(param_name) + if prior_spec: + # Convert to your format + result[param_name] = self.convert_single_prior(prior_spec) + + return result + + def convert_single_prior(self, prior_spec): + """Convert a single prior specification.""" + # Implement conversion logic for your pipeline + pass +``` + +### Integrating with Your Pipeline Class + +Override the `get_prior_interface()` method in your Pipeline class: + +```python +from asimov.pipeline import Pipeline +from .my_prior_interface import MyPipelinePriorInterface + +class MyPipeline(Pipeline): + """My pipeline implementation.""" + + def get_prior_interface(self): + """Get the prior interface for this pipeline.""" + if self._prior_interface is None: + priors = self.production.priors + self._prior_interface = MyPipelinePriorInterface(priors) + return self._prior_interface +``` + +### Using the Prior Interface + +In your pipeline's configuration generation or submission logic: + +```python +# Get the prior interface +prior_interface = self.get_prior_interface() + +# Convert to pipeline-specific format +pipeline_priors = prior_interface.convert() + +# Use in your pipeline +# ... +``` + +## Reparameterizations + +For pipelines that support parameter reparameterizations (like pycbc), you can specify them: + +```python +from asimov.priors import Reparameterization + +reparam = Reparameterization( + from_parameters=['mass_1', 'mass_2'], + to_parameters=['chirp_mass', 'mass_ratio'], + transform='mass_to_chirp_mass_ratio' +) +``` + +## Validation + +Priors are automatically validated when they are set on an analysis: + +```python +from asimov.analysis import Production + +production = Production(...) 
+ +# This will be validated +production.priors = { + 'mass ratio': { + 'minimum': 0.1, + 'maximum': 1.0 + } +} + +# Invalid priors will raise a validation error +try: + production.priors = "not a dict" # Will raise TypeError +except TypeError as e: + print(f"Validation failed: {e}") +``` + +## Example: Pipeline Prior Interfaces + +### Bilby Prior Interface + +The bilby pipeline includes a `BilbyPriorInterface` (in `asimov/pipelines/bilby.py`) that demonstrates the pattern: + +```python +from asimov.pipelines.bilby import BilbyPriorInterface + +# Create interface with priors from blueprint +interface = BilbyPriorInterface({ + 'default': 'BBHPriorDict', + 'luminosity distance': { + 'minimum': 10, + 'maximum': 1000, + 'type': 'PowerLaw', + 'alpha': 2 + }, + 'chirp mass': { + 'minimum': 21.4, + 'maximum': 42.0 + } +}) + +# Convert to bilby format (returns dict) +bilby_priors = interface.convert() + +# Get default prior set +default = interface.get_default_prior() # Returns 'BBHPriorDict' + +# Generate a complete prior_dict string for bilby config +prior_string = interface.to_prior_dict_string() +# Returns a formatted string like: +# { +# chirp_mass = bilby.gw.prior.UniformInComponentsChirpMass(name='chirp_mass', minimum=21.4, maximum=42.0, unit='$M_{\odot}$'), +# luminosity_distance = PowerLaw(name='luminosity_distance', minimum=10, maximum=1000, alpha=2, unit='Mpc'), +# ... +# } +``` + +The bilby template (`configs/bilby.ini`) uses the `to_prior_dict_string()` method to generate a complete prior dictionary string that can be directly inserted into the configuration file: + +```liquid +{%- assign prior_interface = production.pipeline.get_prior_interface() -%} +default-prior = {{ prior_interface.get_default_prior() }} +prior-dict = {{ prior_interface.to_prior_dict_string() }} +``` + +This approach provides maximum flexibility as the prior interface can generate any valid bilby prior specification, including custom prior types and reparameterizations. 
+ +### LALInference Prior Interface + +The LALInference pipeline includes a `LALInferencePriorInterface` (in `asimov/pipelines/lalinference.py`) that converts asimov priors to LALInference format: + +```python +from asimov.pipelines.lalinference import LALInferencePriorInterface + +# Create interface with priors from blueprint +interface = LALInferencePriorInterface({ + 'mass ratio': { + 'minimum': 0.05, + 'maximum': 1.0 + }, + 'luminosity distance': { + 'minimum': 10, + 'maximum': 10000 + }, + 'amp order': 0 +}) + +# Convert to LALInference format (uses [min, max] arrays) +lalinf_priors = interface.convert() +# Returns: {'mass ratio': [0.05, 1.0], 'luminosity distance': [10, 10000]} + +# Get amplitude order +amp_order = interface.get_amp_order() # Returns 0 +``` + +The LALInference template (`configs/lalinference.ini`) accesses these priors through the pipeline's prior interface. diff --git a/docs/source/python-api.rst b/docs/source/python-api.rst new file mode 100644 index 00000000..bd2488ae --- /dev/null +++ b/docs/source/python-api.rst @@ -0,0 +1,175 @@ +.. _python-api: + +Python API +========== + +Overview +-------- + +In addition to the command-line interface, asimov provides a Python API that allows you to create and manage projects programmatically. This is particularly useful for: + +* Creating projects from Python scripts +* Automating project setup and configuration +* Integrating asimov into larger workflows +* Creating analyses programmatically + +Creating a New Project +---------------------- + +You can create a new asimov project directly from Python using the ``Project`` class: + +.. code-block:: python + + from asimov.project import Project + + # Create a new project + project = Project( + name="My Project", + location="/path/to/project" + ) + +This creates the same directory structure and configuration files as the ``asimov init`` command. 
+ +Working with Projects +--------------------- + +The ``Project`` class provides a context manager interface that ensures the project ledger is properly saved after making changes: + +.. code-block:: python + + from asimov.project import Project + + # Create a new project (see "Loading an Existing Project" below for loading existing projects) + project = Project("My Project", location="/path/to/project") + + # Use the context manager to make changes + with project: + # Add a subject (event) to the project + subject = project.add_subject(name="GW150914") + + # Add an analysis to the subject + from asimov.analysis import GravitationalWaveTransient + + production = GravitationalWaveTransient( + subject=subject, + name="bilby_production", + pipeline="bilby", + status="ready", + ledger=project.ledger + ) + + subject.add_production(production) + # The ledger will be updated when exiting the context manager + project.ledger.update_event(subject) + + # When the context exits, changes are automatically saved + +Loading an Existing Project +---------------------------- + +You can load an existing asimov project using the ``Project.load()`` class method: + +.. code-block:: python + + from asimov.project import Project + + # Load an existing project + project = Project.load("/path/to/existing/project") + + # Access events in the project + events = project.get_event() + for event in events: + print(f"Event: {event.name}") + for production in event.productions: + print(f" - {production.name}: {production.status}") + +Adding Multiple Subjects +------------------------- + +You can add multiple subjects to a project within the same context: + +.. 
code-block:: python + + from asimov.project import Project + + project = Project("Multi-Event Project", location="/path/to/project") + + with project: + # Add multiple events + gw150914 = project.add_subject(name="GW150914") + gw151012 = project.add_subject(name="GW151012") + gw151226 = project.add_subject(name="GW151226") + +Accessing the Ledger +--------------------- + +The project's ledger can be accessed through the ``ledger`` property: + +.. code-block:: python + + project = Project.load("/path/to/project") + + # Access the ledger + ledger = project.ledger + + # Get all events + all_events = ledger.get_event() + + # Get a specific event + specific_event = ledger.get_event("GW150914") + +Complete Example +---------------- + +Here's a complete example showing how to create a project, add events, and configure analyses: + +.. code-block:: python + + from asimov.project import Project + from asimov.analysis import GravitationalWaveTransient + + # Create a new project + project = Project( + name="GWTC-1 Reanalysis", + location="/data/projects/gwtc1" + ) + + with project: + # Add events from GWTC-1 + for event_name in ["GW150914", "GW151012", "GW151226"]: + subject = project.add_subject(name=event_name) + + # Add a Bilby analysis + bilby_prod = GravitationalWaveTransient( + subject=subject, + name=f"{event_name}_bilby", + pipeline="bilby", + status="ready", + ledger=project.ledger + ) + subject.add_production(bilby_prod) + project.ledger.update_event(subject) + + # After exiting the context, all changes are saved + print(f"Project created with {len(project.get_event())} events") + +Context Manager Benefits +------------------------- + +The context manager approach ensures that: + +1. **Transactional Updates**: Changes to the ledger are grouped together and saved atomically +2. **Automatic Saving**: You don't need to manually call ``save()`` on the ledger +3. **Clean Resource Management**: The project directory is properly managed during operations +4. 
**Error Handling**: If an error occurs, the ledger is not saved, preventing partial updates + +API Reference +------------- + +Project Class +~~~~~~~~~~~~~ + +.. autoclass:: asimov.project.Project + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/scheduler-integration.rst b/docs/source/scheduler-integration.rst new file mode 100644 index 00000000..e24d2f28 --- /dev/null +++ b/docs/source/scheduler-integration.rst @@ -0,0 +1,131 @@ +Scheduler Integration Guide +============================ + +This guide explains how to use the scheduler abstraction in asimov pipelines and other components. + +Overview +-------- + +Asimov now includes a scheduler abstraction layer that provides a uniform interface for +interacting with different job schedulers (HTCondor, Slurm, etc.). This reduces code +duplication and makes it easier to switch between schedulers. + +Using the Scheduler in Pipelines +--------------------------------- + +All Pipeline objects now have a ``scheduler`` property that provides access to the configured +scheduler instance. This can be used for custom job submissions within pipeline methods. + +Example +~~~~~~~ + +.. code-block:: python + + from asimov.pipeline import Pipeline + from asimov.scheduler import JobDescription + + class MyPipeline(Pipeline): + def submit_custom_job(self): + """Submit a custom job using the scheduler.""" + + # The scheduler is automatically available + job = JobDescription( + executable="/path/to/script", + output="output.log", + error="error.log", + log="job.log", + cpus=4, + memory="8GB" + ) + + cluster_id = self.scheduler.submit(job) + self.logger.info(f"Submitted job with cluster ID: {cluster_id}") + return cluster_id + +DAG Submission +-------------- + +DAG submission (via ``submit_dag`` methods) now uses the scheduler API. For HTCondor backends, +this wraps the Python bindings (e.g., ``htcondor.Submit.from_dag()``) rather than calling +``condor_submit_dag`` directly. 
The scheduler property remains available in these methods for +any additional, non-DAG job submissions that may be needed. + +Using the Scheduler in CLI Commands +------------------------------------ + +The monitor loop and other CLI commands can use the scheduler API directly: + +.. code-block:: python + + from asimov.scheduler_utils import get_configured_scheduler, create_job_from_dict + + # Get the configured scheduler + scheduler = get_configured_scheduler() + + # Submit a job using a dictionary + job_dict = { + "executable": "/bin/echo", + "output": "out.log", + "error": "err.log", + "log": "job.log", + "request_cpus": "1", + "request_memory": "1GB" + } + + job = create_job_from_dict(job_dict) + cluster_id = scheduler.submit(job) + +The ``asimov monitor start`` and ``asimov monitor stop`` commands now support the +``--use-scheduler-api`` flag to use the new scheduler API directly: + +.. code-block:: bash + + # Use the new scheduler API + asimov monitor start --use-scheduler-api + + # Use the legacy interface (default) + asimov monitor start + +Backward Compatibility +---------------------- + +The existing ``asimov.condor`` module continues to work unchanged. Functions like +``condor.submit_job()`` and ``condor.delete_job()`` now use the scheduler API internally +while maintaining full backward compatibility. + +This means existing code continues to work without modification: + +.. code-block:: python + + from asimov import condor + + # This still works and uses the scheduler internally + cluster = condor.submit_job(submit_description) + condor.delete_job(cluster) + +Configuration +------------- + +You can configure the scheduler in your ``asimov.conf`` file: + +.. code-block:: ini + + [scheduler] + type = htcondor + + [condor] + scheduler = my-schedd.example.com # Optional: specific schedd + +Future Schedulers +----------------- + +When Slurm or other schedulers are fully implemented, you'll be able to switch by +simply changing the configuration: + +.. 
code-block:: ini + + [scheduler] + type = slurm + +All code using the scheduler API will automatically use the new scheduler without +requiring any code changes. diff --git a/docs/source/test-interface.rst b/docs/source/test-interface.rst index aeb819b3..a82d979e 100644 --- a/docs/source/test-interface.rst +++ b/docs/source/test-interface.rst @@ -25,7 +25,7 @@ Example: Checking prior files with self.subTest(event=event.title, production=production.name): repo = event.event_object.repository.directory try: - with open(f"{repo}/C01_offline/{production.name}.prior", "r") as priorfile: + with open(f"{repo}/analyses/{production.name}.prior", "r") as priorfile: self.assertFalse("name='chirp_mass', minimum=7.932707, maximum=14.759644" in priorfile.read()) except FileNotFoundError: pass diff --git a/docs/source/user-guide/running.rst b/docs/source/user-guide/running.rst index 15a30309..ad5a9509 100644 --- a/docs/source/user-guide/running.rst +++ b/docs/source/user-guide/running.rst @@ -20,7 +20,7 @@ For example running in the root directory of an asimov project, ● Working on GW150914_095045 Working on production Prod0 - Prod0 C01_offline checkouts/GW150914_095045 + Prod0 analyses checkouts/GW150914_095045 Production config Prod0 created. We can see that an analysis called ``Prod0`` has been build, and the configuration file required for the submission step has been created. 
@@ -40,7 +40,7 @@ For example $ asimov manage submit - Prod0 C01_offline checkouts/GW150914_095045 + Prod0 analyses checkouts/GW150914_095045 ● Submitted GW150914_095045/Prod0 You can also run the ``build`` and ``submit`` stages in a single command: diff --git a/examples/MODAL_DEPENDENCIES_GUIDE.md b/examples/MODAL_DEPENDENCIES_GUIDE.md new file mode 100644 index 00000000..a1db6a93 --- /dev/null +++ b/examples/MODAL_DEPENDENCIES_GUIDE.md @@ -0,0 +1,109 @@ +# Modal Dependencies Feature - Visual Guide + +## How It Works + +When you click on an analysis node in the workflow graph, a modal popup appears with detailed information about the analysis. With the new update, this modal now includes a **Dependencies** section. + +## Example Modal Content + +``` +┌─────────────────────────────────────────────────────────┐ +│ Analysis Details × │ +├─────────────────────────────────────────────────────────┤ +│ │ +│ Status │ +│ ┌─────────┐ │ +│ │ ready │ │ +│ └─────────┘ │ +│ │ +│ Pipeline │ +│ bilby │ +│ │ +│ Comment │ +│ Combines IMRPhenomXPHM and SEOBNRv5PHM results │ +│ │ +│ Run Directory │ +│ /path/to/combiner/run │ +│ │ +│ Dependencies ← NEW! │ +│ IMRPhenomXPHM-PE, SEOBNRv5PHM-PE │ +│ │ +└─────────────────────────────────────────────────────────┘ +``` + +## What Gets Displayed + +### For analyses WITH dependencies: +- Shows comma-separated list of dependency names +- Example: `IMRPhenomXPHM-PE, SEOBNRv5PHM-PE` +- Example: `BayesWave-PSD` + +### For analyses WITHOUT dependencies: +- Shows: `None` + +## Property-Based Dependencies + +The dependencies shown are the **resolved** dependencies - the actual analysis names that match the property-based queries. 
+ +### Example 1: Simple name dependency +```yaml +needs: + - BayesWave-PSD +``` +Modal shows: `BayesWave-PSD` + +### Example 2: Property-based OR dependencies +```yaml +needs: + - waveform.approximant: IMRPhenomXPHM + - waveform.approximant: SEOBNRv5PHM +``` +Modal shows: `IMRPhenomXPHM-PE, SEOBNRv5PHM-PE` + +### Example 3: Property-based AND dependencies +```yaml +needs: + - - pipeline: bayeswave + - review.status: approved +``` +Modal shows: `Approved-BayesWave-1, Approved-BayesWave-2` (all approved bayeswave analyses) + +## Implementation Details + +### Data Flow + +1. **Event HTML Generation** (`event.py`) + - Calls `update_graph()` to ensure edges are current + - For each node, evaluates `node.dependencies` + - Stores as `data-dependencies` attribute in hidden div + +2. **Modal Population** (`report.py`) + - JavaScript reads `data-dependencies` from hidden div + - Populates modal section with dependency names + - Shows/hides section based on presence of data + +3. **User Experience** + - Click analysis node → Modal opens + - Dependencies section automatically populated + - Clear indication of what this analysis depends on + +## Technical Changes + +### Files Modified + +**asimov/event.py:** +- Added `update_graph()` method to rebuild edges dynamically +- Enhanced HTML generation to include dependencies data +- Calls `update_graph()` before using graph in `html()` and `get_all_latest()` + +**asimov/cli/report.py:** +- Added dependencies section to modal HTML structure +- Updated JavaScript to populate dependencies from data attribute +- Section always shows (displays "None" if no dependencies) + +## Benefits + +1. **Transparency**: Users can see exactly what an analysis depends on +2. **Debugging**: Quickly identify dependency relationships +3. **Property-based clarity**: See resolved names from property queries +4. 
**Graph consistency**: Connections in graph match displayed dependencies diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 00000000..8da4f7b7 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,55 @@ +# Asimov Dependency Management Examples + +This directory contains example blueprint files demonstrating the new flexible dependency management features. + +## Features Demonstrated + +### dependency-examples.yaml + +This file shows examples of: + +1. **Simple name-based dependencies** (backward compatible) + - Traditional style: `needs: - generate-psds` + +2. **Property-based dependencies** + - Filter by any property: `needs: - pipeline: bayeswave` + - Nested properties: `needs: - waveform.approximant: IMRPhenomXPHM` + +3. **OR logic** + - Multiple conditions are OR'd together by default + - An analysis depends on anything matching ANY condition + +4. **AND logic** + - Use nested lists for AND conditions + - Example: `needs: - - pipeline: bayeswave` + ` - status: finished` + - All conditions in the nested list must match + +5. **Negation** + - Prefix values with `!` to negate + - Example: `needs: - pipeline: "!bayeswave"` + - Matches everything except the specified value + +6. **Complex combinations** + - Mix AND and OR logic + - Nested lists are AND'd internally, then OR'd with other items + +7. **Staleness tracking** + - Dependencies are recorded when an analysis runs + - If matching analyses change, the analysis is marked as stale + +8. **Auto-refresh** + - Set `refreshable: true` to auto-refresh stale analyses + - Indicated differently in the HTML report + +## Using These Examples + +To use these examples in your own project: + +1. Copy the relevant sections to your blueprint files +2. Modify the property names and values to match your needs +3. 
Apply the blueprint with `asimov apply -f your-blueprint.yaml` + +## More Information + +See the main documentation at `docs/source/blueprints.rst` for complete details on the dependency syntax. diff --git a/examples/README.rst b/examples/README.rst new file mode 100644 index 00000000..1c560093 --- /dev/null +++ b/examples/README.rst @@ -0,0 +1,24 @@ +Examples +======== + +This directory contains example scripts demonstrating the usage of asimov features. + +Scheduler Examples +------------------ + +``scheduler_example.py`` + Demonstrates how to use the asimov scheduler module to submit, query, and delete jobs. + + Usage:: + + python scheduler_example.py + + This example shows: + + - Creating a scheduler instance using the factory function + - Creating a JobDescription object + - Submitting a job to HTCondor + - Querying job status + - Deleting a job (commented out by default) + + Note: This example requires a working HTCondor installation and configuration. diff --git a/examples/demo_graph_fix.py b/examples/demo_graph_fix.py new file mode 100755 index 00000000..87fd96f6 --- /dev/null +++ b/examples/demo_graph_fix.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python +""" +Visual demonstration of the graph connection fix and modal dependencies feature. 
+""" + +from unittest.mock import Mock +from asimov.event import Event +from asimov.analysis import Analysis + + +class DemoAnalysis(Analysis): + """Demo analysis for testing.""" + + def __init__(self, name, needs=None, **kwargs): + self.name = name + self.meta = kwargs.get('meta', {}) + self._needs = needs or [] + self.event = None + self.subject = None + self.status_str = kwargs.get('status', 'finished') + self._reviews = Mock() + self._reviews.status = kwargs.get('review_status', 'none') + self._reviews.__len__ = Mock(return_value=0) + self.pipeline = Mock() + self.pipeline.name = kwargs.get('pipeline', 'bilby') + self.pipeline.html = Mock(return_value='') + self.comment = kwargs.get('comment', None) + + @property + def review(self): + return self._reviews + + @property + def finished(self): + return self.status_str == 'finished' + + +def main(): + print("=" * 80) + print("GRAPH CONNECTION FIX DEMONSTRATION") + print("=" * 80) + print() + + # Create event + event_data = {'name': 'GW150914', 'productions': []} + event = Event(**event_data) + event.meta = {} + + # Add analyses in sequence + print("Step 1: Adding BayesWave PSD analysis") + a1 = DemoAnalysis('BayesWave-PSD', + pipeline='bayeswave', + status='finished', + meta={'pipeline': 'bayeswave'}) + a1.event = event + a1.subject = event + event.add_production(a1) + print(f" Added: {a1.name}") + + print("\nStep 2: Adding Bilby PE analyses with waveform approximants") + a2 = DemoAnalysis('IMRPhenomXPHM-PE', + pipeline='bilby', + status='finished', + meta={'pipeline': 'bilby', + 'waveform': {'approximant': 'IMRPhenomXPHM'}}) + a2.event = event + a2.subject = event + event.add_production(a2) + print(f" Added: {a2.name} (waveform.approximant: IMRPhenomXPHM)") + + a3 = DemoAnalysis('SEOBNRv5PHM-PE', + pipeline='bilby', + status='finished', + meta={'pipeline': 'bilby', + 'waveform': {'approximant': 'SEOBNRv5PHM'}}) + a3.event = event + a3.subject = event + event.add_production(a3) + print(f" Added: {a3.name} 
(waveform.approximant: SEOBNRv5PHM)") + + print("\nStep 3: Adding Combiner with property-based dependency") + combiner = DemoAnalysis('Combiner', + pipeline='bilby', + status='ready', + comment='Combines IMRPhenomXPHM and SEOBNRv5PHM results', + needs=['waveform.approximant: IMRPhenomXPHM', + 'waveform.approximant: SEOBNRv5PHM']) + combiner.event = event + combiner.subject = event + event.add_production(combiner) + print(f" Added: {combiner.name}") + print(f" Dependencies (property-based): {combiner._needs}") + + # Show resolved dependencies + print("\n" + "=" * 80) + print("DEPENDENCY RESOLUTION") + print("=" * 80) + print(f"\n{combiner.name} dependencies resolved to:") + for dep in combiner.dependencies: + print(f" - {dep}") + + # Show graph edges BEFORE update + print("\n" + "=" * 80) + print("GRAPH EDGES (before update_graph)") + print("=" * 80) + edges = list(event.graph.edges()) + if edges: + for src, dst in edges: + print(f" {src.name} -> {dst.name}") + else: + print(" No edges") + + # Call update_graph + print("\nCalling update_graph()...") + event.update_graph() + + # Show graph edges AFTER update + print("\n" + "=" * 80) + print("GRAPH EDGES (after update_graph)") + print("=" * 80) + edges = list(event.graph.edges()) + if edges: + for src, dst in edges: + print(f" {src.name} -> {dst.name}") + else: + print(" No edges") + + # Verify graph connections + import networkx as nx + print("\n" + "=" * 80) + print("GRAPH VERIFICATION") + print("=" * 80) + + predecessors = list(event.graph.predecessors(combiner)) + print(f"\nPredecessors of {combiner.name} in graph:") + for pred in predecessors: + print(f" - {pred.name}") + + expected = set(['IMRPhenomXPHM-PE', 'SEOBNRv5PHM-PE']) + actual = set([p.name for p in predecessors]) + + if expected == actual: + print("\n✅ PASS: Graph correctly connects property-based dependencies!") + else: + print(f"\n❌ FAIL: Expected {expected}, got {actual}") + + # Check HTML generation + print("\n" + "=" * 80) + print("MODAL 
DEPENDENCIES DATA") + print("=" * 80) + + html = event.html() + + # Extract dependencies data from HTML + import re + dep_pattern = r'data-dependencies="([^"]*)"' + matches = re.findall(dep_pattern, html) + + print(f"\nDependencies in HTML data attributes:") + for i, match in enumerate(matches): + if match: + print(f" Analysis {i+1}: {match}") + else: + print(f" Analysis {i+1}: None") + + # Check for Combiner specifically + combiner_dep_pattern = rf'id="analysis-data-{combiner.name}"[^>]*data-dependencies="([^"]*)"' + combiner_match = re.search(combiner_dep_pattern, html) + + if combiner_match: + deps_str = combiner_match.group(1) + print(f"\n{combiner.name} dependencies in modal: {deps_str}") + + if 'IMRPhenomXPHM-PE' in deps_str and 'SEOBNRv5PHM-PE' in deps_str: + print("✅ PASS: Modal includes correct dependencies!") + else: + print("❌ FAIL: Modal dependencies incorrect") + else: + print(f"❌ FAIL: Could not find dependencies data for {combiner.name}") + + print("\n" + "=" * 80) + print("SUMMARY") + print("=" * 80) + print("\n✅ Property-based dependencies resolve correctly") + print("✅ Graph connections update dynamically with update_graph()") + print("✅ Dependencies appear in modal data attributes") + print("✅ HTML report will display dependencies in analysis details popup") + print() + + +if __name__ == '__main__': + main() diff --git a/examples/demo_html_output.py b/examples/demo_html_output.py new file mode 100755 index 00000000..5b95391c --- /dev/null +++ b/examples/demo_html_output.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python +""" +Demonstration script showing HTML output for analyses with new dependency features. +This generates sample HTML to show how the new dependency indicators appear. 
+""" + +from unittest.mock import Mock +from asimov.analysis import Analysis + + +class DemoAnalysis(Analysis): + """Demo analysis for HTML generation.""" + + def __init__(self, name, **kwargs): + self.name = name + self.meta = kwargs.get('meta', {}) + self._needs = kwargs.get('needs', []) + self.event = kwargs.get('event', None) + self.subject = self.event # Add subject attribute + self.status_str = kwargs.get('status', 'ready') + self.comment = kwargs.get('comment', None) + self._reviews = Mock() + self._reviews.status = kwargs.get('review_status', 'none') + self._reviews.__len__ = Mock(return_value=0) + + # Set pipeline + self.pipeline = Mock() + self.pipeline.name = kwargs.get('pipeline', 'bilby') + self.pipeline.html = Mock(return_value='') + + self.rundir = kwargs.get('rundir', None) + + @property + def review(self): + return self._reviews + + +def generate_demo_html(): + """Generate sample HTML showing the new features.""" + + # Create mock event + event = Mock() + event.meta = {} # Add meta dict to event + + # Create sample analyses + analyses = [ + DemoAnalysis('BayesWave-PSD', + pipeline='bayeswave', + status='finished', + event=event, + meta={'pipeline': 'bayeswave'}), + DemoAnalysis('IMRPhenomXPHM-PE', + pipeline='bilby', + status='finished', + event=event, + meta={'pipeline': 'bilby', 'waveform': {'approximant': 'IMRPhenomXPHM'}}), + DemoAnalysis('SEOBNRv5PHM-PE', + pipeline='bilby', + status='finished', + event=event, + meta={'pipeline': 'bilby', 'waveform': {'approximant': 'SEOBNRv5PHM'}}), + ] + + event.analyses = analyses + + # Create an analysis with dependencies + combiner = DemoAnalysis('Combiner', + pipeline='bilby', + status='ready', + event=event, + comment='Combines multiple PE analyses', + needs=['waveform.approximant: IMRPhenomXPHM', + 'waveform.approximant: SEOBNRv5PHM'], + rundir='/path/to/combiner') + + # Create a stale analysis + stale_analysis = DemoAnalysis('Stale-Analysis', + pipeline='bilby', + status='finished', + event=event, + 
needs=['pipeline: bayeswave'], + rundir=None, + meta={'resolved_dependencies': ['BayesWave-PSD']}) + + # Manually set resolved dependencies to make it stale + stale_analysis.meta['resolved_dependencies'] = ['BayesWave-PSD'] + # Current dependencies would be different if we add more bayeswave analyses + + print("=" * 80) + print("HTML OUTPUT DEMONSTRATION") + print("=" * 80) + print() + + print("1. Analysis with dependencies shown:") + print("-" * 80) + print(combiner.html()) + print() + + print("\n2. Stale analysis (dependencies changed):") + print("-" * 80) + # Manually show what stale would look like + print("
") + print('Stale') + print("

Stale-Analysis

") + print('

') + print(' finished') + print('

') + print('

Pipeline: bilby

') + print('▶ Show details') + print('
') + print('

Current Dependencies:
BayesWave-PSD, NewBayesWaveAnalysis

') + print('

Resolved Dependencies (when run):
BayesWave-PSD

') + print('
') + print('
') + print() + + print("\n3. Refreshable stale analysis:") + print("-" * 80) + print("
") + print('Stale (will refresh)') + print("

Auto-Refresh

") + print('

') + print(' finished') + print('

') + print('

Pipeline: bilby

') + print('
') + print() + + print("\n" + "=" * 80) + print("CSS STYLES FOR NEW FEATURES") + print("=" * 80) + print(""" +.stale-indicator { + display: inline-block; + padding: 0.25rem 0.5rem; + border-radius: 0.25rem; + font-size: 0.75rem; + font-weight: 600; + margin-left: 0.5rem; + text-transform: uppercase; +} + +.stale-indicator.stale { + background-color: #ffeaa7; + color: #d63031; + border: 1px solid #fdcb6e; +} + +.stale-indicator.stale-refreshable { + background-color: #74b9ff; + color: #0984e3; + border: 1px solid #0984e3; +} + +.asimov-dependencies, +.asimov-resolved-dependencies { + font-size: 0.9rem; + color: #586069; + margin: 0.5rem 0; + padding: 0.5rem; + background: #f6f8fa; + border-radius: 0.25rem; +} + +.asimov-resolved-dependencies { + background: #fff3cd; + border-left: 3px solid #ffc107; +} + """) + + +if __name__ == '__main__': + generate_demo_html() diff --git a/examples/dependency-examples.yaml b/examples/dependency-examples.yaml new file mode 100644 index 00000000..931ff63f --- /dev/null +++ b/examples/dependency-examples.yaml @@ -0,0 +1,70 @@ +--- +# Example blueprint demonstrating the new dependency management features + +# Simple name-based dependency (backward compatible) +kind: analysis +name: generate-psds +pipeline: bayeswave +status: finished + +--- +# Property-based dependency +kind: analysis +name: pe-analysis-1 +pipeline: bilby +waveform: + approximant: IMRPhenomXPHM +status: finished +needs: + - pipeline: bayeswave + +--- +# Multiple OR dependencies +kind: analysis +name: combiner +pipeline: bilby +status: ready +needs: + - waveform.approximant: IMRPhenomXPHM + - pipeline: bayeswave + +--- +# AND logic using nested list +kind: analysis +name: approved-bayeswave +pipeline: bilby +status: ready +needs: + - - pipeline: bayeswave + - review.status: approved + +--- +# Negation example +kind: analysis +name: non-bayeswave-combiner +pipeline: bilby +status: ready +needs: + - pipeline: "!bayeswave" + +--- +# Complex AND/OR combination +kind: 
analysis +name: complex-analysis +pipeline: bilby +status: ready +refreshable: true +needs: + - - pipeline: bayeswave + - status: finished + - waveform.approximant: IMRPhenomXPHM + +--- +# Analysis with refreshable flag set +kind: analysis +name: auto-refresh +pipeline: bilby +status: ready +refreshable: true +needs: + - review.status: approved diff --git a/examples/pesummary_subject_analysis.yaml b/examples/pesummary_subject_analysis.yaml new file mode 100644 index 00000000..2aa86c04 --- /dev/null +++ b/examples/pesummary_subject_analysis.yaml @@ -0,0 +1,41 @@ +# PESummary SubjectAnalysis Example +# +# This blueprint demonstrates how to create a PESummary SubjectAnalysis +# that combines results from multiple parameter estimation analyses. +# +# The PESummary analysis will: +# 1. Wait for bilby analyses to complete +# 2. Automatically combine their results into a single summary page +# 3. Be marked as refreshable so it updates when new analyses finish +--- +kind: analysis +name: CombinedPESummary +pipeline: pesummary +# Use 'analyses' to specify which analyses to combine +analyses: + - pipeline: bilby # All bilby analyses +# Mark as refreshable to auto-update when dependencies change +refreshable: true +status: ready +--- +# Example with optional dependencies +# This will combine bilby results, and also include RIFT if available +kind: analysis +name: CombinedPESummaryWithOptional +pipeline: pesummary +analyses: + - pipeline: bilby # Required + - optional: true # Optional dependency + pipeline: rift +refreshable: true +status: ready +--- +# Example combining specific analyses by status +kind: analysis +name: ApprovedPESummary +pipeline: pesummary +analyses: + - - review: approved # Only approved analyses + - pipeline: bilby # That use bilby (AND logic) +refreshable: true +status: ready diff --git a/examples/scheduler_example.py b/examples/scheduler_example.py new file mode 100644 index 00000000..3a79da5f --- /dev/null +++ b/examples/scheduler_example.py @@ -0,0 
+1,62 @@ +#!/usr/bin/env python3 +""" +Example script demonstrating the use of the asimov scheduler module. + +This script shows how to: +1. Create a scheduler instance +2. Create a job description +3. Submit a job +4. Query job status +5. Delete a job +""" + +from asimov.scheduler import get_scheduler, JobDescription + +def main(): + # Example 1: Using the factory function + print("Creating HTCondor scheduler...") + scheduler = get_scheduler("htcondor") + + # Example 2: Create a job description + print("\nCreating job description...") + job = JobDescription( + executable="/bin/echo", + output="echo.out", + error="echo.err", + log="echo.log", + arguments="Hello from asimov scheduler!", + cpus=1, + memory="1GB", + disk="1GB", + universe="vanilla" + ) + + # Example 3: Submit the job + print("\nSubmitting job...") + try: + cluster_id = scheduler.submit(job) + print(f"Job submitted successfully with cluster ID: {cluster_id}") + except Exception as e: + print(f"Failed to submit job: {e}") + return + + # Example 4: Query job status + print("\nQuerying job status...") + try: + status = scheduler.query(cluster_id) + print(f"Job status: {status}") + except Exception as e: + print(f"Failed to query job: {e}") + + # Example 5: Delete the job (optional) + # To delete the job in your own code, you can call: + # scheduler.delete(cluster_id) + # and handle any exceptions as appropriate for your application. + # + # This example script leaves the deletion disabled so that + # submitted jobs remain available for inspection. 
+ + print("\nExample completed!") + +if __name__ == "__main__": + main() diff --git a/examples/strategy_examples.yaml b/examples/strategy_examples.yaml new file mode 100644 index 00000000..e9ef20c8 --- /dev/null +++ b/examples/strategy_examples.yaml @@ -0,0 +1,106 @@ +# Example blueprint demonstrating strategy functionality +# +# This file shows how to use strategies to create multiple similar analyses +# with parameter variations, similar to GitHub Actions matrix strategies. +# +# IMPORTANT: Parameter names in strategies must match the exact keys used in +# the pipeline configuration templates. For bilby, see asimov/configs/bilby.ini +# to find the correct parameter paths. Keys with spaces are supported +# (e.g., "waveform.reference frequency"). + +# First, define an event +--- +kind: event +name: GW150914_095045 +event time: 1126259462.391 + +# Example 1: Single parameter strategy +# This creates 3 analyses, one for each waveform approximant +--- +kind: analysis +name: bilby-{waveform.approximant} +event: GW150914_095045 +pipeline: bilby +comment: Testing multiple waveform approximants +needs: + - generate-psd +likelihood: + sample rate: 4096 +strategy: + waveform.approximant: + - IMRPhenomXPHM + - SEOBNRv4PHM + - IMRPhenomD + +# Example 2: Multi-parameter matrix strategy +# This creates 6 analyses (3 waveforms × 2 samplers) +--- +kind: analysis +name: pe-{waveform.approximant}-{sampler.sampler} +event: GW150914_095045 +pipeline: bilby +comment: Systematic comparison of waveforms and samplers +needs: + - generate-psd +likelihood: + sample rate: 4096 + psd length: 4 +strategy: + waveform.approximant: + - IMRPhenomXPHM + - SEOBNRv4PHM + - IMRPhenomD + sampler.sampler: + - dynesty + - emcee + +# Example 3: Strategy with numeric parameters +# This creates 3 analyses with different reference frequencies +--- +kind: analysis +name: bilby-fref-{waveform.reference frequency} +event: GW150914_095045 +pipeline: bilby +comment: Testing different reference frequencies 
+strategy: + waveform.reference frequency: + - 20 + - 50 + - 100 + +# Example 4: Strategy with nested parameters +# This creates 2 analyses with distance marginalization on/off +--- +kind: analysis +name: bilby-margdist-{likelihood.marginalization.distance} +event: GW150914_095045 +pipeline: bilby +comment: Testing distance marginalization +strategy: + likelihood.marginalization.distance: + - true + - false + +# The above strategies would create the following analyses: +# +# From Example 1: +# - bilby-IMRPhenomXPHM +# - bilby-SEOBNRv4PHM +# - bilby-IMRPhenomD +# +# From Example 2: +# - pe-IMRPhenomXPHM-dynesty +# - pe-IMRPhenomXPHM-emcee +# - pe-SEOBNRv4PHM-dynesty +# - pe-SEOBNRv4PHM-emcee +# - pe-IMRPhenomD-dynesty +# - pe-IMRPhenomD-emcee +# +# From Example 3: +# - bilby-fref-20 +# - bilby-fref-50 +# - bilby-fref-100 +# +# From Example 4: +# - bilby-margdist-true +# - bilby-margdist-false diff --git a/pyproject.toml b/pyproject.toml index b8a11bfe..34b1bea7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ authors = [ ] description = "A Python package for managing and interacting with data analysis jobs." 
readme = "README.md" -license = "MIT" +license = { file = "LICENSE" } classifiers = [ "Programming Language :: Python :: 3", "Operating System :: OS Independent", @@ -49,7 +49,8 @@ dependencies = [ "igwn-auth-utils>=0.2.1", "flask", "tinydb", - "pillow>=10.2.0" + "pillow>=10.2.0", + "pydantic>=2.0.0" ] [project.optional-dependencies] @@ -61,8 +62,14 @@ docs = [ "sphinxcontrib-httpdomain", "sphinxcontrib-httpexample", "sphinx-jsonschema", - "sphinx-multiversion" + "sphinx-multiversion", + "sphinxcontrib.autodoc_pydantic", ] +bilby = [ + "bilby_pipe", + "bilby", +] +testing = [] [project.scripts] olivaw = "asimov.olivaw:olivaw" @@ -76,4 +83,13 @@ locutus = "asimov.locutus:cli" include-package-data = true packages = [ "asimov", -] \ No newline at end of file + "asimov.pipelines", + "asimov.pipelines.testing", + "asimov.cli", + "asimov.configs" +] + +[project.entry-points."asimov.pipelines"] +simpletestpipeline = "asimov.pipelines.testing:SimpleTestPipeline" +subjecttestpipeline = "asimov.pipelines.testing:SubjectTestPipeline" +projecttestpipeline = "asimov.pipelines.testing:ProjectTestPipeline" diff --git a/sandbox/gitlab-issue-tests.py b/sandbox/gitlab-issue-tests.py index aed91450..de30c01f 100644 --- a/sandbox/gitlab-issue-tests.py +++ b/sandbox/gitlab-issue-tests.py @@ -44,17 +44,17 @@ def start_dag(event, repo, prod, psd_prod="Prod0"): psds_dict = get_psds_rundir(event.data[f'{psd_prod}_rundir']) try: - out = repo.build_dag("C01_offline", prod, psds_dict) + repo.build_dag("analyses", prod, psds_dict) status = "DAG ready" except ValueError as e: status = "ini error" print(e) try: - cluster = repo.submit_dag("C01_offline", prod) + cluster = repo.submit_dag("analyses", prod) job = condor.CondorJob(cluster) event.data[prod] = cluster - event.data[f"{prod}_rundir"] = f"/home/daniel.williams/events/O3/o3a_catalog/{event.title}/C01_offline/"+job.run_directory + event.data[f"{prod}_rundir"] = 
f"/home/daniel.williams/events/O3/o3a_catalog/{event.title}/analyses/"+job.run_directory event.update_data() event.state = "Productions running" except ValueError as e: @@ -114,7 +114,7 @@ def start_dag(event, repo, prod, psd_prod="Prod0"): try: - event_prods = repo.find_prods("C01_offline") + event_prods = repo.find_prods("analyses") except: print(f"No C01 runs in this repository") continue diff --git a/sandbox/make-nonspin.py b/sandbox/make-nonspin.py index 56ad1684..c8c1b0bd 100644 --- a/sandbox/make-nonspin.py +++ b/sandbox/make-nonspin.py @@ -44,7 +44,8 @@ def get_psds_rundir(rundir): print(f"{event.title} missing from the uberrepo") continue - repo.repo.git.checkout("master") + default_branch = repo.get_default_branch() + repo.repo.git.checkout(default_branch) repo.repo.git.pull() try: diff --git a/scripts/check-ifo.py b/scripts/check-ifo.py index b8736241..58f0b786 100644 --- a/scripts/check-ifo.py +++ b/scripts/check-ifo.py @@ -330,12 +330,12 @@ def calibration(event): for ifo, envelope in calibrations.items(): description = f"Added calibration {envelope} for {ifo}." 
try: - event.event_object.repository.add_file(os.path.join(f"/home/cal/public_html/uncertainty/O3C01/{ifo}", envelope), f"C01_offline/calibration/{ifo}.dat", + event.event_object.repository.add_file(os.path.join(f"/home/cal/public_html/uncertainty/O3C01/{ifo}", envelope), f"analyses/calibration/{ifo}.dat", commit_message=description) except GitCommandError as e: if "nothing to commit," in e.stderr: pass - calibrations[ifo] = f"C01_offline/calibration/{ifo}.dat" + calibrations[ifo] = f"analyses/calibration/{ifo}.dat" envelopes = yaml.dump({"calibration": calibrations}) event.add_note(CALIBRATION_NOTE.format(envelopes)) diff --git a/scripts/find_calibration.py b/scripts/find_calibration.py index 1a336cbb..f402dc0b 100644 --- a/scripts/find_calibration.py +++ b/scripts/find_calibration.py @@ -107,12 +107,12 @@ def calibration(event): for ifo, envelope in calibrations.items(): description = f"Added calibration {envelope} for {ifo}." try: - event.event_object.repository.add_file(os.path.join(f"/home/cal/public_html/uncertainty/O3C01/{ifo}", envelope), f"C01_offline/calibration/{ifo}.dat", + event.event_object.repository.add_file(os.path.join(f"/home/cal/public_html/uncertainty/O3C01/{ifo}", envelope), f"analyses/calibration/{ifo}.dat", commit_message=description) except GitCommandError as e: if "nothing to commit," in e.stderr: pass - calibrations[ifo] = f"C01_offline/calibration/{ifo}.dat" + calibrations[ifo] = f"analyses/calibration/{ifo}.dat" envelopes = yaml.dump({"calibration": calibrations}) event.add_note(CALIBRATION_NOTE.format(envelopes)) diff --git a/tests/blueprints.py b/tests/blueprints.py new file mode 100644 index 00000000..ee4f8f16 --- /dev/null +++ b/tests/blueprints.py @@ -0,0 +1,32 @@ +""" +Blueprint path constants for tests. + +Provides absolute paths to local blueprint files that are v0.7-compatible +(minimum frequency in 'waveform' section). 
These should be used instead of +the external asimov-data URLs so that unit tests are not dependent on network +access and do not fail due to upstream data lagging behind code changes. +""" +import os + +_BLUEPRINTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_data", "blueprints") + +DEFAULTS_PE = os.path.join(_BLUEPRINTS_DIR, "production-pe.yaml") +DEFAULTS_PE_PRIORS = os.path.join(_BLUEPRINTS_DIR, "production-pe-priors.yaml") + +EVENTS = { + "GW150914_095045": os.path.join(_BLUEPRINTS_DIR, "GW150914_095045.yaml"), + "GW190924_021846": os.path.join(_BLUEPRINTS_DIR, "GW190924_021846.yaml"), + "GW190929_012149": os.path.join(_BLUEPRINTS_DIR, "GW190929_012149.yaml"), + "GW191109_010717": os.path.join(_BLUEPRINTS_DIR, "GW191109_010717.yaml"), +} + +# GWTC-2.1 event blueprints +GWTC21_EVENTS = { + "GW150914_095045": os.path.join(_BLUEPRINTS_DIR, "gwtc-2-1", "GW150914_095045.yaml"), +} + +PIPELINES = { + "bilby": os.path.join(_BLUEPRINTS_DIR, "bilby.yaml"), + "bayeswave": os.path.join(_BLUEPRINTS_DIR, "bayeswave.yaml"), + "rift": os.path.join(_BLUEPRINTS_DIR, "rift.yaml"), +} diff --git a/tests/external_blueprint_compat.py b/tests/external_blueprint_compat.py new file mode 100644 index 00000000..83d1c412 --- /dev/null +++ b/tests/external_blueprint_compat.py @@ -0,0 +1,52 @@ +""" +External blueprint compatibility tests. + +These tests verify that asimov is compatible with the upstream asimov-data +blueprints. They are intentionally in a file that does NOT match the default +unittest discovery pattern (``test*.py``) so that ``python -m unittest discover +tests/`` does **not** run them automatically. + +They are run in a separate CI job (``test-external-blueprints``) that has +``continue-on-error: true`` to allow this job to fail if the external data lags +behind code changes. 
+ +To run these tests manually:: + + python -m unittest tests.external_blueprint_compat +""" +import unittest + +from asimov.cli.application import apply_page +from asimov.testing import AsimovTestCase +from tests.test_specific_events import ( + EXTERNAL_DEFAULTS_URL, + EXTERNAL_TESTS_BASE_URL, + _GravitationalWaveEventsBase, +) + + +class TestGravitationalWaveEventsExternal(_GravitationalWaveEventsBase, AsimovTestCase): + """ + Tests using blueprints fetched directly from the external asimov-data + repository. These tests verify compatibility with the upstream data, but + are allowed to fail in CI because external data may lag behind code changes. + Run via the ``test-external-blueprints`` CI job with + ``continue-on-error: true``. + """ + + def _apply_defaults(self): + apply_page( + file=EXTERNAL_DEFAULTS_URL, + event=None, + ledger=self.ledger, + ) + + def _get_event_blueprint(self, event): + return f"{EXTERNAL_TESTS_BASE_URL}/{event}.yaml" + + def _get_pipeline_blueprint(self, pipeline): + return f"{EXTERNAL_TESTS_BASE_URL}/{pipeline}.yaml" + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/integration/GW190426190642.yaml b/tests/integration/GW190426190642.yaml index e3aaa03b..0c249689 100644 --- a/tests/integration/GW190426190642.yaml +++ b/tests/integration/GW190426190642.yaml @@ -1,7 +1,7 @@ calibration: - H1: C01_offline/calibration/H1.dat - L1: C01_offline/calibration/L1.dat - V1: C01_offline/calibration/V1.dat + H1: analyses/calibration/H1.dat + L1: analyses/calibration/L1.dat + V1: analyses/calibration/V1.dat data: channels: H1: H1:DCS-CALIB_STRAIN_CLEAN_SUB60HZ_C01 @@ -57,8 +57,8 @@ productions: rundir: {wd}/ProdF6 psds: 1024: - H1: C01_offline/psds/1024/H1-psd.dat - L1: C01_offline/psds/1024/L1-psd.dat + H1: analyses/psds/1024/H1-psd.dat + L1: analyses/psds/1024/L1-psd.dat quality: high-frequency: 448 lower-frequency: diff --git a/tests/integration/test_gwtc2d1.py b/tests/integration/test_gwtc2d1.py index 197cb7f4..f0c860de 100644 --- 
a/tests/integration/test_gwtc2d1.py +++ b/tests/integration/test_gwtc2d1.py @@ -35,7 +35,7 @@ def test_commandline(self): production.make_config(f"{production.name}.ini") if production.pipeline.lower() in known_pipelines: try: - pipe = known_pipelines[production.pipeline.lower()](production, "C01_offline") + pipe = known_pipelines[production.pipeline.lower()](production, "analyses") pipe.clean() pipe.build_dag() except Exception as e: diff --git a/tests/manual_test_priors.py b/tests/manual_test_priors.py new file mode 100755 index 00000000..cf24d274 --- /dev/null +++ b/tests/manual_test_priors.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python +""" +Manual integration test for the prior system. + +This script tests the prior system without requiring the full asimov environment. +It demonstrates that: +1. Priors can be validated +2. Prior interfaces work correctly +3. Backward compatibility is maintained +""" + +import sys +import os + +# Add the asimov module to the path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'asimov')) + +# Import outside the asimov directory to avoid logging.py conflict +os.chdir('/tmp') + +# Import the prior models directly +from asimov.priors import ( + PriorSpecification, + PriorDict, + Reparameterization +) +from asimov.pipelines.bilby import BilbyPriorInterface + +def test_basic_prior_spec(): + """Test basic prior specification.""" + print("Testing basic prior specification...") + prior = PriorSpecification(minimum=10, maximum=1000) + assert prior.minimum == 10 + assert prior.maximum == 1000 + print("✓ Basic prior specification works") + +def test_prior_dict_from_blueprint(): + """Test creating a PriorDict from blueprint data.""" + print("\nTesting PriorDict from blueprint...") + + # Simulate data from a blueprint + blueprint_data = { + "default": "BBHPriorDict", + "luminosity distance": { + "minimum": 10, + "maximum": 10000 + }, + "mass ratio": { + "minimum": 0.05, + "maximum": 1.0 + }, + "chirp mass": { + "minimum": 
21.41, + "maximum": 41.97, + "type": "UniformInComponentsChirpMass" + } + } + + priors = PriorDict.from_dict(blueprint_data) + assert priors.default == "BBHPriorDict" + + # Get individual priors + lum_dist = priors.get_prior("luminosity distance") + assert lum_dist is not None + assert lum_dist.minimum == 10 + + mass_ratio = priors.get_prior("mass ratio") + assert mass_ratio is not None + assert mass_ratio.maximum == 1.0 + + print("✓ PriorDict from blueprint works") + +def test_bilby_interface(): + """Test the Bilby prior interface.""" + print("\nTesting Bilby prior interface...") + + blueprint_data = { + "default": "BBHPriorDict", + "luminosity distance": { + "minimum": 10, + "maximum": 1000, + "type": "PowerLaw", + "alpha": 2 + } + } + + interface = BilbyPriorInterface(blueprint_data) + result = interface.convert() + + assert result["default"] == "BBHPriorDict" + assert "luminosity distance" in result + assert result["luminosity distance"]["minimum"] == 10 + + default = interface.get_default_prior() + assert default == "BBHPriorDict" + + print("✓ Bilby prior interface works") + +def test_backward_compatibility(): + """Test that old blueprint formats still work.""" + print("\nTesting backward compatibility...") + + # Old format blueprint (from actual test data) + old_format = { + "amplitude order": 1, + "chirp mass": { + "maximum": 41.97447913941358, + "minimum": 21.418182160215295 + }, + "luminosity distance": { + "maximum": 10000, + "minimum": 10 + }, + "mass 1": { + "maximum": 1000, + "minimum": 1 + }, + "mass ratio": { + "maximum": 1.0, + "minimum": 0.05 + } + } + + # Should not raise exception + priors = PriorDict.from_dict(old_format) + result = priors.to_dict() + + # Verify structure is preserved + assert "chirp mass" in result + assert result["chirp mass"]["minimum"] == 21.418182160215295 + assert result["mass ratio"]["maximum"] == 1.0 + + print("✓ Backward compatibility maintained") + +def test_reparameterization(): + """Test reparameterization 
specification.""" + print("\nTesting reparameterization...") + + reparam = Reparameterization( + from_parameters=["mass_1", "mass_2"], + to_parameters=["chirp_mass", "mass_ratio"], + transform="mass_to_chirp_mass_ratio" + ) + + assert reparam.from_parameters == ["mass_1", "mass_2"] + assert reparam.to_parameters == ["chirp_mass", "mass_ratio"] + assert reparam.transform == "mass_to_chirp_mass_ratio" + + print("✓ Reparameterization works") + +def test_extra_fields(): + """Test that extra fields are allowed.""" + print("\nTesting extra fields...") + + prior = PriorSpecification( + minimum=10, + maximum=100, + custom_param="custom_value", + another_param=42 + ) + + # Extra fields should be stored + assert hasattr(prior, '__pydantic_extra__') + + print("✓ Extra fields are allowed") + +def main(): + """Run all tests.""" + print("=" * 60) + print("Running Prior System Integration Tests") + print("=" * 60) + + try: + test_basic_prior_spec() + test_prior_dict_from_blueprint() + test_bilby_interface() + test_backward_compatibility() + test_reparameterization() + test_extra_fields() + + print("\n" + "=" * 60) + print("All tests passed! ✓") + print("=" * 60) + return 0 + + except AssertionError as e: + print(f"\n✗ Test failed: {e}") + return 1 + except Exception as e: + print(f"\n✗ Unexpected error: {e}") + import traceback + traceback.print_exc() + return 1 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/mock_gwdatafind_server.py b/tests/mock_gwdatafind_server.py new file mode 100644 index 00000000..e7a45648 --- /dev/null +++ b/tests/mock_gwdatafind_server.py @@ -0,0 +1,139 @@ +""" +Mock gwdatafind server for testing. + +This module provides a simple HTTP server that implements the gwdatafind API +for testing purposes, without requiring the full gwdatafind-server package. 
+""" +import json +from http.server import HTTPServer, BaseHTTPRequestHandler +from urllib.parse import urlparse +import threading +import time + + +class MockGWDataFindHandler(BaseHTTPRequestHandler): + """ + HTTP request handler that implements a minimal gwdatafind API. + + This handler responds to gwdatafind queries with pre-configured + frame file URLs. + """ + + # Class variable to store frame configurations + frame_configs = {} + + def log_message(self, format, *args): + """Suppress HTTP server logging.""" + pass + + def do_GET(self): + """Handle GET requests for gwdatafind API.""" + parsed_path = urlparse(self.path) + path_parts = parsed_path.path.strip('/').split('/') + + # gwdatafind API format: /api/v1/gwf/{site}/{frametype}/{gpsstart},{gpsend}/{urltype}.json + if len(path_parts) >= 6 and path_parts[0] == 'api' and path_parts[2] == 'gwf': + site = path_parts[3] + frametype = path_parts[4] + + # Get frame URLs for this site/frametype combination + key = (site, frametype) + if key in self.frame_configs: + urls = self.frame_configs[key] + + # Return JSON response + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps(urls).encode('utf-8')) + else: + # No frames configured + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps([]).encode('utf-8')) + else: + self.send_response(404) + self.end_headers() + + +class MockGWDataFindServer: + """ + A mock gwdatafind server for testing. + + This server implements the basic gwdatafind API to return frame file URLs + without requiring network access to real gwdatafind servers. + + Parameters + ---------- + host : str, optional + The host to bind to. Default is 'localhost'. + port : int, optional + The port to bind to. Default is 8765. + frame_configs : dict, optional + Dictionary mapping (site, frametype) tuples to lists of frame URLs. 
+ + Examples + -------- + >>> server = MockGWDataFindServer(frame_configs={ + ... ('H', 'H1_HOFT_C02'): [ + ... 'file:///data/H-H1_HOFT_C02-1126256640-4096.gwf' + ... ] + ... }) + >>> server.start() + >>> # Use gwdatafind with host='localhost:8765' + >>> server.stop() + """ + + def __init__(self, host='localhost', port=8765, frame_configs=None): + self.host = host + self.port = port + self.frame_configs = frame_configs or {} + self.server = None + self.server_thread = None + + def start(self): + """Start the mock gwdatafind server in a background thread.""" + # Set the frame configurations on the handler class + MockGWDataFindHandler.frame_configs = self.frame_configs + + # Create and start the server + self.server = HTTPServer((self.host, self.port), MockGWDataFindHandler) + self.server_thread = threading.Thread(target=self.server.serve_forever, daemon=True) + self.server_thread.start() + + # Give the server a moment to start + time.sleep(0.1) + + def stop(self): + """Stop the mock gwdatafind server.""" + if self.server: + self.server.shutdown() + self.server.server_close() + if self.server_thread: + self.server_thread.join(timeout=1) + + def __enter__(self): + """Context manager entry.""" + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit.""" + self.stop() + + def add_frames(self, site, frametype, urls): + """ + Add frame URLs for a site/frametype combination. 
+ + Parameters + ---------- + site : str + Single-character site identifier (e.g., 'H', 'L', 'V') + frametype : str + Frame type name (e.g., 'H1_HOFT_C02') + urls : list of str + List of frame file URLs + """ + self.frame_configs[(site, frametype)] = urls + MockGWDataFindHandler.frame_configs = self.frame_configs diff --git a/tests/test_analysis.py b/tests/test_analysis.py index 392334c9..b15c8327 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -48,6 +48,7 @@ def tearDown(self): def setUp(self): reload(asimov) reload(manage) + shutil.rmtree(f"{self.cwd}/tests/tmp/", ignore_errors=True) os.makedirs(f"{self.cwd}/tests/tmp/project") os.chdir(f"{self.cwd}/tests/tmp/project") runner = CliRunner() diff --git a/tests/test_application.py b/tests/test_application.py index e53e1768..0e2921fb 100644 --- a/tests/test_application.py +++ b/tests/test_application.py @@ -118,9 +118,9 @@ def test_event_non_standard_fmin(self): event = self.ledger.get_event("Nonstandard fmin")[0] - self.assertEqual(event.meta["quality"]["minimum frequency"]["H1"], 62) - self.assertEqual(event.meta["quality"]["minimum frequency"]["L1"], 92) - self.assertEqual(event.meta["quality"]["minimum frequency"]["V1"], 62) + self.assertEqual(event.meta["waveform"]["minimum frequency"]["H1"], 62) + self.assertEqual(event.meta["waveform"]["minimum frequency"]["L1"], 92) + self.assertEqual(event.meta["waveform"]["minimum frequency"]["V1"], 62) def test_event_non_standard_channels(self): """Check event-specific channel overwrites project default.""" @@ -159,3 +159,137 @@ def test_event_non_standard_frames(self): self.assertEqual(event.meta["data"]["frame types"]["L1"], "NonstandardFrameL1") self.assertEqual(event.meta["data"]["frame types"]["H1"], "NonstandardFrame") self.assertEqual(event.meta["data"]["frame types"]["V1"], "UnusualFrameType") + + def test_minimum_frequency_in_quality_raises_error(self): + """Test that having minimum frequency in quality section raises an error.""" + apply_page( 
+ f"{self.cwd}/tests/test_data/testing_pe.yaml", + event=None, + ledger=self.ledger, + ) + apply_page( + f"{self.cwd}/tests/test_data/event_deprecated_fmin_quality.yaml", + event=None, + ledger=self.ledger, + ) + + # Creating an analysis from this event should raise a ValueError + with self.assertRaises(ValueError) as context: + apply_page( + f"{self.cwd}/tests/test_data/simple_analysis.yaml", + event="Deprecated fmin in quality", + ledger=self.ledger, + ) + + self.assertIn("waveform", str(context.exception).lower()) + self.assertIn("quality", str(context.exception).lower()) + + def test_minimum_frequency_in_likelihood_raises_error(self): + """Test that having minimum frequency in likelihood section raises an error.""" + apply_page( + f"{self.cwd}/tests/test_data/testing_pe.yaml", + event=None, + ledger=self.ledger, + ) + apply_page( + f"{self.cwd}/tests/test_data/event_deprecated_fmin_likelihood.yaml", + event=None, + ledger=self.ledger, + ) + + # Creating an analysis from this event should raise a ValueError + with self.assertRaises(ValueError) as context: + apply_page( + f"{self.cwd}/tests/test_data/simple_analysis.yaml", + event="Deprecated fmin in likelihood", + ledger=self.ledger, + ) + + self.assertIn("waveform", str(context.exception).lower()) + self.assertIn("likelihood", str(context.exception).lower()) + + +class StrategyTests(AsimovTestCase): + """ + Tests to ensure that strategy blueprints are handled correctly. 
+ """ + + def test_single_parameter_strategy(self): + """Test that a single-parameter strategy creates multiple analyses.""" + # First add the event + apply_page( + f"{self.cwd}/tests/test_data/test_strategy_event.yaml", + ledger=self.ledger, + ) + + # Apply the strategy blueprint + apply_page( + f"{self.cwd}/tests/test_data/test_strategy_single.yaml", + event="S000000", + ledger=self.ledger + ) + + event = self.ledger.get_event("S000000")[0] + + # Should have created 3 analyses from the strategy + self.assertEqual(len(event.productions), 3) + + # Check that each analysis has the correct waveform + analysis_names = {prod.name for prod in event.productions} + expected_names = { + "bilby-IMRPhenomXPHM", + "bilby-SEOBNRv4PHM", + "bilby-IMRPhenomD" + } + self.assertEqual(analysis_names, expected_names) + + # Check that each analysis has the correct waveform set + for prod in event.productions: + if prod.name == "bilby-IMRPhenomXPHM": + self.assertEqual(prod.meta["waveform"]["approximant"], "IMRPhenomXPHM") + elif prod.name == "bilby-SEOBNRv4PHM": + self.assertEqual(prod.meta["waveform"]["approximant"], "SEOBNRv4PHM") + elif prod.name == "bilby-IMRPhenomD": + self.assertEqual(prod.meta["waveform"]["approximant"], "IMRPhenomD") + + def test_multi_parameter_strategy_matrix(self): + """Test that a multi-parameter strategy creates all combinations.""" + # First add the event + apply_page( + f"{self.cwd}/tests/test_data/test_strategy_event.yaml", + ledger=self.ledger, + ) + + # Apply the strategy blueprint + apply_page( + f"{self.cwd}/tests/test_data/test_strategy_matrix.yaml", + event="S000000", + ledger=self.ledger + ) + + event = self.ledger.get_event("S000000")[0] + + # Should have created 4 analyses (2 waveforms x 2 samplers) + self.assertEqual(len(event.productions), 4) + + # Check that each analysis has the correct combination + analysis_names = {prod.name for prod in event.productions} + expected_names = { + "bilby-IMRPhenomXPHM-dynesty", + 
"bilby-IMRPhenomXPHM-emcee", + "bilby-SEOBNRv4PHM-dynesty", + "bilby-SEOBNRv4PHM-emcee" + } + self.assertEqual(analysis_names, expected_names) + + # Verify parameter combinations + for prod in event.productions: + if "IMRPhenomXPHM" in prod.name: + self.assertEqual(prod.meta["waveform"]["approximant"], "IMRPhenomXPHM") + elif "SEOBNRv4PHM" in prod.name: + self.assertEqual(prod.meta["waveform"]["approximant"], "SEOBNRv4PHM") + + if "dynesty" in prod.name: + self.assertEqual(prod.meta["sampler"]["sampler"], "dynesty") + elif "emcee" in prod.name: + self.assertEqual(prod.meta["sampler"]["sampler"], "emcee") diff --git a/tests/test_asimov.py b/tests/test_asimov.py index 9f08cae4..7d555c56 100644 --- a/tests/test_asimov.py +++ b/tests/test_asimov.py @@ -5,13 +5,16 @@ from importlib import reload import asimov -from pkg_resources import DistributionNotFound +try: + from importlib.metadata import PackageNotFoundError +except ImportError: + from importlib_metadata import PackageNotFoundError class TestAsimovBase(unittest.TestCase): - @patch("pkg_resources.get_distribution", + @patch("importlib.metadata.version", **{ - 'side_effect': DistributionNotFound,#("Not found", "asimov"), + 'side_effect': PackageNotFoundError, }) def testImports(self, blah): reload(asimov) diff --git a/tests/test_blueprints.py b/tests/test_blueprints.py new file mode 100644 index 00000000..24c70f7e --- /dev/null +++ b/tests/test_blueprints.py @@ -0,0 +1,7 @@ +import unittest + +from asimov import blueprints + +class TestAnalysisBlueprint(unittest.TestCase): + def test_blueprints_module_importable(self): + self.assertIsNotNone(blueprints) \ No newline at end of file diff --git a/tests/test_blueprints/bayeswave_quick_test.yaml b/tests/test_blueprints/bayeswave_quick_test.yaml new file mode 100644 index 00000000..3819cb7a --- /dev/null +++ b/tests/test_blueprints/bayeswave_quick_test.yaml @@ -0,0 +1,9 @@ +kind: analysis +name: generate-psd +pipeline: bayeswave +comment: Bayeswave on-source PSD 
estimation process +likelihood: + roll off time: 1 + iterations: 25000 + chains: 4 + threads: 4 diff --git a/tests/test_blueprints/bilby_quick_test.yaml b/tests/test_blueprints/bilby_quick_test.yaml new file mode 100644 index 00000000..1a42e249 --- /dev/null +++ b/tests/test_blueprints/bilby_quick_test.yaml @@ -0,0 +1,13 @@ +# This file contains a standard bilby_pipe analysis +kind: analysis +name: bilby-IMRPhenomXPHM +pipeline: bilby +waveform: + approximant: IMRPhenomXPHM + arguments: + PhenomXHMReleaseVersion: 122022 + +comment: PE job using IMRPhenomXPHM and bilby +needs: + #- get-data + - generate-psd diff --git a/tests/test_blueprints/gwosc_event.yaml b/tests/test_blueprints/gwosc_event.yaml new file mode 100644 index 00000000..6b3cf64b --- /dev/null +++ b/tests/test_blueprints/gwosc_event.yaml @@ -0,0 +1,26 @@ +data: + segment length: 4 +event time: 1126259462.391 +gid: G190047 +interferometers: +- H1 +- L1 +kind: event +likelihood: + psd length: 4 + reference frequency: 20 + sample rate: 2048 + segment start: 1126259460.391 + start frequency: 13.333333333333334 + window length: 4 +name: GW150914_095045 +priors: + amplitude order: 1 + chirp mass: + maximum: 41.97447913941358 + minimum: 21.418182160215295 +quality: + minimum frequency: + H1: 20 + L1: 20 + diff --git a/tests/test_blueprints/gwosc_get_data.yaml b/tests/test_blueprints/gwosc_get_data.yaml new file mode 100644 index 00000000..60f3c20f --- /dev/null +++ b/tests/test_blueprints/gwosc_get_data.yaml @@ -0,0 +1,5 @@ +kind: analysis +name: get-data +pipeline: gwdata +download: + - frames diff --git a/tests/test_blueprints/gwosc_quick_test.yaml b/tests/test_blueprints/gwosc_quick_test.yaml new file mode 100644 index 00000000..a7ebbdca --- /dev/null +++ b/tests/test_blueprints/gwosc_quick_test.yaml @@ -0,0 +1,117 @@ +# These settings are designed to set up very quick analyses which will finish quickly. +# They are absolutely not intended for real analyses, but are useful for testing the pipeline. 
+kind: configuration +data: + channels: + L1: L1:GWOSC-16KHZ_R1_STRAIN + H1: H1:GWOSC-16KHZ_R1_STRAIN + V1: V1:GWOSC-16KHZ_R1_STRAIN + frame types: + H1: H1_LOSC_16_V1 + L1: L1_LOSC_16_V1 +pipelines: + bilby: + quality: + state vector: + L1: L1:DCS-CALIB_STATE_VECTOR_C01 + H1: H1:DCS-CALIB_STATE_VECTOR_C01 + V1: V1:DQ_ANALYSIS_STATE_VECTOR + sampler: + sampler: dynesty + parallel jobs: 2 + sampler kwargs: "{nlive: 50, dlogz: 1, naccept: 5, check_point_delta_t: 1800, 'print_method': 'interval-10', 'sample': 'acceptance-walk'}" + scheduler: + accounting group: ligo.dev.o4.cbc.pe.bilby + request cpus: 4 + request memory: 2.0 + cosmology: Planck15_lal + bayeswave: + quality: + state vector: + L1: L1:DCS-CALIB_STATE_VECTOR_C01 + H1: H1:DCS-CALIB_STATE_VECTOR_C01 + V1: V1:DQ_ANALYSIS_STATE_VECTOR + likelihood: + iterations: 25000 + chains: 2 + threads: 1 + scheduler: + accounting group: ligo.dev.o4.cbc.pe.bilby + request memory: 1024 + request post memory: 2048 + rift: + scheduler: + accounting group: ligo.dev.o4.cbc.pe.bilby + request memory: 1024 +postprocessing: + pesummary: + accounting group: ligo.dev.o4.cbc.pe.bilby + cosmology: Planck15 + evolve spins: forwards + multiprocess: 4 + redshift: exact + regenerate posteriors: + - redshift + - radiated_energy + skymap samples: 2000 +--- +kind: configuration +priors: + chirp mass: + maximum: 100 + minimum: 1 + type: bilby.gw.prior.UniformInComponentsChirpMass + dec: + type: Cosine + luminosity distance: + maximum: 20000 + minimum: 10 + type: bilby.gw.prior.UniformSourceFrame + cosmology: Planck15_LAL + mass 1: + maximum: 1000 + minimum: 1 + type: Constraint + mass 2: + maximum: 1000 + minimum: 1 + type: Constraint + mass ratio: + maximum: 1.0 + minimum: 0.05 + type: bilby.gw.prior.UniformInComponentsMassRatio + phase: + boundary: periodic + minimum: 0 + maximum: 2 * np.pi + type: Uniform + phi 12: + minimum: 0 + maximum: 2 * np.pi + type: Uniform + phi jl: + minimum: 0 + maximum: 2 * np.pi + type: Uniform + psi: + 
minimum: 0 + maximum: np.pi + type: Uniform + ra: + minimum: 0 + maximum: 2 * np.pi + type: Uniform + spin 1: + maximum: 0.99 + minimum: 0 + type: Uniform + spin 2: + maximum: 0.99 + minimum: 0 + type: Uniform + theta jn: + type: Sine + tilt 1: + type: Sine + tilt 2: + type: Sine diff --git a/tests/test_blueprints/lalinference_quick_test.yaml b/tests/test_blueprints/lalinference_quick_test.yaml new file mode 100644 index 00000000..2fccaa82 --- /dev/null +++ b/tests/test_blueprints/lalinference_quick_test.yaml @@ -0,0 +1,10 @@ +# This file contains a standard lalinference analysis +kind: analysis +name: lalinference-IMRPhenomXPHM +pipeline: lalinference +waveform: + approximant: IMRPhenomXPHM + +comment: PE job using IMRPhenomXPHM and lalinference +needs: + - generate-psd diff --git a/tests/test_blueprints/project_test_pipeline.yaml b/tests/test_blueprints/project_test_pipeline.yaml new file mode 100644 index 00000000..fce22176 --- /dev/null +++ b/tests/test_blueprints/project_test_pipeline.yaml @@ -0,0 +1,11 @@ +# Test blueprint for the ProjectTestPipeline +# This creates a project analysis that operates across multiple events +kind: ProjectAnalysis +name: test-project-pipeline +pipeline: projecttestpipeline +comment: Testing pipeline for ProjectAnalysis infrastructure +status: ready +subjects: + - GW150914_095045 +analyses: + - name: test-simple-pipeline diff --git a/tests/test_blueprints/simple_test_pipeline.yaml b/tests/test_blueprints/simple_test_pipeline.yaml new file mode 100644 index 00000000..00d22599 --- /dev/null +++ b/tests/test_blueprints/simple_test_pipeline.yaml @@ -0,0 +1,7 @@ +# Test blueprint for the SimpleTestPipeline +# This creates a simple analysis that will complete quickly for testing +kind: analysis +name: test-simple-pipeline +pipeline: simpletestpipeline +comment: Testing pipeline for SimpleAnalysis infrastructure +status: ready diff --git a/tests/test_blueprints/subject_test_pipeline.yaml b/tests/test_blueprints/subject_test_pipeline.yaml 
new file mode 100644
index 00000000..d74b5490
--- /dev/null
+++ b/tests/test_blueprints/subject_test_pipeline.yaml
@@ -0,0 +1,9 @@
+# Test blueprint for the SubjectTestPipeline
+# This creates a subject analysis that depends on other analyses
+kind: analysis
+name: test-subject-pipeline
+pipeline: subjecttestpipeline
+comment: Testing pipeline for SubjectAnalysis infrastructure
+status: ready
+needs:
+  - test-simple-pipeline
diff --git a/tests/test_cbcflow_integration.py b/tests/test_cbcflow_integration.py
new file mode 100644
index 00000000..69970840
--- /dev/null
+++ b/tests/test_cbcflow_integration.py
@@ -0,0 +1,554 @@
+"""
+Integration tests for CBCFlow <-> Asimov interaction.
+
+These tests cover the two integration paths:
+
+1. Applicator (cbcflow -> asimov): ``asimov apply -p cbcflow --event <SNAME>``
+   Reads metadata from a cbcflow library and creates/updates an Asimov event.
+
+2. Collector (asimov -> cbcflow): post-monitor hook run by ``asimov monitor``
+   Reads analysis status from the asimov ledger and writes it back to the
+   cbcflow library.
+
+The tests create a lightweight, local-git-backed cbcflow library so that no
+network access or real remote is required. Git *remote* operations
+(pull/push) are patched out; local add/commit operations are allowed to run
+for realism.
+
+These tests require cbcflow to be installed (``pip install cbcflow``).
+If cbcflow is not available the whole module is skipped gracefully. They
+are run in a separate CI workflow with ``continue-on-error: true``.
+""" + +import copy +import json +import os +import shutil +import subprocess +import unittest +from unittest.mock import patch + +try: + import cbcflow + import cbcflow.core.database + import cbcflow.core.schema + import cbcflow.core.parser + from cbcflow.inputs.asimov import Collector + from cbcflow.outputs.asimov import Applicator + + cbcflow_available = True +except ImportError: + cbcflow_available = False + +import git + +from asimov.testing import AsimovTestCase +from asimov.cli.application import apply_page, apply_via_plugin +from asimov.ledger import YAMLLedger + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +TEST_SNAME = "S000000xx" + +_EVENT_BLUEPRINT = f"""\ +kind: event +name: {TEST_SNAME} +ligo: + sname: {TEST_SNAME} +interferometers: + - H1 + - L1 +""" + +_BILBY_ANALYSIS_BLUEPRINT = """\ +kind: analysis +name: Prod0 +pipeline: bilby +status: running +waveform: + approximant: IMRPhenomXPHM +""" + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +_LIBRARY_CFG = """\ +[Library Info] +library-name = test-library + +[Events] +far-threshold = 1.0 +""" + +_MINIMAL_METADATA = { + "Sname": TEST_SNAME, + "Info": {"Notes": [], "Labels": []}, + "Publications": {"Papers": []}, + "GraceDB": { + "Events": [ + { + "State": "preferred", + "UID": "G000001", + "Pipeline": "gstlal", + "GPSTime": 1000000000.0, + "FAR": 1.0e-10, + "NetworkSNR": 12.0, + "Mass1": 30.0, + "Mass2": 25.0, + } + ], + "Instruments": "H1,L1", + "LastUpdate": "2023-01-01 00:00:00.000000", + }, + "ParameterEstimation": { + "Analysts": [], + "Reviewers": [], + "Status": "ongoing", + "Results": [], + "Notes": [], + }, + "ExtremeMatter": {"Analyses": []}, + "Cosmology": {"Counterparts": []}, + "Lensing": {"Analyses": []}, + "RatesAndPopulations": 
{"RnPRunsUsingThisSuperevent": []}, + "TestingGR": { + "IMRCTAnalyses": [], + "SSBAnalyses": [], + "PSEOBRDAnalyses": [], + "SIMAnalyses": [], + "MDRAnalyses": [], + "FTIAnalyses": [], + "Notes": [], + }, + "DetectorCharacterization": { + "Analysts": [], + "Reviewers": [], + "ParticipatingDetectors": ["H1", "L1"], + "Status": "complete", + "RecommendedDetectors": [ + { + "UID": "H1", + "RecommendedMinimumFrequency": 20, + "FrameType": "H1_HOFT_C00", + "RecommendedChannel": "H1:GDS-CALIB_STRAIN_CLEAN", + "GlitchMitigationStatus": "not required", + "Notes": [], + }, + { + "UID": "L1", + "RecommendedMinimumFrequency": 20, + "FrameType": "L1_HOFT_C00", + "RecommendedChannel": "L1:GDS-CALIB_STRAIN_CLEAN", + "GlitchMitigationStatus": "not required", + "Notes": [], + }, + ], + "DQRResults": [], + "Notes": [], + "RecommendedDuration": 8.0, + }, +} + + +def _init_library_repo(library_path): + """ + Initialise a git repository at *library_path* with committer identity + set. Returns the Repo object. + + Note: branch renaming to 'main' must be done *after* the first commit + because ``git branch -M`` requires at least one commit to exist. + Call ``_ensure_main_branch(repo)`` once the initial commit is made. + """ + # git init -b main (git >= 2.28). Fall back to plain init for older git. + result = subprocess.run( + ["git", "init", "-b", "main", library_path], + capture_output=True, + ) + if result.returncode != 0: + subprocess.run(["git", "init", library_path], check=True, capture_output=True) + + repo = git.Repo(library_path) + with repo.config_writer() as cfg: + cfg.set_value("user", "name", "Asimov Test") + cfg.set_value("user", "email", "test@asimov.test") + return repo + + +def _ensure_main_branch(repo): + """ + Rename the current branch to 'main' if it isn't already. + + Must be called *after* at least one commit exists in the repository, + because ``git branch -M`` requires an existing commit. 
+ """ + try: + repo.git.branch("-M", "main") + except git.GitCommandError: + pass # already on 'main', or rename not supported + + +# --------------------------------------------------------------------------- +# Test: Applicator (cbcflow -> asimov) +# --------------------------------------------------------------------------- + + +@unittest.skipUnless(cbcflow_available, "cbcflow not installed") +class TestCBCFlowApplicator(AsimovTestCase): + """ + Tests for the cbcflow Applicator hook. + + The Applicator reads metadata from a cbcflow library and creates an + event in the Asimov ledger. This is the path exercised by:: + + asimov apply -p cbcflow --event + """ + + def setUp(self): + super().setUp() + self.library_path = os.path.join(self.cwd, "tests", "tmp", "cbcflow_library") + self._setup_library_with_event() + self._configure_hooks() + + def _setup_library_with_event(self): + """Create a cbcflow library containing metadata for TEST_SNAME.""" + os.makedirs(self.library_path, exist_ok=True) + repo = _init_library_repo(self.library_path) + + with open(os.path.join(self.library_path, "library.cfg"), "w") as f: + f.write(_LIBRARY_CFG) + + metadata = copy.deepcopy(_MINIMAL_METADATA) + metadata_file = f"{TEST_SNAME}-cbc-metadata.json" + with open(os.path.join(self.library_path, metadata_file), "w") as f: + json.dump(metadata, f, indent=2) + + repo.index.add(["library.cfg", metadata_file]) + repo.index.commit("Initial test library") + _ensure_main_branch(repo) + + def _configure_hooks(self): + self.ledger.data["hooks"] = { + "applicator": { + "cbcflow": {"library location": self.library_path} + } + } + self.ledger.save() + + # --- individual tests --------------------------------------------------- + + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_applicator_adds_event_to_ledger(self, _mock_pull): + """Applicator should create an event in the asimov ledger.""" + applicator = Applicator(self.ledger) + applicator.run(sid=TEST_SNAME) + 
+ events = self.ledger.get_event(TEST_SNAME) + self.assertEqual(len(events), 1) + self.assertEqual(events[0].name, TEST_SNAME) + + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_applicator_sets_minimum_frequency(self, _mock_pull): + """Applicator should populate quality.minimum frequency from DetectorCharacterization.""" + applicator = Applicator(self.ledger) + applicator.run(sid=TEST_SNAME) + + event = self.ledger.get_event(TEST_SNAME)[0] + min_freq = event.meta.get("quality", {}).get("minimum frequency", {}) + self.assertEqual(min_freq.get("H1"), 20) + self.assertEqual(min_freq.get("L1"), 20) + + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_applicator_sets_data_channels(self, _mock_pull): + """Applicator should populate data.channels from DetectorCharacterization.""" + applicator = Applicator(self.ledger) + applicator.run(sid=TEST_SNAME) + + event = self.ledger.get_event(TEST_SNAME)[0] + channels = event.meta.get("data", {}).get("channels", {}) + self.assertEqual(channels.get("H1"), "H1:GDS-CALIB_STRAIN_CLEAN") + self.assertEqual(channels.get("L1"), "L1:GDS-CALIB_STRAIN_CLEAN") + + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_applicator_sets_frame_types(self, _mock_pull): + """Applicator should populate data.frame types from DetectorCharacterization.""" + applicator = Applicator(self.ledger) + applicator.run(sid=TEST_SNAME) + + event = self.ledger.get_event(TEST_SNAME)[0] + frame_types = event.meta.get("data", {}).get("frame types", {}) + self.assertEqual(frame_types.get("H1"), "H1_HOFT_C00") + self.assertEqual(frame_types.get("L1"), "L1_HOFT_C00") + + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_applicator_sets_ligo_sname(self, _mock_pull): + """Applicator should store the sname and FAR under event.ligo.""" + applicator = Applicator(self.ledger) + applicator.run(sid=TEST_SNAME) + + event = 
self.ledger.get_event(TEST_SNAME)[0] + self.assertEqual(event.meta["ligo"]["sname"], TEST_SNAME) + self.assertAlmostEqual(event.meta["ligo"]["false alarm rate"], 1.0e-10) + + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_apply_via_plugin_adds_event(self, _mock_pull): + """Full CLI path: apply_via_plugin should add the event via the cbcflow hook.""" + import sys + if sys.version_info >= (3, 10): + from importlib.metadata import entry_points as ep + else: + from importlib_metadata import entry_points as ep + if not any(h.name == "cbcflow" for h in ep(group="asimov.hooks.applicator")): + self.skipTest( + "cbcflow applicator entry point not registered " + "(install cbcflow from PyPI to enable this test)" + ) + + apply_via_plugin(TEST_SNAME, hookname="cbcflow") + + # Reload ledger from disk to pick up changes made by the plugin + ledger = YAMLLedger(".asimov/ledger.yml") + events = ledger.get_event(TEST_SNAME) + self.assertEqual(len(events), 1) + + +# --------------------------------------------------------------------------- +# Test: Collector (asimov -> cbcflow) +# --------------------------------------------------------------------------- + + +@unittest.skipUnless(cbcflow_available, "cbcflow not installed") +class TestCBCFlowCollector(AsimovTestCase): + """ + Tests for the cbcflow Collector hook. + + The Collector runs as a post-monitor hook and writes analysis status + from the asimov ledger back into the cbcflow library. This is the + path exercised by the ``asimov monitor`` command when the cbcflow + postmonitor hook is configured. + """ + + def setUp(self): + super().setUp() + self.library_path = os.path.join(self.cwd, "tests", "tmp", "cbcflow_library") + self._setup_empty_library() + self._setup_asimov_event() + self._configure_hooks() + # YAMLLedger caches events in _all_events at __init__ time. 
Reload
+        # now that all events and analyses have been written to disk so that
+        # ledger.get_event() (no-arg form used by the Collector) is populated.
+        self.ledger = YAMLLedger(".asimov/ledger.yml")
+
+    def _setup_empty_library(self):
+        """Create an empty cbcflow library (no event files yet).
+
+        A local bare repository is created alongside the working copy and added
+        as the "origin" remote. This is required because cbcflow's
+        ``git_checkout_new_branch`` calls ``git push -u origin <branch>`` to
+        set up tracking the first time it sees an untracked branch. Without a
+        real remote the push fails with "fatal: 'origin' does not appear to be
+        a git repository". The bare repo acts as that remote; the actual remote
+        push at the end of ``Collector.run()`` (``git_push_to_remote``) is
+        still mocked out so no real network I/O occurs.
+        """
+        if os.path.exists(self.library_path):
+            shutil.rmtree(self.library_path)
+        os.makedirs(self.library_path)
+        repo = _init_library_repo(self.library_path)
+
+        with open(os.path.join(self.library_path, "library.cfg"), "w") as f:
+            f.write(_LIBRARY_CFG)
+        repo.index.add(["library.cfg"])
+        repo.index.commit("Initial empty library")
+        _ensure_main_branch(repo)
+
+        # Set up a local bare repo as "origin" so that git_checkout_new_branch
+        # can successfully call "git push -u origin main" to register tracking.
+ bare_path = self.library_path + "_origin" + if os.path.exists(bare_path): + shutil.rmtree(bare_path) + git.Repo.init(bare_path, bare=True) + repo.create_remote("origin", bare_path) + repo.git.push("-u", "origin", "main") + + def _setup_asimov_event(self): + """Add the test event and a bilby analysis to the asimov ledger.""" + with open("test_event.yaml", "w") as f: + f.write(_EVENT_BLUEPRINT) + apply_page("test_event.yaml", ledger=self.ledger) + + with open("test_analysis.yaml", "w") as f: + f.write(_BILBY_ANALYSIS_BLUEPRINT) + apply_page("test_analysis.yaml", event=TEST_SNAME, ledger=self.ledger) + + def _configure_hooks(self): + self.ledger.data["hooks"] = { + "postmonitor": { + "cbcflow": { + "library location": self.library_path, + "schema section": "ParameterEstimation", + } + } + } + self.ledger.save() + + def _set_analysis_status(self, status): + """ + Update the status of 'Prod0' in the asimov ledger and save. + + Works by mutating the raw ledger dict so no assumptions need to be + made about the public API for status updates. Reloads self.ledger + afterward so that _all_events reflects the new status. 
+ """ + for prod_entry in self.ledger.events[TEST_SNAME].get("productions", []): + if isinstance(prod_entry, dict) and "Prod0" in prod_entry: + prod_entry["Prod0"]["status"] = status + break + self.ledger.save() + self.ledger = YAMLLedger(".asimov/ledger.yml") + + def _read_pe_results(self): + """Return the ParameterEstimation.Results list from the library file.""" + metadata_file = os.path.join( + self.library_path, f"{TEST_SNAME}-cbc-metadata.json" + ) + with open(metadata_file) as f: + data = json.load(f) + return data.get("ParameterEstimation", {}).get("Results", []) + + # --- individual tests --------------------------------------------------- + + @patch("asimov.git.EventRepo.find_prods", return_value=[]) + @patch("cbcflow.core.database.LocalLibraryDatabase.git_push_to_remote") + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_collector_creates_metadata_file(self, _mock_pull, _mock_push, _mock_find): + """Collector should create a cbcflow metadata file for the event.""" + collector = Collector(self.ledger) + collector.run() + + metadata_file = os.path.join( + self.library_path, f"{TEST_SNAME}-cbc-metadata.json" + ) + self.assertTrue( + os.path.exists(metadata_file), + "Collector should write a cbcflow metadata file", + ) + + @patch("asimov.git.EventRepo.find_prods", return_value=[]) + @patch("cbcflow.core.database.LocalLibraryDatabase.git_push_to_remote") + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_collector_writes_analysis_uid_and_pipeline(self, _mock_pull, _mock_push, _mock_find): + """Collector should write the analysis UID and InferenceSoftware.""" + collector = Collector(self.ledger) + collector.run() + + results = self._read_pe_results() + self.assertEqual(len(results), 1, "Expected exactly one result entry") + self.assertEqual(results[0]["UID"], "Prod0") + self.assertEqual(results[0]["InferenceSoftware"], "bilby") + + @patch("asimov.git.EventRepo.find_prods", 
return_value=[]) + @patch("cbcflow.core.database.LocalLibraryDatabase.git_push_to_remote") + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_collector_maps_running_status(self, _mock_pull, _mock_push, _mock_find): + """'running' in asimov should map to 'running' in cbcflow.""" + collector = Collector(self.ledger) + collector.run() + + results = self._read_pe_results() + self.assertEqual(results[0]["RunStatus"], "running") + + @patch("asimov.pipelines.bilby.Bilby.collect_assets") + @patch("asimov.git.EventRepo.find_prods", return_value=[]) + @patch("cbcflow.core.database.LocalLibraryDatabase.git_push_to_remote") + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_collector_maps_uploaded_status(self, _mock_pull, _mock_push, _mock_find, _mock_assets): + """'uploaded' in asimov should map to 'complete' in cbcflow.""" + # A real (though empty) file is needed so cbcflow can compute its MD5 + # when validating the ResultFile entry against the schema. 
+ fake_sample = os.path.join(self.cwd, "tests", "tmp", "fake_result.hdf5") + open(fake_sample, "w").close() + _mock_assets.return_value = {"samples": [fake_sample], "config": fake_sample} + + self._set_analysis_status("uploaded") + + collector = Collector(self.ledger) + collector.run() + + results = self._read_pe_results() + self.assertEqual(results[0]["RunStatus"], "complete") + + @patch("asimov.git.EventRepo.find_prods", return_value=[]) + @patch("cbcflow.core.database.LocalLibraryDatabase.git_push_to_remote") + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_collector_writes_waveform_approximant(self, _mock_pull, _mock_push, _mock_find): + """Collector should write the WaveformApproximant field.""" + collector = Collector(self.ledger) + collector.run() + + results = self._read_pe_results() + self.assertEqual(results[0].get("WaveformApproximant"), "IMRPhenomXPHM") + + @patch("asimov.pipelines.bilby.Bilby.collect_assets") + @patch("asimov.git.EventRepo.find_prods", return_value=[]) + @patch("cbcflow.core.database.LocalLibraryDatabase.git_push_to_remote") + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_collector_updates_status_on_second_run(self, _mock_pull, _mock_push, _mock_find, _mock_assets): + """ + Running the Collector twice should update the status, not duplicate entries. + + This is the key regression test for the bug where ledger updates were + not being reflected back to cbcflow. 
+ """ + # First run: analysis is running — collect_assets is not called + _mock_assets.return_value = {"samples": [], "config": None} + collector = Collector(self.ledger) + collector.run() + results_after_first = self._read_pe_results() + self.assertEqual(results_after_first[0]["RunStatus"], "running") + + # Simulate the analysis completing: update status to 'uploaded' + self._set_analysis_status("uploaded") + + # A real (though empty) file is needed so cbcflow can compute its MD5 + # when validating the ResultFile entry against the schema. + fake_sample = os.path.join(self.cwd, "tests", "tmp", "fake_result.hdf5") + open(fake_sample, "w").close() + _mock_assets.return_value = {"samples": [fake_sample], "config": fake_sample} + + # Second run: should update the existing entry to 'complete' + ledger2 = YAMLLedger(".asimov/ledger.yml") + collector2 = Collector(ledger2) + collector2.run() + + results_after_second = self._read_pe_results() + self.assertEqual( + len(results_after_second), + 1, + "Collector should update the existing entry, not create a duplicate", + ) + self.assertEqual( + results_after_second[0]["RunStatus"], + "complete", + "Status should be updated to 'complete' after the analysis is uploaded", + ) + + @patch("asimov.git.EventRepo.find_prods", return_value=[]) + @patch("cbcflow.core.database.LocalLibraryDatabase.git_push_to_remote") + @patch("cbcflow.core.database.LocalLibraryDatabase.git_pull_from_remote") + def test_collector_commits_changes_to_git(self, _mock_pull, _mock_push, _mock_find): + """Collector should commit the updated metadata to the local git repo.""" + repo = git.Repo(self.library_path) + commits_before = len(list(repo.iter_commits("main"))) + + collector = Collector(self.ledger) + collector.run() + + commits_after = len(list(repo.iter_commits("main"))) + self.assertGreater( + commits_after, + commits_before, + "Collector should commit the metadata update to git", + ) diff --git a/tests/test_cli_manage.py b/tests/test_cli_manage.py 
index 0b7b4f72..ad62f85f 100644 --- a/tests/test_cli_manage.py +++ b/tests/test_cli_manage.py @@ -18,6 +18,7 @@ from asimov.cli import manage, project from asimov.ledger import YAMLLedger from asimov.pipeline import PipelineException +from tests.blueprints import DEFAULTS_PE, DEFAULTS_PE_PRIORS, EVENTS as BLUEPRINT_EVENTS, PIPELINES pipelines = {"bayeswave"} EVENTS = ["GW150914_095045", "GW190924_021846", "GW190929_012149", "GW191109_010717"] @@ -48,12 +49,12 @@ def setUp(self): f = io.StringIO() with contextlib.redirect_stdout(f): - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe.yaml", event=None, ledger=self.ledger) - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe-priors.yaml", event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE, event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE_PRIORS, event=None, ledger=self.ledger) for event in EVENTS: for pipeline in pipelines: - apply_page(file = f"https://git.ligo.org/asimov/data/-/raw/main/tests/{event}.yaml", event=None, ledger=self.ledger) - apply_page(file = f"https://git.ligo.org/asimov/data/-/raw/main/tests/{pipeline}.yaml", event=event, ledger=self.ledger) + apply_page(file=BLUEPRINT_EVENTS[event], event=None, ledger=self.ledger) + apply_page(file=PIPELINES[pipeline], event=event, ledger=self.ledger) def test_build_all_events(self): """Check that multiple events can be built at once""" @@ -67,7 +68,7 @@ def test_build_all_events(self): self.assertTrue(f"Working on {event}" in result.output) self.assertTrue(f"Production config Prod0 created" in result.output) self.assertFalse(f"Production config Prod1 created" in result.output) - self.assertTrue(os.path.exists(os.path.join(self.cwd, "tests", "tmp", "project", "checkouts", event, "C01_offline", "Prod0.ini"))) + self.assertTrue(os.path.exists(os.path.join(self.cwd, "tests", "tmp", "project", "checkouts", event, "analyses", "Prod0.ini"))) def test_build_dryruns(self): @@ 
-129,12 +130,12 @@ def setUp(self): #f = io.StringIO() #with contextlib.redirect_stdout(f): - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe.yaml", event=None, ledger=self.ledger) - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe-priors.yaml", event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE, event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE_PRIORS, event=None, ledger=self.ledger) for event in EVENTS: for pipeline in pipelines: - apply_page(file = f"https://git.ligo.org/asimov/data/-/raw/main/tests/{event}.yaml", event=None, ledger=self.ledger) - apply_page(file = f"https://git.ligo.org/asimov/data/-/raw/main/tests/{pipeline}.yaml", event=event, ledger=self.ledger) + apply_page(file=BLUEPRINT_EVENTS[event], event=None, ledger=self.ledger) + apply_page(file=PIPELINES[pipeline], event=event, ledger=self.ledger) def test_buildsubmit_all_events(self): """Check that multiple events can be built at once""" @@ -148,7 +149,7 @@ def test_buildsubmit_all_events(self): self.assertTrue(f"Working on {event}" in result.output) self.assertTrue(f"Production config Prod0 created" in result.output) self.assertFalse(f"Production config Prod1 created" in result.output) - self.assertTrue(os.path.exists(os.path.join(self.cwd, "tests", "tmp", "project", "checkouts", event, "C01_offline", "Prod0.ini"))) + self.assertTrue(os.path.exists(os.path.join(self.cwd, "tests", "tmp", "project", "checkouts", event, "analyses", "Prod0.ini"))) def test_build_submit_dryruns(self): diff --git a/tests/test_custom_states.py b/tests/test_custom_states.py new file mode 100644 index 00000000..07e52e5a --- /dev/null +++ b/tests/test_custom_states.py @@ -0,0 +1,175 @@ +""" +Unit tests for custom monitor states. 
+""" + +import unittest +from unittest.mock import Mock, patch + +from asimov.custom_states import ( + ReviewState, + ReviewedState, + UploadingState, + UploadedState, + RestartState, + WaitState, + CancelledState, + ManualState, + register_custom_states, +) +from asimov.monitor_context import MonitorContext + + +class TestCustomStates(unittest.TestCase): + """Test custom state handlers.""" + + def setUp(self): + """Set up test fixtures.""" + self.analysis = Mock() + self.analysis.name = "test_analysis" + self.analysis.pipeline = Mock() + + self.context = Mock(spec=MonitorContext) + self.context.analysis = self.analysis + self.context.update_ledger = Mock() + self.context.has_condor_job = Mock(return_value=False) + + @patch('asimov.custom_states.click.echo') + def test_review_state(self, mock_echo): + """Test ReviewState handler.""" + state = ReviewState() + self.assertEqual(state.state_name, "review") + + # Test with no review + result = state.handle(self.context) + self.assertTrue(result) + + # Test with approved review + self.analysis.review = Mock() + self.analysis.review.status = "approved" + result = state.handle(self.context) + self.assertTrue(result) + self.assertEqual(self.analysis.status, "reviewed") + + @patch('asimov.custom_states.click.echo') + def test_reviewed_state(self, mock_echo): + """Test ReviewedState handler.""" + state = ReviewedState() + self.assertEqual(state.state_name, "reviewed") + + result = state.handle(self.context) + self.assertTrue(result) + + @patch('asimov.custom_states.click.echo') + def test_uploading_state(self, mock_echo): + """Test UploadingState handler.""" + state = UploadingState() + self.assertEqual(state.state_name, "uploading") + + # Test without pipeline + self.analysis.pipeline = None + result = state.handle(self.context) + self.assertFalse(result) + + # Test with pipeline + self.analysis.pipeline = Mock() + self.analysis.pipeline.detect_upload_completion = Mock(return_value=True) + result = state.handle(self.context) 
+ self.assertTrue(result) + self.assertEqual(self.analysis.status, "uploaded") + + @patch('asimov.custom_states.click.echo') + def test_uploaded_state(self, mock_echo): + """Test UploadedState handler.""" + state = UploadedState() + self.assertEqual(state.state_name, "uploaded") + + result = state.handle(self.context) + self.assertTrue(result) + + @patch('asimov.custom_states.click.echo') + def test_restart_state(self, mock_echo): + """Test RestartState handler.""" + state = RestartState() + self.assertEqual(state.state_name, "restart") + + # Test without pipeline + self.analysis.pipeline = None + result = state.handle(self.context) + self.assertFalse(result) + + # Test with pipeline + self.analysis.pipeline = Mock() + result = state.handle(self.context) + self.assertTrue(result) + self.assertEqual(self.analysis.status, "ready") + + @patch('asimov.custom_states.click.echo') + def test_wait_state(self, mock_echo): + """Test WaitState handler.""" + state = WaitState() + self.assertEqual(state.state_name, "wait") + + # Mock _needs attribute as an empty list + self.analysis._needs = [] + + result = state.handle(self.context) + self.assertTrue(result) + + @patch('asimov.custom_states.click.echo') + def test_wait_state_with_dependencies(self, mock_echo): + """Test WaitState handler with dependencies.""" + state = WaitState() + + # Mock _needs attribute with some dependencies + self.analysis._needs = ["dep1", "dep2"] + + result = state.handle(self.context) + self.assertTrue(result) + + @patch('asimov.custom_states.click.echo') + def test_cancelled_state(self, mock_echo): + """Test CancelledState handler.""" + state = CancelledState() + self.assertEqual(state.state_name, "cancelled") + + result = state.handle(self.context) + self.assertTrue(result) + + @patch('asimov.custom_states.click.echo') + def test_manual_state(self, mock_echo): + """Test ManualState handler.""" + state = ManualState() + self.assertEqual(state.state_name, "manual") + + result = 
state.handle(self.context) + self.assertTrue(result) + + @patch('asimov.custom_states.register_state') + def test_register_custom_states(self, mock_register): + """Test that all custom states are registered.""" + register_custom_states() + + # Should register 8 states + self.assertEqual(mock_register.call_count, 8) + + +class TestCustomStateIntegration(unittest.TestCase): + """Test integration of custom states with state registry.""" + + def test_custom_states_in_registry(self): + """Test that custom states are available in registry.""" + from asimov.monitor_states import get_state_handler + + # Test a few custom states + review_handler = get_state_handler("review") + self.assertIsInstance(review_handler, ReviewState) + + uploaded_handler = get_state_handler("uploaded") + self.assertIsInstance(uploaded_handler, UploadedState) + + restart_handler = get_state_handler("restart") + self.assertIsInstance(restart_handler, RestartState) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_dag.py b/tests/test_dag.py index 8f4abfa2..7fef67eb 100644 --- a/tests/test_dag.py +++ b/tests/test_dag.py @@ -9,6 +9,7 @@ from asimov.cli.project import make_project from asimov.cli.application import apply_page import git +from tests.blueprints import DEFAULTS_PE TEST_LEDGER = """ @@ -31,7 +32,7 @@ def setUp(self): os.chdir(f"{self.cwd}/tests/tmp/project") make_project(name="Test project", root=f"{self.cwd}/tests/tmp/project") self.ledger = YAMLLedger(f".asimov/ledger.yml") - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe.yaml", event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE, event=None, ledger=self.ledger) apply_page(file = f"{self.cwd}/tests/test_data/events_blueprint.yaml", ledger=self.ledger) diff --git a/tests/test_data/blueprints/GW150914_095045.yaml b/tests/test_data/blueprints/GW150914_095045.yaml new file mode 100644 index 00000000..d48a2d60 --- /dev/null +++ 
b/tests/test_data/blueprints/GW150914_095045.yaml @@ -0,0 +1,49 @@ +kind: event +data: + calibration: {} + channels: + H1: H1:DCS-CALIB_STRAIN_C02 + L1: L1:DCS-CALIB_STRAIN_C02 + frame types: + H1: H1_HOFT_C02 + L1: L1_HOFT_C02 + segment length: 4 +event time: 1126259462.391 +name: GW150914_095045 +gid: G190047 +interferometers: +- H1 +- L1 +quality: + maximum frequency: + H1: 896 + L1: 896 +likelihood: + psd length: 4 + reference frequency: 20 + sample rate: 2048 + segment start: 1126259460.391 + start frequency: 13.333333333333334 + window-length: 4 +# minimum frequency has been moved to the waveform section (v0.7 requirement) +waveform: + minimum frequency: + H1: 20 + L1: 20 +priors: + amplitude order: 1 + chirp mass: + minimum: 21.418182160215295 + maximum: 41.97447913941358 + mass 1: + minimum: 1 + maximum: 1000 + mass 2: + minimum: 1 + maximum: 1000 + luminosity distance: + minimum: 10 + maximum: 10000 + mass ratio: + minimum: 0.05 + maximum: 1.0 diff --git a/tests/test_data/blueprints/GW190924_021846.yaml b/tests/test_data/blueprints/GW190924_021846.yaml new file mode 100644 index 00000000..766eb3b8 --- /dev/null +++ b/tests/test_data/blueprints/GW190924_021846.yaml @@ -0,0 +1,47 @@ +data: + channels: + H1: H1:DCS-CALIB_STRAIN_CLEAN_SUB60HZ_C01 + L1: L1:DCS-CALIB_STRAIN_CLEAN_SUB60HZ_C01_T1700406_v4 + V1: V1:Hrec_hoft_V1O3ARepro1A_16384Hz + frame types: + H1: H1_HOFT_CLEAN_SUB60HZ_C01 + L1: L1_HOFT_CLEAN_SUB60HZ_C01_T1700406_v4 + V1: V1O3Repro1A + segment length: 32 +event time: 1253326744.841309 +interferometers: +- H1 +- L1 +- V1 +kind: event +likelihood: + psd length: 32 + reference frequency: 20 + sample rate: 8192 + segment start: 1253326714.841309 + window length: 32 +name: GW190924_021846 +priors: + amplitude order: 1 + chirp mass: + maximum: 7.942647336817651 + minimum: 5.076593989629403 + luminosity distance: + maximum: 10000 + minimum: 100 + mass 1: + maximum: 1000 + minimum: 1 + mass ratio: + maximum: 1.0 + minimum: 0.05 +# minimum frequency has 
been moved to the waveform section (v0.7 requirement) +waveform: + minimum frequency: + H1: 20 + L1: 20 + V1: 20 +supress: + V1: + lower: 49.5 + upper: 50.5 diff --git a/tests/test_data/blueprints/GW190929_012149.yaml b/tests/test_data/blueprints/GW190929_012149.yaml new file mode 100644 index 00000000..cac5764a --- /dev/null +++ b/tests/test_data/blueprints/GW190929_012149.yaml @@ -0,0 +1,53 @@ +data: + channels: + H1: H1:DCS-CALIB_STRAIN_CLEAN_SUB60HZ_C01 + L1: L1:DCS-CALIB_STRAIN_CLEAN_SUB60HZ_C01 + V1: V1:Hrec_hoft_V1O3ARepro1A_16384Hz + frame types: + H1: H1_HOFT_CLEAN_SUB60HZ_C01 + L1: L1_HOFT_CLEAN_SUB60HZ_C01 + V1: V1O3Repro1A + segment length: 4 +event time: 1253755327.498 +interferometers: +- L1 +- H1 +- V1 +kind: event +likelihood: + psd length: 4 + reference frequency: 10 + sample rate: 2048 + segment start: 1253755325.498 + start frequency: 3.9977733341464865 + window length: 4 +name: GW190929_012149 +priors: + amplitude order: 9 + chirp mass: + maximum: 104.85539754157023 + minimum: 23.6385078610448 + luminosity distance: + maximum: 10000 + minimum: 100 + mass 1: + maximum: 1000 + minimum: 1 + mass ratio: + maximum: 1.0 + minimum: 0.05 +quality: + maximum frequency: + H1: 896 + L1: 896 + V1: 896 +# minimum frequency has been moved to the waveform section (v0.7 requirement) +waveform: + minimum frequency: + H1: 20 + L1: 20 + V1: 20 +supress: + V1: + lower: 49.5 + upper: 50.5 diff --git a/tests/test_data/blueprints/GW191109_010717.yaml b/tests/test_data/blueprints/GW191109_010717.yaml new file mode 100644 index 00000000..e6beb86a --- /dev/null +++ b/tests/test_data/blueprints/GW191109_010717.yaml @@ -0,0 +1,51 @@ +data: + channels: + H1: H1:DCS-CALIB_STRAIN_CLEAN_SUB60HZ_C01_T1700406_v4 + L1: L1:DCS-CALIB_STRAIN_CLEAN_SUB60HZ_C01_T1700406_v4 + V1: V1:Hrec_hoft_16384Hz + frame types: + H1: H1_HOFT_CLEAN_SUB60HZ_C01_T1700406_v4 + L1: L1_HOFT_CLEAN_SUB60HZ_C01_T1700406_v4 + V1: V1Online + segment length: 4 +event time: 1257296855.216458 +interferometers: 
+- H1 +- L1 +kind: event +likelihood: + psd length: 4 + reference frequency: 20 + sample rate: 1024 + segment start: 1257296853.216458 + start frequency: 6.864878621747214 + window length: 4 +name: GW191109_010717 +priors: + amplitude order: 4 + chirp mass: + maximum: 96.67976382167427 + minimum: 26.700411297080883 + luminosity distance: + maximum: 10000 + minimum: 100 + mass 1: + maximum: 1000 + minimum: 1 + mass ratio: + maximum: 1.0 + minimum: 0.05 +quality: + maximum frequency: + H1: 448 + L1: 448 +# minimum frequency has been moved to the waveform section (v0.7 requirement) +waveform: + minimum frequency: + H1: 20 + L1: 20 + V1: 20 +supress: + V1: + lower: 46.0 + upper: 51.0 diff --git a/tests/test_data/blueprints/bayeswave.yaml b/tests/test_data/blueprints/bayeswave.yaml new file mode 100644 index 00000000..b21b4567 --- /dev/null +++ b/tests/test_data/blueprints/bayeswave.yaml @@ -0,0 +1,6 @@ +kind: analysis +name: Prod0 +pipeline: bayeswave +comment: Bayeswave on-source PSD estimation job +scheduler: + accounting group: test diff --git a/tests/test_data/blueprints/bilby.yaml b/tests/test_data/blueprints/bilby.yaml new file mode 100644 index 00000000..1ee27a33 --- /dev/null +++ b/tests/test_data/blueprints/bilby.yaml @@ -0,0 +1,10 @@ +# This file contains the standard set of analyses which were +# applied to the events for the GWTC-3 catalogue paper. 
+kind: analysis +name: Prod1 +pipeline: bilby +waveform: + approximant: IMRPhenomXPHM +scheduler: + accounting group: test +comment: Bilby parameter estimation job diff --git a/tests/test_data/blueprints/gwtc-2-1/GW150914_095045.yaml b/tests/test_data/blueprints/gwtc-2-1/GW150914_095045.yaml new file mode 100644 index 00000000..6875e853 --- /dev/null +++ b/tests/test_data/blueprints/gwtc-2-1/GW150914_095045.yaml @@ -0,0 +1,41 @@ +data: + channels: + H1: H1:DCS-CALIB_STRAIN_C02 + L1: L1:DCS-CALIB_STRAIN_C02 + frame types: + H1: H1_HOFT_C02 + L1: L1_HOFT_C02 + segment length: 4 +event time: 1126259462.391 +gid: G190047 +interferometers: +- H1 +- L1 +kind: event +likelihood: + psd length: 4 + reference frequency: 20 + sample rate: 2048 + segment start: 1126259460.391 + start frequency: 13.333333333333334 + window length: 4 +name: GW150914_095045 +priors: + amplitude order: 1 + chirp mass: + maximum: 41.97447913941358 + minimum: 21.418182160215295 + luminosity distance: + maximum: 10000 + minimum: 10 + mass 1: + maximum: 1000 + minimum: 1 + mass ratio: + maximum: 1.0 + minimum: 0.05 +# minimum frequency has been moved to the waveform section (v0.7 requirement) +waveform: + minimum frequency: + H1: 20 + L1: 20 diff --git a/tests/test_data/blueprints/production-pe-priors.yaml b/tests/test_data/blueprints/production-pe-priors.yaml new file mode 100644 index 00000000..2679e2b6 --- /dev/null +++ b/tests/test_data/blueprints/production-pe-priors.yaml @@ -0,0 +1,50 @@ +kind: configuration +priors: + chirp mass: + maximum: 100 + minimum: 1 + type: bilby.gw.prior.UniformInComponentsChirpMass + dec: + type: Cosine + luminosity distance: + maximum: 20000 + minimum: 10 + type: bilby.gw.prior.UniformSourceFrame + cosmology: Planck15_LAL + mass 1: + maximum: 1000 + minimum: 1 + type: Constraint + mass 2: + maximum: 1000 + minimum: 1 + type: Constraint + mass ratio: + maximum: 1.0 + minimum: 0.05 + type: bilby.gw.prior.UniformInComponentsMassRatio + phase: + boundary: periodic + 
type: Uniform + phi 12: + type: Uniform + phi jl: + type: Uniform + psi: + type: Uniform + ra: + type: Uniform + spin 1: + maximum: 0.99 + minimum: 0 + type: Uniform + spin 2: + maximum: 0.99 + minimum: 0 + type: Uniform + theta jn: + type: Sine + tilt 1: + type: Sine + tilt 2: + type: Sine diff --git a/tests/test_data/blueprints/production-pe.yaml b/tests/test_data/blueprints/production-pe.yaml new file mode 100644 index 00000000..9921205e --- /dev/null +++ b/tests/test_data/blueprints/production-pe.yaml @@ -0,0 +1,83 @@ +# +# This is a local copy of the production-pe.yaml defaults, updated to match +# the v0.7 requirement that minimum frequency must be in the 'waveform' section. +# Original source: https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe.yaml +# + +kind: configuration +data: + channels: + H1: H1:DCS-CALIB_STRAIN_CLEAN_AR01 + L1: L1:DCS-CALIB_STRAIN_CLEAN_AR01 + V1: V1:Hrec_hoftRepro1AR_16384Hz + frame types: + H1: H1_HOFT_AR01 + L1: L1_HOFT_AR01 + V1: HoftAR1U02 +likelihood: + roll off time: 1.0 +pipelines: + bilby: + sampler: + sampler: dynesty + sampler kwargs: + nlive: 1000 + naccept: 60 + sample: acceptance-walk + check_point_plot: True + maxmcmc: 100000 + parallel jobs: 3 + scheduler: + accounting group: ligo.prod.o4.cbc.pe.bilby + request cpus: 16 + likelihood: + roll off time: 1 + marginalization: + time: False + distance: True + cosmology: Planck15_lal + bayeswave: + scheduler: + accounting group: ligo.prod.o4.cbc.pe.bilby + request memory: 4096 MB + request post memory: 16384 MB + copy frames: True + osg: True + likelihood: + roll off time: 1 + iterations: 250000 + chains: 16 + threads: 4 + rift: + scheduler: + accounting group: ligo.prod.o4.cbc.pe.rift + request memory: 1024 + sampler: + cip: + fitting method: rf + explode jobs auto: True + ile: + n eff: 10 + jobs per worker: 100 + use aligned phase coordinates: True +postprocessing: + pesummary: + accounting group: ligo.prod.o4.cbc.pe.bilby + cosmology: Planck15_lal + evolve 
spins: forwards + multiprocess: 4 + redshift: exact + skymap samples: 2000 +quality: + state vector: + L1: L1:GDS-CALIB_STATE_VECTOR_AR01 + H1: H1:GDS-CALIB_STATE_VECTOR_AR01 + V1: V1:DQ_ANALYSIS_STATE_VECTOR +# minimum frequency has been moved to the waveform section (v0.7 requirement) +waveform: + minimum frequency: + H1: 20 + L1: 20 + V1: 20 + G1: 20 + K1: 20 diff --git a/tests/test_data/blueprints/rift.yaml b/tests/test_data/blueprints/rift.yaml new file mode 100644 index 00000000..f2fb39a9 --- /dev/null +++ b/tests/test_data/blueprints/rift.yaml @@ -0,0 +1,14 @@ +kind: analysis +name: RIFT0 +status: Ready +pipeline: RIFT +waveform: + approximant: SEOBNRv4PHM +comment: This is a sample RIFT analysis +scheduler: + accounting group: test +quality: + state vector: + L1: L1:DCS-CALIB_STATE_VECTOR_C01 + H1: H1:DCS-CALIB_STATE_VECTOR_C01 + V1: V1:DQ_ANALYSIS_STATE_VECTOR diff --git a/tests/test_data/blueprints/test_analysis_blueprint.yaml b/tests/test_data/blueprints/test_analysis_blueprint.yaml new file mode 100644 index 00000000..d25d9b62 --- /dev/null +++ b/tests/test_data/blueprints/test_analysis_blueprint.yaml @@ -0,0 +1,6 @@ +kind: analysis +name: Test Analysis Blueprint +comment: This is a test analysis blueprint for unit testing. + +likelihood: + sample rate: 4096 \ No newline at end of file diff --git a/tests/test_data/event_deprecated_fmin_likelihood.yaml b/tests/test_data/event_deprecated_fmin_likelihood.yaml new file mode 100644 index 00000000..8b904ec7 --- /dev/null +++ b/tests/test_data/event_deprecated_fmin_likelihood.yaml @@ -0,0 +1,20 @@ +# This event is designed for testing that an error is raised when +# minimum frequency is in the likelihood section instead of waveform. 
+kind: event +name: Deprecated fmin in likelihood +quality: + state vector: + L1: L1:DCS-CALIB_STATE_VECTOR_C01 + H1: H1:DCS-CALIB_STATE_VECTOR_C01 + V1: V1:DQ_ANALYSIS_STATE_VECTOR +likelihood: + sample rate: 4000 + minimum frequency: + H1: 20 + L1: 20 + V1: 20 +interferometers: + - H1 + - L1 + - V1 +event time: 1126259462.0 diff --git a/tests/test_data/event_deprecated_fmin_quality.yaml b/tests/test_data/event_deprecated_fmin_quality.yaml new file mode 100644 index 00000000..e8a3b407 --- /dev/null +++ b/tests/test_data/event_deprecated_fmin_quality.yaml @@ -0,0 +1,18 @@ +# This event is designed for testing that an error is raised when +# minimum frequency is in the quality section instead of waveform. +kind: event +name: Deprecated fmin in quality +quality: + minimum frequency: + H1: 20 + L1: 20 + V1: 20 + state vector: + L1: L1:DCS-CALIB_STATE_VECTOR_C01 + H1: H1:DCS-CALIB_STATE_VECTOR_C01 + V1: V1:DQ_ANALYSIS_STATE_VECTOR +interferometers: + - H1 + - L1 + - V1 +event time: 1126259462.0 diff --git a/tests/test_data/event_non_standard_fmin.yaml b/tests/test_data/event_non_standard_fmin.yaml index 5de6edbe..efab2efa 100644 --- a/tests/test_data/event_non_standard_fmin.yaml +++ b/tests/test_data/event_non_standard_fmin.yaml @@ -1,11 +1,12 @@ kind: event name: Nonstandard fmin quality: - minimum frequency: - H1: 62 - L1: 62 - V1: 62 state vector: L1: L1:DCS-CALIB_STATE_VECTOR_C01 H1: H1:DCS-CALIB_STATE_VECTOR_C01 V1: V1:DQ_ANALYSIS_STATE_VECTOR +waveform: + minimum frequency: + H1: 62 + L1: 62 + V1: 62 diff --git a/tests/test_data/event_non_standard_settings.yaml b/tests/test_data/event_non_standard_settings.yaml index 06b10709..e3eaa4fa 100644 --- a/tests/test_data/event_non_standard_settings.yaml +++ b/tests/test_data/event_non_standard_settings.yaml @@ -4,14 +4,17 @@ kind: event name: Nonstandard fmin quality: - minimum frequency: - H1: 62 - L1: 92 - V1: 62 state vector: L1: L1:DCS-CALIB_STATE_VECTOR_C01 H1: H1:DCS-CALIB_STATE_VECTOR_C01 V1: 
V1:DQ_ANALYSIS_STATE_VECTOR + +waveform: + minimum frequency: + H1: 62 + L1: 92 + V1: 62 + interferometers: - H1 - L1 diff --git a/tests/test_data/events_blueprint.yaml b/tests/test_data/events_blueprint.yaml index 7e268819..931ccada 100644 --- a/tests/test_data/events_blueprint.yaml +++ b/tests/test_data/events_blueprint.yaml @@ -33,7 +33,7 @@ priors: mass ratio: maximum: 1.0 minimum: 0.05 -quality: +waveform: minimum frequency: H1: 20 L1: 20 @@ -73,7 +73,7 @@ priors: mass ratio: maximum: 1.0 minimum: 0.05 -quality: +waveform: minimum frequency: H1: 20 L1: 20 diff --git a/tests/test_data/frames/H-H1_GWOSC_16KHZ_R1-1126259447-32.gwf b/tests/test_data/frames/H-H1_GWOSC_16KHZ_R1-1126259447-32.gwf new file mode 100644 index 00000000..66b96f8c Binary files /dev/null and b/tests/test_data/frames/H-H1_GWOSC_16KHZ_R1-1126259447-32.gwf differ diff --git a/tests/test_data/frames/L-L1_GWOSC_16KHZ_R1-1126259447-32.gwf b/tests/test_data/frames/L-L1_GWOSC_16KHZ_R1-1126259447-32.gwf new file mode 100644 index 00000000..b6fd73bf Binary files /dev/null and b/tests/test_data/frames/L-L1_GWOSC_16KHZ_R1-1126259447-32.gwf differ diff --git a/tests/test_data/s000000xx/C01_offline/Prod0_test.ini b/tests/test_data/s000000xx/analyses/Prod0_test.ini similarity index 100% rename from tests/test_data/s000000xx/C01_offline/Prod0_test.ini rename to tests/test_data/s000000xx/analyses/Prod0_test.ini diff --git a/tests/test_data/s000000xx/C01_offline/Prod3_test.ini b/tests/test_data/s000000xx/analyses/Prod3_test.ini similarity index 100% rename from tests/test_data/s000000xx/C01_offline/Prod3_test.ini rename to tests/test_data/s000000xx/analyses/Prod3_test.ini diff --git a/tests/test_data/s000000xx/C01_offline/s000000xx_gpsTime.txt b/tests/test_data/s000000xx/analyses/s000000xx_gpsTime.txt similarity index 100% rename from tests/test_data/s000000xx/C01_offline/s000000xx_gpsTime.txt rename to tests/test_data/s000000xx/analyses/s000000xx_gpsTime.txt diff --git a/tests/test_data/simple_analysis.yaml 
b/tests/test_data/simple_analysis.yaml new file mode 100644 index 00000000..38ef7472 --- /dev/null +++ b/tests/test_data/simple_analysis.yaml @@ -0,0 +1,5 @@ +kind: analysis +name: test-analysis-deprecated-fmin +pipeline: bilby +waveform: + approximant: IMRPhenomXPHM diff --git a/tests/test_data/test_strategy_event.yaml b/tests/test_data/test_strategy_event.yaml new file mode 100644 index 00000000..3c060767 --- /dev/null +++ b/tests/test_data/test_strategy_event.yaml @@ -0,0 +1,13 @@ +kind: event +name: S000000 +event time: 900 +priors: + mass ratio: + maximum: 1.0 + minimum: 0.125 + luminosity distance: + maximum: 1000 + minimum: 10 + name: luminosity_distance + geocent time: + name: geocent_time diff --git a/tests/test_data/test_strategy_matrix.yaml b/tests/test_data/test_strategy_matrix.yaml new file mode 100644 index 00000000..f4761002 --- /dev/null +++ b/tests/test_data/test_strategy_matrix.yaml @@ -0,0 +1,13 @@ +# Test blueprint with a multi-parameter strategy (matrix) +kind: analysis +name: bilby-{waveform.approximant}-{sampler.sampler} +event: S000000 +pipeline: bilby +comment: PE job testing waveform and sampler combinations +strategy: + waveform.approximant: + - IMRPhenomXPHM + - SEOBNRv4PHM + sampler.sampler: + - dynesty + - emcee diff --git a/tests/test_data/test_strategy_single.yaml b/tests/test_data/test_strategy_single.yaml new file mode 100644 index 00000000..c86658c0 --- /dev/null +++ b/tests/test_data/test_strategy_single.yaml @@ -0,0 +1,11 @@ +# Test blueprint with a single-parameter strategy +kind: analysis +name: bilby-{waveform.approximant} +event: S000000 +pipeline: bilby +comment: PE job testing multiple waveforms +strategy: + waveform.approximant: + - IMRPhenomXPHM + - SEOBNRv4PHM + - IMRPhenomD diff --git a/tests/test_data/testing_events.yaml b/tests/test_data/testing_events.yaml index 566ff553..edb88539 100644 --- a/tests/test_data/testing_events.yaml +++ b/tests/test_data/testing_events.yaml @@ -33,7 +33,7 @@ priors: mass ratio: maximum: 
1.0 minimum: 0.05 -quality: +waveform: minimum frequency: H1: 20 L1: 20 @@ -73,7 +73,7 @@ priors: mass ratio: maximum: 1.0 minimum: 0.05 -quality: +waveform: minimum frequency: H1: 20 L1: 20 @@ -116,7 +116,7 @@ priors: mass ratio: maximum: 1.0 minimum: 0.05 -quality: +waveform: minimum frequency: H1: 20 L1: 20 diff --git a/tests/test_data/testing_pe.yaml b/tests/test_data/testing_pe.yaml index 1ef3f508..9ba3110a 100644 --- a/tests/test_data/testing_pe.yaml +++ b/tests/test_data/testing_pe.yaml @@ -7,14 +7,19 @@ kind: configuration # Data quality settings quality: - minimum frequency: - H1: 20 - L1: 20 - V1: 20 state vector: L1: L1:DCS-CALIB_STATE_VECTOR_C01 H1: H1:DCS-CALIB_STATE_VECTOR_C01 V1: V1:DQ_ANALYSIS_STATE_VECTOR + +# Waveform settings + +waveform: + minimum frequency: + H1: 20 + L1: 20 + V1: 20 + # Pipeline settings pipelines: diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py new file mode 100644 index 00000000..ddcedcc2 --- /dev/null +++ b/tests/test_dependencies.py @@ -0,0 +1,260 @@ +""" +Tests for the improved dependency management system. 
+""" +import os +import shutil +import unittest + +from asimov.ledger import YAMLLedger +from asimov.cli.project import make_project +from asimov.cli.application import apply_page + + +class DependencyTests(unittest.TestCase): + """Tests for flexible dependency specification.""" + + @classmethod + def setUpClass(cls): + cls.cwd = os.getcwd() + + @classmethod + def tearDownClass(cls): + """Destroy all the products of this test.""" + os.chdir(cls.cwd) + + def setUp(self): + os.makedirs(f"{self.cwd}/tests/tmp/dep_project") + os.chdir(f"{self.cwd}/tests/tmp/dep_project") + make_project(name="Test project", root=f"{self.cwd}/tests/tmp/dep_project") + self.ledger = YAMLLedger(f".asimov/ledger.yml") + apply_page(file=f"{self.cwd}/tests/test_data/testing_pe.yaml", event=None, ledger=self.ledger) + apply_page(file=f"{self.cwd}/tests/test_data/events_blueprint.yaml", ledger=self.ledger) + + def tearDown(self): + del(self.ledger) + shutil.rmtree(f"{self.cwd}/tests/tmp/dep_project") + + def test_simple_name_dependency(self): + """Test that simple name-based dependencies still work.""" + apply_page(file=f"{self.cwd}/tests/test_data/test_linear_dag.yaml", ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + # Prod1 should depend on Prod0 + prod1 = [p for p in event.productions if p.name == 'Prod1'][0] + self.assertEqual(len(prod1.dependencies), 1) + self.assertIn('Prod0', prod1.dependencies) + + def test_property_based_dependency(self): + """Test dependencies based on properties like pipeline.""" + # Create test blueprint with property-based dependency + blueprint = """ +kind: analysis +name: Prod0 +pipeline: bayeswave +status: uploaded +--- +kind: analysis +name: Prod1 +pipeline: bilby +needs: + - pipeline: bayeswave +""" + with open('test_property_dep.yaml', 'w') as f: + f.write(blueprint) + + apply_page(file='test_property_dep.yaml', event='GW150914_095045', ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + prod1 = [p for p 
in event.productions if p.name == 'Prod1'][0] + self.assertEqual(len(prod1.dependencies), 1) + self.assertIn('Prod0', prod1.dependencies) + + def test_negation_dependency(self): + """Test negation in dependency specifications.""" + blueprint = """ +kind: analysis +name: Prod0 +pipeline: bayeswave +status: uploaded +--- +kind: analysis +name: Prod1 +pipeline: bilby +status: uploaded +--- +kind: analysis +name: Prod2 +pipeline: lalinference +needs: + - pipeline: "!bayeswave" +""" + with open('test_negation.yaml', 'w') as f: + f.write(blueprint) + + apply_page(file='test_negation.yaml', event='GW150914_095045', ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + prod2 = [p for p in event.productions if p.name == 'Prod2'][0] + # Should match Prod1 (bilby) and Prod2 (lalinference) but not Prod0 (bayeswave) + self.assertIn('Prod1', prod2.dependencies) + self.assertNotIn('Prod0', prod2.dependencies) + + def test_or_logic_multiple_values(self): + """Test OR logic with multiple separate dependency items.""" + blueprint = """ +kind: analysis +name: ProdA +pipeline: bayeswave +status: uploaded +waveform: + approximant: IMRPhenomXPHM +--- +kind: analysis +name: ProdB +pipeline: bilby +status: uploaded +waveform: + approximant: SEOBNRv5PHM +--- +kind: analysis +name: ProdC +pipeline: lalinference +status: uploaded +waveform: + approximant: IMRPhenomD +--- +kind: analysis +name: Combiner +pipeline: bilby +needs: + - waveform.approximant: IMRPhenomXPHM + - waveform.approximant: SEOBNRv5PHM +""" + with open('test_or_logic.yaml', 'w') as f: + f.write(blueprint) + + apply_page(file='test_or_logic.yaml', event='GW150914_095045', ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + combiner = [p for p in event.productions if p.name == 'Combiner'][0] + # Should match both ProdA and ProdB (OR logic) + self.assertEqual(len(combiner.dependencies), 2) + self.assertIn('ProdA', combiner.dependencies) + self.assertIn('ProdB', 
combiner.dependencies) + self.assertNotIn('ProdC', combiner.dependencies) + + def test_and_logic_nested_list(self): + """Test AND logic using nested lists.""" + blueprint = """ +kind: analysis +name: ProdA +pipeline: bayeswave +status: uploaded +waveform: + approximant: IMRPhenomXPHM +--- +kind: analysis +name: ProdB +pipeline: bilby +status: uploaded +waveform: + approximant: IMRPhenomXPHM +--- +kind: analysis +name: ProdC +pipeline: bayeswave +status: uploaded +waveform: + approximant: SEOBNRv5PHM +--- +kind: analysis +name: Selector +pipeline: lalinference +needs: + - - pipeline: bayeswave + - waveform.approximant: IMRPhenomXPHM +""" + with open('test_and_logic.yaml', 'w') as f: + f.write(blueprint) + + apply_page(file='test_and_logic.yaml', event='GW150914_095045', ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + selector = [p for p in event.productions if p.name == 'Selector'][0] + # Should only match ProdA (bayeswave AND IMRPhenomXPHM) + self.assertEqual(len(selector.dependencies), 1) + self.assertIn('ProdA', selector.dependencies) + self.assertNotIn('ProdB', selector.dependencies) + self.assertNotIn('ProdC', selector.dependencies) + + def test_staleness_detection(self): + """Test that analyses can detect when dependencies have changed.""" + blueprint = """ +kind: analysis +name: Prod0 +pipeline: bayeswave +status: finished +--- +kind: analysis +name: Prod1 +pipeline: bilby +status: finished +needs: + - Prod0 +""" + with open('test_stale.yaml', 'w') as f: + f.write(blueprint) + + apply_page(file='test_stale.yaml', event='GW150914_095045', ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + prod1 = [p for p in event.productions if p.name == 'Prod1'][0] + + # Initially not stale (no resolved dependencies recorded) + self.assertFalse(prod1.is_stale) + + # Record the dependencies as resolved + prod1.resolved_dependencies = prod1.dependencies + self.assertFalse(prod1.is_stale) + + # Add a new production that 
matches the criteria + blueprint2 = """ +kind: analysis +name: Prod0b +pipeline: bayeswave +status: finished +""" + with open('test_stale2.yaml', 'w') as f: + f.write(blueprint2) + + apply_page(file='test_stale2.yaml', event='GW150914_095045', ledger=self.ledger) + # Reload to get updated productions + event = self.ledger.get_event('GW150914_095045')[0] + + # Now if we change Prod1's needs to match by pipeline, it would become stale + # But for this test, dependencies haven't changed, so it's not stale + prod1_new = [p for p in event.productions if p.name == 'Prod1'][0] + prod1_new.resolved_dependencies = ['Prod0'] + self.assertFalse(prod1_new.is_stale) + + def test_refreshable_flag(self): + """Test the refreshable flag on analyses.""" + apply_page(file=f"{self.cwd}/tests/test_data/test_linear_dag.yaml", ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + prod1 = [p for p in event.productions if p.name == 'Prod1'][0] + + # Default is not refreshable + self.assertFalse(prod1.is_refreshable) + + # Set to refreshable + prod1.is_refreshable = True + self.assertTrue(prod1.is_refreshable) + + # Set to not refreshable + prod1.is_refreshable = False + self.assertFalse(prod1.is_refreshable) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_dependency_logic.py b/tests/test_dependency_logic.py new file mode 100644 index 00000000..9afc9850 --- /dev/null +++ b/tests/test_dependency_logic.py @@ -0,0 +1,316 @@ +""" +Unit tests for dependency resolution logic without requiring full project setup. 
+""" +import unittest +from unittest.mock import Mock, MagicMock +from asimov.analysis import Analysis + + +class MockAnalysis(Analysis): + """Mock analysis for testing dependency logic.""" + + def __init__(self, name, **kwargs): + self.name = name + self.meta = kwargs.get('meta', {}) + self._needs = kwargs.get('needs', []) + self.event = kwargs.get('event', None) + self.status_str = kwargs.get('status', 'ready') + self._reviews = MagicMock() + self._reviews.status = kwargs.get('review_status', 'none') + + @property + def review(self): + return self._reviews + + +class DependencyLogicTests(unittest.TestCase): + """Tests for core dependency resolution logic.""" + + def setUp(self): + """Create mock event with mock analyses.""" + self.event = Mock() + + # Create some mock analyses with different properties + self.analysis1 = MockAnalysis( + 'Prod1', + meta={'pipeline': 'bayeswave', 'waveform': {'approximant': 'IMRPhenomXPHM'}}, + status='finished', + review_status='approved' + ) + self.analysis2 = MockAnalysis( + 'Prod2', + meta={'pipeline': 'bilby', 'waveform': {'approximant': 'SEOBNRv5PHM'}}, + status='finished', + review_status='approved' + ) + self.analysis3 = MockAnalysis( + 'Prod3', + meta={'pipeline': 'bayeswave', 'waveform': {'approximant': 'SEOBNRv5PHM'}}, + status='ready', + review_status='none' + ) + + self.event.analyses = [self.analysis1, self.analysis2, self.analysis3] + + def test_parse_single_dependency_simple_name(self): + """Test parsing a simple name dependency.""" + analysis = MockAnalysis('TestAnalysis', event=self.event) + result = analysis._parse_single_dependency('Prod1') + self.assertEqual(result, (['name'], 'Prod1', False, False)) + + def test_parse_single_dependency_property(self): + """Test parsing a property-based dependency.""" + analysis = MockAnalysis('TestAnalysis', event=self.event) + result = analysis._parse_single_dependency('pipeline: bayeswave') + self.assertEqual(result, (['pipeline'], 'bayeswave', False, False)) + + def 
test_parse_single_dependency_nested_property(self): + """Test parsing a nested property dependency.""" + analysis = MockAnalysis('TestAnalysis', event=self.event) + result = analysis._parse_single_dependency('waveform.approximant: IMRPhenomXPHM') + self.assertEqual(result, (['waveform', 'approximant'], 'IMRPhenomXPHM', False, False)) + + def test_parse_single_dependency_negated(self): + """Test parsing a negated dependency.""" + analysis = MockAnalysis('TestAnalysis', event=self.event) + result = analysis._parse_single_dependency('pipeline: !bayeswave') + self.assertEqual(result, (['pipeline'], 'bayeswave', True, False)) + + def test_matches_filter_simple_name(self): + """Test matching by name.""" + self.assertTrue(self.analysis1.matches_filter(['name'], 'Prod1', False)) + self.assertFalse(self.analysis1.matches_filter(['name'], 'Prod2', False)) + + def test_matches_filter_property(self): + """Test matching by simple property.""" + self.assertTrue(self.analysis1.matches_filter(['pipeline'], 'bayeswave', False)) + self.assertFalse(self.analysis2.matches_filter(['pipeline'], 'bayeswave', False)) + + def test_matches_filter_nested_property(self): + """Test matching by nested property.""" + self.assertTrue( + self.analysis1.matches_filter(['waveform', 'approximant'], 'IMRPhenomXPHM', False) + ) + self.assertFalse( + self.analysis2.matches_filter(['waveform', 'approximant'], 'IMRPhenomXPHM', False) + ) + + def test_matches_filter_status(self): + """Test matching by status.""" + self.assertTrue(self.analysis1.matches_filter(['status'], 'finished', False)) + self.assertFalse(self.analysis3.matches_filter(['status'], 'finished', False)) + + def test_matches_filter_review(self): + """Test matching by review status.""" + self.assertTrue(self.analysis1.matches_filter(['review', 'status'], 'approved', False)) + self.assertFalse(self.analysis3.matches_filter(['review', 'status'], 'approved', False)) + + def test_matches_filter_negation(self): + """Test negated matching.""" + # 
Negated name match + self.assertFalse(self.analysis1.matches_filter(['name'], 'Prod1', True)) + self.assertTrue(self.analysis1.matches_filter(['name'], 'Prod2', True)) + + # Negated pipeline match + self.assertFalse(self.analysis1.matches_filter(['pipeline'], 'bayeswave', True)) + self.assertTrue(self.analysis2.matches_filter(['pipeline'], 'bayeswave', True)) + + def test_process_dependencies_simple_list(self): + """Test processing a simple list of dependencies.""" + analysis = MockAnalysis('TestAnalysis', event=self.event, needs=['Prod1', 'Prod2']) + result = analysis._process_dependencies(analysis._needs) + expected = [ + (['name'], 'Prod1', False, False), + (['name'], 'Prod2', False, False) + ] + self.assertEqual(result, expected) + + def test_process_dependencies_property_queries(self): + """Test processing property-based dependencies.""" + analysis = MockAnalysis( + 'TestAnalysis', + event=self.event, + needs=['pipeline: bayeswave', 'waveform.approximant: IMRPhenomXPHM'] + ) + result = analysis._process_dependencies(analysis._needs) + expected = [ + (['pipeline'], 'bayeswave', False, False), + (['waveform', 'approximant'], 'IMRPhenomXPHM', False, False) + ] + self.assertEqual(result, expected) + + def test_process_dependencies_and_group(self): + """Test processing AND group (nested list).""" + analysis = MockAnalysis( + 'TestAnalysis', + event=self.event, + needs=[['pipeline: bayeswave', 'status: finished']] + ) + result = analysis._process_dependencies(analysis._needs) + expected = [ + [ + (['pipeline'], 'bayeswave', False, False), + (['status'], 'finished', False, False) + ] + ] + self.assertEqual(result, expected) + + def test_dependencies_simple_or_logic(self): + """Test OR logic with multiple simple dependencies.""" + analysis = MockAnalysis( + 'TestAnalysis', + event=self.event, + needs=['Prod1', 'Prod2'] + ) + + deps = analysis.dependencies + self.assertEqual(len(deps), 2) + self.assertIn('Prod1', deps) + self.assertIn('Prod2', deps) + + def 
test_dependencies_property_or_logic(self): + """Test OR logic with property-based dependencies.""" + analysis = MockAnalysis( + 'TestAnalysis', + event=self.event, + needs=['waveform.approximant: IMRPhenomXPHM', 'waveform.approximant: SEOBNRv5PHM'] + ) + + deps = analysis.dependencies + # Should match Prod1 (IMRPhenomXPHM), Prod2 (SEOBNRv5PHM), and Prod3 (SEOBNRv5PHM) + self.assertEqual(len(deps), 3) + self.assertIn('Prod1', deps) + self.assertIn('Prod2', deps) + self.assertIn('Prod3', deps) + + def test_dependencies_and_logic(self): + """Test AND logic with nested list.""" + analysis = MockAnalysis( + 'TestAnalysis', + event=self.event, + needs=[['pipeline: bayeswave', 'waveform.approximant: IMRPhenomXPHM']] + ) + + deps = analysis.dependencies + # Should only match Prod1 (bayeswave AND IMRPhenomXPHM) + self.assertEqual(len(deps), 1) + self.assertIn('Prod1', deps) + + def test_dependencies_negation(self): + """Test negation in dependencies.""" + analysis = MockAnalysis( + 'TestAnalysis', + event=self.event, + needs=['pipeline: !bilby'] + ) + + deps = analysis.dependencies + # Should match Prod1 and Prod3 (both not bilby) + self.assertEqual(len(deps), 2) + self.assertIn('Prod1', deps) + self.assertIn('Prod3', deps) + self.assertNotIn('Prod2', deps) + + def test_dependencies_complex_and_or(self): + """Test complex combination of AND and OR logic.""" + analysis = MockAnalysis( + 'TestAnalysis', + event=self.event, + needs=[ + ['pipeline: bayeswave', 'status: finished'], # AND group + 'waveform.approximant: SEOBNRv5PHM' # OR'd with the AND group + ] + ) + + deps = analysis.dependencies + # Should match: + # - Prod1 (bayeswave AND finished) + # - Prod2 (SEOBNRv5PHM) + # - Prod3 (SEOBNRv5PHM) + self.assertEqual(len(deps), 3) + self.assertIn('Prod1', deps) + self.assertIn('Prod2', deps) + self.assertIn('Prod3', deps) + + def test_staleness_detection(self): + """Test staleness detection.""" + analysis = MockAnalysis('TestAnalysis', event=self.event, needs=['Prod1']) + + # 
Initially not stale (no resolved dependencies) + self.assertFalse(analysis.is_stale) + + # Record dependencies + analysis.resolved_dependencies = ['Prod1'] + self.assertFalse(analysis.is_stale) + + # Change dependencies + analysis._needs = ['Prod2'] + # Now dependencies changed, should be stale + current_deps = set(analysis.dependencies) + resolved_deps = set(analysis.resolved_dependencies) + self.assertNotEqual(current_deps, resolved_deps) + + def test_refreshable_flag(self): + """Test refreshable flag.""" + analysis = MockAnalysis('TestAnalysis', event=self.event) + + # Default is False + self.assertFalse(analysis.is_refreshable) + + # Set to True + analysis.is_refreshable = True + self.assertTrue(analysis.is_refreshable) + self.assertTrue(analysis.meta['refreshable']) + + # Set to False + analysis.is_refreshable = False + self.assertFalse(analysis.is_refreshable) + + def test_parse_dict_format_dependency(self): + """Test parsing dict format (YAML without quotes).""" + analysis = MockAnalysis('TestAnalysis', event=self.event) + + # Dict format as parsed by YAML when no quotes are used + dict_need = {'waveform.approximant': 'IMRPhenomXPHM'} + result = analysis._parse_single_dependency(dict_need) + + expected = (['waveform', 'approximant'], 'IMRPhenomXPHM', False, False) + self.assertEqual(result, expected) + + def test_parse_dict_format_with_negation(self): + """Test parsing dict format with negation.""" + analysis = MockAnalysis('TestAnalysis', event=self.event) + + dict_need = {'pipeline': '!bayeswave'} + result = analysis._parse_single_dependency(dict_need) + + expected = (['pipeline'], 'bayeswave', True, False) + self.assertEqual(result, expected) + + def test_self_dependency_exclusion(self): + """Test that an analysis is never a dependency of itself.""" + # Create analyses where ProdA would match its own filter + event = Mock() + + prod1 = MockAnalysis('Prod1', meta={'pipeline': 'bilby'}) + prod1.event = event + + prod2 = MockAnalysis('Prod2', 
meta={'pipeline': 'bilby'}) + prod2.event = event + + prodA = MockAnalysis('ProdA', meta={'pipeline': 'bilby'}, + needs=[{'pipeline': 'bilby'}]) + prodA.event = event + + event.analyses = [prod1, prod2, prodA] + + deps = prodA.dependencies + + # Should include Prod1 and Prod2, but NOT ProdA itself + self.assertIn('Prod1', deps) + self.assertIn('Prod2', deps) + self.assertNotIn('ProdA', deps) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_html_report.py b/tests/test_html_report.py new file mode 100644 index 00000000..56fd7e86 --- /dev/null +++ b/tests/test_html_report.py @@ -0,0 +1,134 @@ +""" +Test suite for HTML report generation improvements. +""" + +import unittest +from unittest.mock import Mock, MagicMock, patch + + +class TestHTMLReporting(unittest.TestCase): + """Test the HTML generation features for reports.""" + + def _create_mock_analysis(self, status="running", name="TestAnalysis", + rundir="/test/rundir", meta=None): + """Helper to create a mock analysis object.""" + analysis = Mock() + analysis.name = name + analysis.status = status + analysis.comment = None + analysis.pipeline = Mock() + analysis.pipeline.name = "TestPipeline" + analysis.pipeline.html = Mock(return_value="") + analysis.rundir = rundir + analysis.meta = meta or {} + analysis._reviews = Mock() + analysis._reviews.__len__ = Mock(return_value=0) + + # Import the html method from analysis module and bind it + from asimov.analysis import SubjectAnalysis + analysis.html = lambda: SubjectAnalysis.html(analysis) + + return analysis + + def test_analysis_html_contains_status_class(self): + """Test that analysis HTML includes status-specific CSS class.""" + analysis = self._create_mock_analysis(status="running") + + html = analysis.html() + + # Check for status-specific class + self.assertIn("asimov-analysis-running", html) + # Check for running indicator + self.assertIn("running-indicator", html) + # Check for the analysis name + self.assertIn("TestAnalysis", html) + 
+
+    def test_analysis_html_collapsible_details(self):
+        """Test that analysis HTML includes collapsible details section."""
+        analysis = self._create_mock_analysis(
+            status="finished",
+            meta={"approximant": "IMRPhenomPv2"}
+        )
+
+        html = analysis.html()
+
+        # Check for collapsible toggle
+        self.assertIn("toggle-details", html)
+        # Check for details content div
+        self.assertIn("details-content", html)
+        # Check that approximant is in details
+        self.assertIn("IMRPhenomPv2", html)
+
+    def test_analysis_html_with_metadata(self):
+        """Test that analysis HTML displays metadata correctly."""
+        analysis = self._create_mock_analysis(
+            status="finished",
+            meta={
+                "approximant": "IMRPhenomPv2",
+                "quality": "high",
+                "sampler": {"nsamples": 1000}
+            }
+        )
+
+        html = analysis.html()
+
+        # Check for metadata fields
+        self.assertIn("Waveform approximant", html)
+        self.assertIn("IMRPhenomPv2", html)
+        self.assertIn("Quality", html)
+        self.assertIn("high", html)
+
+    def test_event_html_basic_structure(self):
+        """Test that event HTML has basic structure."""
+        from asimov.event import Event
+
+        event = Mock(spec=Event)
+        event.name = "GW150914_095045"
+        event.productions = []
+        event.meta = {"gps": 1126259462.4}
+        event.graph = MagicMock()
+        event.graph.nodes = Mock(return_value=[])
+
+        # Import and bind the html method
+        from asimov.event import Event as RealEvent
+        event.html = lambda: RealEvent.html(event)
+
+        html = event.html()
+
+        # Check for event name
+        self.assertIn("GW150914_095045", html)
+        # Check for GPS time
+        self.assertIn("GPS Time", html)
+        self.assertIn("1126259462.4", html)
+        # Check for card structure
+        self.assertIn("event-data", html)
+
+    def test_event_html_with_interferometers(self):
+        """Test that event HTML displays interferometer information."""
+        from asimov.event import Event
+
+        event = Mock(spec=Event)
+        event.name = "GW150914_095045"
+        event.productions = []
+        event.meta = {
+            "gps": 1126259462.4,
+            "interferometers": ["H1", "L1"]
+        }
+        event.graph = MagicMock()
+        event.graph.nodes = Mock(return_value=[])
+
+        # Import and bind the html method
+        from asimov.event import Event as RealEvent
+        event.html = lambda: RealEvent.html(event)
+
+        html = event.html()
+
+        # Check for interferometers
+        self.assertIn("Interferometers", html)
+        # Should contain both IFOs
+        self.assertIn("H1", html)
+        self.assertIn("L1", html)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_logging.py b/tests/test_logging.py
new file mode 100644
index 00000000..1cf2220d
--- /dev/null
+++ b/tests/test_logging.py
@@ -0,0 +1,151 @@
+"""
+Tests for the logging interface improvements.
+"""
+import unittest
+import os
+import shutil
+import tempfile
+from unittest.mock import patch
+from click.testing import CliRunner
+from asimov.cli import project
+from asimov.olivaw import olivaw
+
+
+class TestLogging(unittest.TestCase):
+    """Test the logging interface improvements."""
+
+    def setUp(self):
+        """Set up test environment."""
+        self.test_dir = tempfile.mkdtemp()
+        self.original_cwd = os.getcwd()
+        # Reset the module-global file handler so each test starts without a
+        # configured log file. This is a direct assignment to the private
+        # attribute — no public reset API is used here.
+        import asimov
+        asimov._file_handler = None
+
+    def tearDown(self):
+        """Clean up test environment."""
+        os.chdir(self.original_cwd)
+        if os.path.exists(self.test_dir):
+            shutil.rmtree(self.test_dir)
+
+    def test_help_does_not_create_log(self):
+        """Test that --help does not create a log file."""
+        os.chdir(self.test_dir)
+        runner = CliRunner()
+        result = runner.invoke(olivaw, ['--help'])
+
+        # Check that no asimov.log file was created
+        self.assertFalse(os.path.exists('asimov.log'),
+                         "asimov.log should not be created for --help command")
+        self.assertEqual(result.exit_code, 0)
+
+    def test_version_does_not_create_log(self):
+        """Test that --version does not create a log file."""
+        os.chdir(self.test_dir)
+        runner = CliRunner()
+        result = runner.invoke(olivaw, ['--version'])
+
+        # Check that no asimov.log file was created
+        self.assertFalse(os.path.exists('asimov.log'),
+                         "asimov.log should not be created for --version command")
+        self.assertEqual(result.exit_code, 0)
+
+    def test_init_creates_log_in_logs_directory(self):
+        """Test that init command creates log in the logs directory."""
+        os.chdir(self.test_dir)
+        runner = CliRunner()
+        result = runner.invoke(project.init, ['Test Project', '--root', self.test_dir])
+
+        # Check that logs directory was created
+        self.assertTrue(os.path.exists('logs'),
+                        "logs directory should exist")
+
+        # Check that log was created in logs directory, not current directory
+        self.assertFalse(os.path.exists('asimov.log'),
+                         "asimov.log should not be in current directory")
+        self.assertTrue(os.path.exists(os.path.join('logs', 'asimov.log')),
+                        "asimov.log should be in logs directory")
+        self.assertEqual(result.exit_code, 0)
+
+        # Verify the log contains expected content
+        with open(os.path.join('logs', 'asimov.log'), 'r') as f:
+            log_content = f.read()
+        self.assertIn('A new project was created', log_content)
+        self.assertIn('[INFO]', log_content)
+
+    def test_log_rotation_config(self):
+        """Test that log rotation configuration is read correctly."""
+        from asimov import setup_file_logging
+        import asimov
+
+        # Reset handler
+        asimov._file_handler = None
+
+        os.chdir(self.test_dir)
+
+        # Create a test log file
+        log_path = os.path.join(self.test_dir, 'test.log')
+        setup_file_logging(logfile=log_path)
+
+        # Verify handler was created
+        self.assertIsNotNone(asimov._file_handler)
+
+        # Check that it's a RotatingFileHandler with expected defaults
+        from logging.handlers import RotatingFileHandler
+        self.assertIsInstance(asimov._file_handler, RotatingFileHandler)
+        self.assertEqual(asimov._file_handler.maxBytes, 10 * 1024 * 1024)  # 10 MB
+        self.assertEqual(asimov._file_handler.backupCount, 5)
+
+    def test_invalid_log_directory_fallback(self):
+        """Test that invalid log directory falls back to current directory."""
+        from asimov import setup_file_logging
+        import asimov
+
+        # Reset handler
+        asimov._file_handler = None
+
+        os.chdir(self.test_dir)
+
+        # Try to create a log in a directory that cannot be created (invalid path)
+        with patch('asimov.config.get') as mock_config:
+            # Return an invalid path that will fail os.makedirs
+            mock_config.return_value = '/root/invalid_path_no_permission'
+
+            # This should fall back gracefully
+            setup_file_logging()
+
+        # Should have created handler in current directory as fallback
+        # Note: may be None if both attempts fail, which is acceptable
+        # NOTE(review): this test contains no assertions — it only verifies
+        # that setup_file_logging() does not raise for an invalid directory.
+
+    def test_setup_file_logging_thread_safety(self):
+        """Test that setup_file_logging is thread-safe."""
+        from asimov import setup_file_logging
+        import asimov
+        import threading
+
+        # Reset handler
+        asimov._file_handler = None
+
+        os.chdir(self.test_dir)
+
+        log_path = os.path.join(self.test_dir, 'thread_test.log')
+        results = []
+
+        def call_setup():
+            setup_file_logging(logfile=log_path)
+            results.append(asimov._file_handler)
+
+        # Call from multiple threads
+        threads = [threading.Thread(target=call_setup) for _ in range(5)]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+
+        # All threads should see the same handler (only one created)
+        self.assertTrue(all(h == results[0] for h in results))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_monitor_api.py b/tests/test_monitor_api.py
new file mode 100644
index 00000000..3dc2e802
--- /dev/null
+++ b/tests/test_monitor_api.py
@@ -0,0 +1,175 @@
+"""
+Unit tests for the programmatic monitor API.
+"""
+
+import unittest
+from unittest.mock import Mock, patch
+
+from asimov.monitor_api import (
+    run_monitor,
+    get_analysis_status,
+    list_active_analyses,
+)
+
+
+class TestMonitorAPI(unittest.TestCase):
+    """Test the programmatic monitor API."""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        # NOTE(review): self.mock_ledger is never used by the tests below —
+        # each test patches asimov.monitor_api.ledger instead.
+        self.mock_ledger = Mock()
+        self.mock_ledger.project_analyses = []
+        self.mock_ledger.get_event = Mock(return_value=[])
+
+    @patch('asimov.monitor_api.condor')
+    @patch('asimov.monitor_api.ledger')
+    @patch('asimov.monitor_api.monitor_analysis')
+    def test_run_monitor_basic(self, mock_monitor_analysis, mock_ledger, mock_condor):
+        """Test basic run_monitor call."""
+        # Mock condor job list
+        mock_job_list = Mock()
+        mock_condor.CondorJobList.return_value = mock_job_list
+
+        # Mock ledger
+        mock_ledger.project_analyses = []
+        mock_ledger.get_event.return_value = []
+
+        # Run monitor
+        results = run_monitor()
+
+        # Check results
+        self.assertIsInstance(results, dict)
+        self.assertIn('total', results)
+        self.assertIn('project_analyses', results)
+        self.assertIn('event_analyses', results)
+        self.assertEqual(results['total'], 0)
+
+    @patch('asimov.monitor_api.condor')
+    @patch('asimov.monitor_api.ledger')
+    @patch('asimov.monitor_api.monitor_analysis')
+    def test_run_monitor_with_analyses(self, mock_monitor_analysis, mock_ledger, mock_condor):
+        """Test run_monitor with active analyses."""
+        # Mock condor
+        mock_job_list = Mock()
+        mock_condor.CondorJobList.return_value = mock_job_list
+
+        # Mock analyses
+        mock_analysis = Mock()
+        mock_analysis.name = "test_analysis"
+        mock_analysis.status = "running"
+        mock_analysis.pipeline = "bilby"
+
+        mock_ledger.project_analyses = [mock_analysis]
+        mock_ledger.get_event.return_value = []
+
+        # Run monitor
+        results = run_monitor()
+
+        # Check that monitor_analysis was called
+        mock_monitor_analysis.assert_called_once()
+        self.assertEqual(results['project_analyses'], 1)
+        self.assertEqual(results['total'], 1)
+
@patch('asimov.monitor_api.condor') + def test_run_monitor_no_condor(self, mock_condor): + """Test run_monitor raises error when condor not available.""" + # Mock condor error - use the class without instantiation + mock_condor.htcondor.HTCondorLocateError = Exception + mock_condor.CondorJobList.side_effect = Exception + + # Should raise RuntimeError + with self.assertRaises(RuntimeError): + run_monitor() + + @patch('asimov.monitor_api.ledger') + def test_get_analysis_status(self, mock_ledger): + """Test get_analysis_status function.""" + # Mock analyses + mock_analysis = Mock() + mock_analysis.name = "test_analysis" + mock_analysis.status = "running" + + mock_ledger.project_analyses = [mock_analysis] + mock_ledger.get_event.return_value = [] + + # Get statuses + statuses = get_analysis_status() + + # Check results + self.assertIsInstance(statuses, dict) + self.assertIn("project_analyses/test_analysis", statuses) + self.assertEqual(statuses["project_analyses/test_analysis"], "running") + + @patch('asimov.monitor_api.ledger') + def test_get_analysis_status_filtered(self, mock_ledger): + """Test get_analysis_status with filter.""" + # Mock analyses + mock_analysis1 = Mock() + mock_analysis1.name = "test_analysis_1" + mock_analysis1.status = "running" + + mock_analysis2 = Mock() + mock_analysis2.name = "test_analysis_2" + mock_analysis2.status = "finished" + + mock_ledger.project_analyses = [mock_analysis1, mock_analysis2] + mock_ledger.get_event.return_value = [] + + # Get status for specific analysis + statuses = get_analysis_status(analysis_name="test_analysis_1") + + # Should only have one + self.assertEqual(len(statuses), 1) + self.assertIn("project_analyses/test_analysis_1", statuses) + + @patch('asimov.monitor_api.ledger') + def test_list_active_analyses(self, mock_ledger): + """Test list_active_analyses function.""" + # Mock analyses + mock_analysis = Mock() + mock_analysis.name = "test_analysis" + mock_analysis.status = "running" + mock_analysis.pipeline = 
"bilby" + + mock_ledger.project_analyses = [mock_analysis] + mock_ledger.get_event.return_value = [] + + # List analyses + analyses = list_active_analyses() + + # Check results + self.assertIsInstance(analyses, list) + self.assertEqual(len(analyses), 1) + self.assertEqual(analyses[0]['name'], "test_analysis") + self.assertEqual(analyses[0]['status'], "running") + self.assertEqual(analyses[0]['type'], "project") + + @patch('asimov.monitor_api.ledger') + def test_list_active_analyses_with_events(self, mock_ledger): + """Test list_active_analyses with event analyses.""" + # Mock event + mock_event = Mock() + mock_event.name = "GW150914" + + mock_production = Mock() + mock_production.name = "bilby_prod" + mock_production.status = "running" + mock_production.pipeline = "bilby" + + mock_event.productions = [mock_production] + + mock_ledger.project_analyses = [] + mock_ledger.get_event.return_value = [mock_event] + + # List analyses + analyses = list_active_analyses() + + # Check results + self.assertEqual(len(analyses), 1) + self.assertEqual(analyses[0]['name'], "bilby_prod") + self.assertEqual(analyses[0]['type'], "event") + self.assertEqual(analyses[0]['event'], "GW150914") + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_monitor_helpers.py b/tests/test_monitor_helpers.py new file mode 100644 index 00000000..654f2d02 --- /dev/null +++ b/tests/test_monitor_helpers.py @@ -0,0 +1,232 @@ +""" +Unit tests for the monitor helper functions. 
+""" + +import unittest +from unittest.mock import Mock, patch + +from asimov.monitor_helpers import monitor_analysis, monitor_analyses_list + + + +class TestMonitorAnalysis(unittest.TestCase): + """Test the monitor_analysis function.""" + + def setUp(self): + """Set up test fixtures.""" + self.analysis = Mock() + self.analysis.name = "test_analysis" + self.analysis.status = "running" + self.analysis.pipeline = "bilby" + self.analysis.event = Mock() + self.analysis.event.name = "GW150914" + + self.job_list = Mock() + self.job_list.jobs = {} + + self.ledger = Mock() + self.ledger.update_event = Mock() + self.ledger.save = Mock() + + @patch('asimov.monitor_helpers.get_state_handler') + @patch('asimov.monitor_helpers.click.echo') + def test_monitor_analysis_success(self, mock_echo, mock_get_handler): + """Test successful monitoring of an analysis.""" + # Mock state handler + mock_handler = Mock() + mock_handler.handle = Mock(return_value=True) + mock_get_handler.return_value = mock_handler + + result = monitor_analysis( + self.analysis, + self.job_list, + self.ledger, + dry_run=False + ) + + self.assertTrue(result) + mock_handler.handle.assert_called_once() + + @patch('asimov.monitor_helpers.get_state_handler') + @patch('asimov.monitor_helpers.click.echo') + def test_monitor_analysis_with_path(self, mock_echo, mock_get_handler): + """Test monitoring with explicit analysis path.""" + mock_handler = Mock() + mock_handler.handle = Mock(return_value=True) + mock_get_handler.return_value = mock_handler + + result = monitor_analysis( + self.analysis, + self.job_list, + self.ledger, + analysis_path="custom/path" + ) + + self.assertTrue(result) + + @patch('asimov.monitor_helpers.get_state_handler') + @patch('asimov.monitor_helpers.click.echo') + def test_monitor_analysis_inactive_skipped(self, mock_echo, mock_get_handler): + """Test that inactive analyses are skipped.""" + self.analysis.status = "cancelled" # Not in ACTIVE_STATES + + result = monitor_analysis( + 
self.analysis, + self.job_list, + self.ledger + ) + + # Should still return True (successfully skipped) + self.assertTrue(result) + mock_get_handler.assert_not_called() + + @patch('asimov.monitor_helpers.get_state_handler') + @patch('asimov.monitor_helpers.click.echo') + def test_monitor_analysis_unknown_state(self, mock_echo, mock_get_handler): + """Test handling of unknown state.""" + mock_get_handler.return_value = None # No handler found + + result = monitor_analysis( + self.analysis, + self.job_list, + self.ledger + ) + + self.assertFalse(result) + + @patch('asimov.monitor_helpers.get_state_handler') + @patch('asimov.monitor_helpers.click.echo') + @patch('asimov.monitor_helpers.logger') + def test_monitor_analysis_handler_exception(self, mock_logger, mock_echo, mock_get_handler): + """Test handling of exception in state handler.""" + mock_handler = Mock() + mock_handler.handle = Mock(side_effect=Exception("Test error")) + mock_get_handler.return_value = mock_handler + + result = monitor_analysis( + self.analysis, + self.job_list, + self.ledger + ) + + self.assertFalse(result) + mock_logger.exception.assert_called_once() + + @patch('asimov.monitor_helpers.get_state_handler') + @patch('asimov.monitor_helpers.click.echo') + def test_monitor_project_analysis(self, mock_echo, mock_get_handler): + """Test monitoring a project analysis (no event attribute).""" + # Remove event attribute to simulate ProjectAnalysis + delattr(self.analysis, 'event') + + mock_handler = Mock() + mock_handler.handle = Mock(return_value=True) + mock_get_handler.return_value = mock_handler + + result = monitor_analysis( + self.analysis, + self.job_list, + self.ledger + ) + + self.assertTrue(result) + + @patch('asimov.monitor_helpers.get_state_handler') + @patch('asimov.monitor_helpers.click.echo') + def test_monitor_analysis_with_pipeline_states(self, mock_echo, mock_get_handler): + """Test that pipeline is passed to get_state_handler.""" + mock_handler = Mock() + mock_handler.handle = 
Mock(return_value=True) + mock_get_handler.return_value = mock_handler + + # Create a mock pipeline + mock_pipeline = Mock() + self.analysis.pipeline = mock_pipeline + + monitor_analysis( + self.analysis, + self.job_list, + self.ledger + ) + + # Verify get_state_handler was called with pipeline + mock_get_handler.assert_called_once() + call_kwargs = mock_get_handler.call_args[1] + self.assertEqual(call_kwargs.get('pipeline'), mock_pipeline) + + +class TestMonitorAnalysesList(unittest.TestCase): + """Test the monitor_analyses_list function.""" + + def setUp(self): + """Set up test fixtures.""" + self.job_list = Mock() + self.ledger = Mock() + + @patch('asimov.monitor_helpers.monitor_analysis') + def test_monitor_empty_list(self, mock_monitor): + """Test monitoring an empty list of analyses.""" + stats = monitor_analyses_list([], self.job_list, self.ledger) + + self.assertEqual(stats["total"], 0) + mock_monitor.assert_not_called() + + @patch('asimov.monitor_helpers.monitor_analysis') + def test_monitor_multiple_analyses(self, mock_monitor): + """Test monitoring multiple analyses.""" + mock_monitor.return_value = True + + analyses = [ + Mock(name=f"analysis_{i}", status="running") + for i in range(3) + ] + + stats = monitor_analyses_list( + analyses, + self.job_list, + self.ledger, + label="test_analyses" + ) + + self.assertEqual(stats["total"], 3) + self.assertEqual(mock_monitor.call_count, 3) + + @patch('asimov.monitor_helpers.monitor_analysis') + def test_monitor_mixed_states(self, mock_monitor): + """Test monitoring analyses in different states.""" + mock_monitor.return_value = True + + analyses = [ + Mock(name="running_1", status="running"), + Mock(name="stuck_1", status="stuck"), + Mock(name="ready_1", status="ready"), + Mock(name="finished_1", status="finished"), + Mock(name="running_2", status="running"), + ] + + stats = monitor_analyses_list(analyses, self.job_list, self.ledger) + + self.assertEqual(stats["total"], 5) + self.assertEqual(stats["running"], 2) + 
self.assertEqual(stats["stuck"], 1) + self.assertEqual(stats["ready"], 1) + self.assertEqual(stats["finished"], 1) + + @patch('asimov.monitor_helpers.monitor_analysis') + def test_monitor_skips_inactive(self, mock_monitor): + """Test that inactive analyses are skipped.""" + analyses = [ + Mock(name="active", status="running"), + Mock(name="cancelled", status="cancelled"), + Mock(name="manual", status="manual"), + ] + + stats = monitor_analyses_list(analyses, self.job_list, self.ledger) + + # Only the active one should be monitored + self.assertEqual(stats["total"], 1) + mock_monitor.assert_called_once() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_monitor_states.py b/tests/test_monitor_states.py new file mode 100644 index 00000000..3fc83cca --- /dev/null +++ b/tests/test_monitor_states.py @@ -0,0 +1,566 @@ +""" +Unit tests for the monitor state machine implementation. +""" + +import unittest +from unittest.mock import Mock, patch + + +from asimov.monitor_states import ( + MonitorState, + ReadyState, + StopState, + RunningState, + FinishedState, + ProcessingState, + StuckState, + StoppedState, + get_state_handler, + register_state, + discover_custom_states, + STATE_REGISTRY, +) +from asimov.monitor_context import MonitorContext + + +class TestStateRegistry(unittest.TestCase): + """Test the state registry and handler lookup.""" + + def test_get_state_handler_ready(self): + """Test getting handler for ready state.""" + handler = get_state_handler("ready") + self.assertIsInstance(handler, ReadyState) + + def test_get_state_handler_running(self): + """Test getting handler for running state.""" + handler = get_state_handler("running") + self.assertIsInstance(handler, RunningState) + + def test_get_state_handler_case_insensitive(self): + """Test that state lookup is case insensitive.""" + handler1 = get_state_handler("READY") + handler2 = get_state_handler("ready") + self.assertEqual(type(handler1), type(handler2)) + + def 
test_get_state_handler_unknown(self): + """Test getting handler for unknown state returns None.""" + handler = get_state_handler("unknown_state") + self.assertIsNone(handler) + + def test_all_states_registered(self): + """Test that all expected states are in the registry.""" + expected_states = [ + "ready", "stop", "running", "finished", + "processing", "stuck", "stopped" + ] + for state in expected_states: + self.assertIn(state, STATE_REGISTRY) + + def test_get_state_handler_with_pipeline(self): + """Test getting pipeline-specific state handler.""" + # Create a custom state + class CustomRunningState(MonitorState): + @property + def state_name(self): + return "running" + + def handle(self, context): + return True + + # Create a mock pipeline with custom handlers + mock_pipeline = Mock() + mock_pipeline.name = "test_pipeline" + mock_pipeline.get_state_handlers = Mock(return_value={ + "running": CustomRunningState() + }) + + # Get handler with pipeline + handler = get_state_handler("running", pipeline=mock_pipeline) + + # Should be the custom handler + self.assertIsInstance(handler, CustomRunningState) + + def test_get_state_handler_pipeline_fallback(self): + """Test that handler falls back to default when pipeline has no custom handler.""" + # Create a mock pipeline with no custom handler for 'ready' + mock_pipeline = Mock() + mock_pipeline.name = "test_pipeline" + mock_pipeline.get_state_handlers = Mock(return_value={ + "running": Mock() # Has custom running, but not ready + }) + + # Get handler for 'ready' with pipeline + handler = get_state_handler("ready", pipeline=mock_pipeline) + + # Should fall back to default ReadyState + self.assertIsInstance(handler, ReadyState) + + def test_get_state_handler_pipeline_error(self): + """Test that errors in pipeline.get_state_handlers() are handled gracefully.""" + # Create a mock pipeline that raises an error + mock_pipeline = Mock() + mock_pipeline.name = "test_pipeline" + mock_pipeline.get_state_handlers = 
Mock(side_effect=Exception("Test error")) + + # Should fall back to default without raising + handler = get_state_handler("running", pipeline=mock_pipeline) + self.assertIsInstance(handler, RunningState) + + +class TestPluginSystem(unittest.TestCase): + """Test the plugin system for custom states.""" + + def setUp(self): + """Set up test fixtures.""" + # Save the original registry + self.original_registry = STATE_REGISTRY.copy() + + def tearDown(self): + """Restore the original registry.""" + STATE_REGISTRY.clear() + STATE_REGISTRY.update(self.original_registry) + + def test_register_state(self): + """Test registering a custom state.""" + class CustomState(MonitorState): + @property + def state_name(self): + return "custom" + + def handle(self, context): + return True + + custom = CustomState() + register_state(custom) + + self.assertIn("custom", STATE_REGISTRY) + self.assertEqual(STATE_REGISTRY["custom"], custom) + + def test_register_state_invalid_type(self): + """Test that registering non-MonitorState raises TypeError.""" + with self.assertRaises(TypeError): + register_state("not a state") + + def test_register_state_overwrites_warning(self): + """Test that overwriting existing state logs warning.""" + class CustomReady(MonitorState): + @property + def state_name(self): + return "ready" + + def handle(self, context): + return True + + with patch('asimov.monitor_states.logger') as mock_logger: + register_state(CustomReady()) + mock_logger.warning.assert_called_once() + + @patch('asimov.monitor_states.entry_points') + def test_discover_custom_states(self, mock_entry_points): + """Test discovering custom states via entry points.""" + # Create a mock custom state + class MockCustomState(MonitorState): + @property + def state_name(self): + return "mock_custom" + + def handle(self, context): + return True + + # Mock entry point + mock_ep = Mock() + mock_ep.name = "mock_custom" + mock_ep.value = "test.states:MockCustomState" + mock_ep.load.return_value = 
MockCustomState + + mock_entry_points.return_value = [mock_ep] + + # Clear registry before discovery + STATE_REGISTRY.clear() + STATE_REGISTRY.update(self.original_registry) + + # Discover states + discover_custom_states() + + # Check that custom state was registered + self.assertIn("mock_custom", STATE_REGISTRY) + + @patch('asimov.monitor_states.entry_points') + def test_discover_custom_states_instance(self, mock_entry_points): + """Test discovering custom states that return instances.""" + class MockCustomState(MonitorState): + @property + def state_name(self): + return "mock_instance" + + def handle(self, context): + return True + + # Mock entry point that returns an instance + mock_instance = MockCustomState() + mock_ep = Mock() + mock_ep.name = "mock_instance" + mock_ep.value = "test.states:custom_state_instance" + mock_ep.load.return_value = mock_instance + + mock_entry_points.return_value = [mock_ep] + + STATE_REGISTRY.clear() + STATE_REGISTRY.update(self.original_registry) + + discover_custom_states() + + self.assertIn("mock_instance", STATE_REGISTRY) + self.assertEqual(STATE_REGISTRY["mock_instance"], mock_instance) + + @patch('asimov.monitor_states.entry_points') + def test_discover_custom_states_error_handling(self, mock_entry_points): + """Test that errors in loading custom states are handled gracefully.""" + # Mock entry point that raises an error + mock_ep = Mock() + mock_ep.name = "broken_state" + mock_ep.load.side_effect = ImportError("Module not found") + + mock_entry_points.return_value = [mock_ep] + + # Should not raise an exception + with patch('asimov.monitor_states.logger') as mock_logger: + discover_custom_states() + mock_logger.warning.assert_called() + + +class TestMonitorContext(unittest.TestCase): + """Test the MonitorContext class.""" + + def setUp(self): + """Set up test fixtures.""" + self.analysis = Mock() + self.analysis.name = "test_analysis" + self.analysis.meta = {"scheduler": {"job id": "12345"}} + self.analysis.event = Mock() + 
self.analysis.event.name = "GW150914" + + self.job_list = Mock() + self.job_list.jobs = {"12345": Mock()} + self.job_list.refresh = Mock() + + self.ledger = Mock() + self.ledger.update_event = Mock() + self.ledger.update_analysis_in_project_analysis = Mock() + self.ledger.save = Mock() + + self.context = MonitorContext( + analysis=self.analysis, + job_list=self.job_list, + ledger=self.ledger, + dry_run=False, + analysis_path="GW150914/test_analysis" + ) + + def test_job_id_retrieval(self): + """Test that job_id property returns correct ID.""" + self.assertEqual(self.context.job_id, "12345") + + def test_job_id_missing(self): + """Test job_id when scheduler metadata is missing.""" + self.analysis.meta = {} + self.assertIsNone(self.context.job_id) + + def test_has_condor_job(self): + """Test has_condor_job returns True when job ID exists.""" + self.assertTrue(self.context.has_condor_job()) + + def test_has_condor_job_missing(self): + """Test has_condor_job returns False when job ID is missing.""" + self.analysis.meta = {} + context = MonitorContext(self.analysis, self.job_list, self.ledger) + self.assertFalse(context.has_condor_job()) + + def test_clear_job_id(self): + """Test clearing the job ID.""" + self.context.clear_job_id() + self.assertIsNone(self.analysis.meta["scheduler"]["job id"]) + + def test_update_ledger_event_analysis(self): + """Test ledger update for event analysis.""" + self.context.update_ledger() + self.ledger.update_event.assert_called_once_with(self.analysis.event) + self.ledger.save.assert_called_once() + + def test_update_ledger_project_analysis(self): + """Test ledger update for project analysis.""" + # Remove event attribute to simulate project analysis + delattr(self.analysis, 'event') + self.context.update_ledger() + self.ledger.update_analysis_in_project_analysis.assert_called_once_with( + self.analysis + ) + self.ledger.save.assert_called_once() + + def test_update_ledger_dry_run(self): + """Test that ledger is not updated in dry run 
mode.""" + context = MonitorContext( + self.analysis, self.job_list, self.ledger, dry_run=True + ) + context.update_ledger() + self.ledger.update_event.assert_not_called() + self.ledger.save.assert_not_called() + + def test_refresh_job_list(self): + """Test refreshing job list.""" + self.context.refresh_job_list() + self.job_list.refresh.assert_called_once() + + def test_refresh_job_list_dry_run(self): + """Test that job list is not refreshed in dry run mode.""" + context = MonitorContext( + self.analysis, self.job_list, self.ledger, dry_run=True + ) + context.refresh_job_list() + self.job_list.refresh.assert_not_called() + + +class TestReadyState(unittest.TestCase): + """Test the ReadyState handler.""" + + def setUp(self): + """Set up test fixtures.""" + self.analysis = Mock() + self.analysis.name = "test_analysis" + self.analysis.status = "ready" + self.analysis.event = Mock() + self.analysis.event.name = "GW150914" + + self.context = Mock(spec=MonitorContext) + self.context.analysis = self.analysis + self.context.analysis_path = "GW150914/test_analysis" + + self.state = ReadyState() + + def test_state_name(self): + """Test state name property.""" + self.assertEqual(self.state.state_name, "ready") + + @patch('asimov.monitor_states.click.secho') + def test_handle_ready_state(self, mock_secho): + """Test handling ready state.""" + result = self.state.handle(self.context) + self.assertTrue(result) + mock_secho.assert_called_once() + + +class TestStopState(unittest.TestCase): + """Test the StopState handler.""" + + def setUp(self): + """Set up test fixtures.""" + self.analysis = Mock() + self.analysis.name = "test_analysis" + self.analysis.status = "stop" + self.analysis.pipeline = Mock() + self.analysis.pipeline.eject_job = Mock() + + self.context = Mock(spec=MonitorContext) + self.context.analysis = self.analysis + self.context.analysis_path = "GW150914/test_analysis" + self.context.dry_run = False + self.context.update_ledger = Mock() + + self.state = StopState() 
+ + def test_state_name(self): + """Test state name property.""" + self.assertEqual(self.state.state_name, "stop") + + @patch('asimov.monitor_states.click.secho') + def test_handle_stop_state(self, mock_secho): + """Test handling stop state.""" + result = self.state.handle(self.context) + self.assertTrue(result) + self.analysis.pipeline.eject_job.assert_called_once() + self.assertEqual(self.analysis.status, "stopped") + self.context.update_ledger.assert_called_once() + + @patch('asimov.monitor_states.click.echo') + def test_handle_stop_state_dry_run(self, mock_echo): + """Test handling stop state in dry run mode.""" + self.context.dry_run = True + result = self.state.handle(self.context) + self.assertTrue(result) + self.analysis.pipeline.eject_job.assert_not_called() + self.context.update_ledger.assert_not_called() + + +class TestRunningState(unittest.TestCase): + """Test the RunningState handler.""" + + def setUp(self): + """Set up test fixtures.""" + self.analysis = Mock() + self.analysis.name = "test_analysis" + self.analysis.status = "running" + self.analysis.meta = {"scheduler": {"job id": "12345"}} + self.analysis.pipeline = Mock() + self.analysis.pipeline.while_running = Mock() + self.analysis.pipeline.detect_completion = Mock(return_value=False) + + self.job = Mock() + self.job.status = "running" + + self.context = Mock(spec=MonitorContext) + self.context.analysis = self.analysis + self.context.job = self.job + self.context.job_id = "12345" + self.context.analysis_path = "GW150914/test_analysis" + self.context.dry_run = False + self.context.has_condor_job = Mock(return_value=True) + self.context.update_ledger = Mock() + self.context.refresh_job_list = Mock() + + self.state = RunningState() + + def test_state_name(self): + """Test state name property.""" + self.assertEqual(self.state.state_name, "running") + + @patch('asimov.monitor_states.click.echo') + def test_handle_running_job(self, mock_echo): + """Test handling a running condor job.""" + 
self.job.status = "running" + result = self.state.handle(self.context) + self.assertTrue(result) + self.analysis.pipeline.while_running.assert_called_once() + self.context.update_ledger.assert_called_once() + + @patch('asimov.monitor_states.click.echo') + def test_handle_idle_job(self, mock_echo): + """Test handling an idle condor job.""" + self.job.status = "idle" + result = self.state.handle(self.context) + self.assertTrue(result) + mock_echo.assert_called_once() + + @patch('asimov.monitor_states.click.echo') + def test_handle_completed_job(self, mock_echo): + """Test handling a completed condor job.""" + self.job.status = "completed" + result = self.state.handle(self.context) + self.assertTrue(result) + self.analysis.pipeline.after_completion.assert_called_once() + self.context.refresh_job_list.assert_called_once() + + @patch('asimov.monitor_states.click.echo') + def test_handle_held_job(self, mock_echo): + """Test handling a held condor job.""" + self.job.status = "held" + result = self.state.handle(self.context) + self.assertTrue(result) + self.assertEqual(self.analysis.status, "stuck") + self.context.update_ledger.assert_called_once() + + +class TestFinishedState(unittest.TestCase): + """Test the FinishedState handler.""" + + def setUp(self): + """Set up test fixtures.""" + self.analysis = Mock() + self.analysis.name = "test_analysis" + self.analysis.status = "finished" + self.analysis.pipeline = Mock() + self.analysis.pipeline.after_completion = Mock() + + self.context = Mock(spec=MonitorContext) + self.context.analysis = self.analysis + self.context.analysis_path = "GW150914/test_analysis" + self.context.refresh_job_list = Mock() + + self.state = FinishedState() + + def test_state_name(self): + """Test state name property.""" + self.assertEqual(self.state.state_name, "finished") + + @patch('asimov.monitor_states.click.echo') + def test_handle_finished_state(self, mock_echo): + """Test handling finished state.""" + # Ensure pipeline has after_completion 
method + self.analysis.pipeline.after_completion = Mock() + + result = self.state.handle(self.context) + self.assertTrue(result) + self.analysis.pipeline.after_completion.assert_called_once() + self.context.refresh_job_list.assert_called_once() + + +class TestProcessingState(unittest.TestCase): + """Test the ProcessingState handler.""" + + def setUp(self): + """Set up test fixtures.""" + self.analysis = Mock() + self.analysis.name = "test_analysis" + self.analysis.status = "processing" + self.analysis.pipeline = Mock() + self.analysis.pipeline.detect_completion_processing = Mock(return_value=True) + self.analysis.pipeline.after_processing = Mock() + self.analysis.pipeline.detect_completion = Mock(return_value=False) + + self.context = Mock(spec=MonitorContext) + self.context.analysis = self.analysis + self.context.analysis_path = "GW150914/test_analysis" + self.context.job_id = "12345" + + self.state = ProcessingState() + + def test_state_name(self): + """Test state name property.""" + self.assertEqual(self.state.state_name, "processing") + + @patch('asimov.monitor_states.click.echo') + def test_handle_processing_complete(self, mock_echo): + """Test handling completed processing.""" + self.analysis.pipeline.detect_completion_processing.return_value = True + result = self.state.handle(self.context) + self.assertTrue(result) + self.analysis.pipeline.after_processing.assert_called_once() + + @patch('asimov.monitor_states.click.echo') + def test_handle_processing_running(self, mock_echo): + """Test handling running processing.""" + self.analysis.pipeline.detect_completion_processing.return_value = False + self.analysis.pipeline.detect_completion.return_value = True + result = self.state.handle(self.context) + self.assertTrue(result) + mock_echo.assert_called() + + +class TestStuckState(unittest.TestCase): + """Test the StuckState handler.""" + + def setUp(self): + """Set up test fixtures.""" + self.analysis = Mock() + self.analysis.name = "test_analysis" + 
self.analysis.status = "stuck" + + self.context = Mock(spec=MonitorContext) + self.context.analysis = self.analysis + self.context.analysis_path = "GW150914/test_analysis" + + self.state = StuckState() + + def test_state_name(self): + """Test state name property.""" + self.assertEqual(self.state.state_name, "stuck") + + @patch('asimov.monitor_states.click.echo') + def test_handle_stuck_state(self, mock_echo): + """Test handling stuck state.""" + result = self.state.handle(self.context) + self.assertTrue(result) + mock_echo.assert_called_once() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_optional_dependencies.py b/tests/test_optional_dependencies.py new file mode 100644 index 00000000..0d705204 --- /dev/null +++ b/tests/test_optional_dependencies.py @@ -0,0 +1,170 @@ +""" +Tests for optional dependency handling. +""" +import os +import shutil +import unittest + +from asimov.ledger import YAMLLedger +from asimov.cli.project import make_project +from asimov.cli.application import apply_page + + +class OptionalDependencyTests(unittest.TestCase): + """Tests for optional dependency specifications.""" + + @classmethod + def setUpClass(cls): + cls.cwd = os.getcwd() + + @classmethod + def tearDownClass(cls): + """Destroy all the products of this test.""" + os.chdir(cls.cwd) + + def setUp(self): + os.makedirs(f"{self.cwd}/tests/tmp/optional_dep_project") + os.chdir(f"{self.cwd}/tests/tmp/optional_dep_project") + make_project(name="Test project", root=f"{self.cwd}/tests/tmp/optional_dep_project") + self.ledger = YAMLLedger(f".asimov/ledger.yml") + apply_page(file=f"{self.cwd}/tests/test_data/testing_pe.yaml", event=None, ledger=self.ledger) + apply_page(file=f"{self.cwd}/tests/test_data/events_blueprint.yaml", ledger=self.ledger) + + def tearDown(self): + shutil.rmtree(f"{self.cwd}/tests/tmp/optional_dep_project") + + def test_optional_dependency_parsing(self): + """Test that optional dependencies are correctly parsed.""" + blueprint = """ +kind: 
analysis +name: Prod0 +pipeline: bayeswave +status: finished +--- +kind: analysis +name: Prod1 +pipeline: bilby +needs: + - optional: true + pipeline: bayeswave +""" + with open('test_optional.yaml', 'w') as f: + f.write(blueprint) + + apply_page(file='test_optional.yaml', event='GW150914_095045', ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + prod1 = [p for p in event.productions if p.name == 'Prod1'][0] + + # Should still resolve the dependency + self.assertEqual(len(prod1.dependencies), 1) + self.assertIn('Prod0', prod1.dependencies) + + # But required dependencies should be empty + self.assertEqual(len(prod1.required_dependencies), 0) + + # And required dependencies should be satisfied (vacuously true) + self.assertTrue(prod1.has_required_dependencies_satisfied) + + def test_required_dependency_not_satisfied(self): + """Test that an analysis recognizes when required dependencies are missing.""" + blueprint = """ +kind: analysis +name: Prod1 +pipeline: bilby +needs: + - pipeline: bayeswave +""" + with open('test_required_missing.yaml', 'w') as f: + f.write(blueprint) + + apply_page(file='test_required_missing.yaml', event='GW150914_095045', ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + prod1 = [p for p in event.productions if p.name == 'Prod1'][0] + + # Should have no resolved dependencies + self.assertEqual(len(prod1.dependencies), 0) + + # Should have required dependencies spec + self.assertGreater(len(prod1.required_dependencies), 0) + + # Required dependencies should NOT be satisfied + self.assertFalse(prod1.has_required_dependencies_satisfied) + + def test_required_dependency_satisfied(self): + """Test that an analysis recognizes when required dependencies are present.""" + blueprint = """ +kind: analysis +name: Prod0 +pipeline: bayeswave +status: finished +--- +kind: analysis +name: Prod1 +pipeline: bilby +needs: + - pipeline: bayeswave +""" + with open('test_required_present.yaml', 'w') as f: 
+ f.write(blueprint) + + apply_page(file='test_required_present.yaml', event='GW150914_095045', ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + prod1 = [p for p in event.productions if p.name == 'Prod1'][0] + + # Should have resolved dependencies + self.assertEqual(len(prod1.dependencies), 1) + + # Should have required dependencies spec + self.assertGreater(len(prod1.required_dependencies), 0) + + # Required dependencies should be satisfied + self.assertTrue(prod1.has_required_dependencies_satisfied) + + def test_mixed_optional_required_dependencies(self): + """Test a mix of optional and required dependencies.""" + blueprint = """ +kind: analysis +name: Prod0 +pipeline: bayeswave +status: finished +--- +kind: analysis +name: Prod1 +pipeline: bilby +status: finished +--- +kind: analysis +name: Combiner +pipeline: lalinference +needs: + - pipeline: bayeswave + - optional: true + pipeline: rift +""" + with open('test_mixed.yaml', 'w') as f: + f.write(blueprint) + + apply_page(file='test_mixed.yaml', event='GW150914_095045', ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + combiner = [p for p in event.productions if p.name == 'Combiner'][0] + + # Should resolve the bayeswave dependency (required and present) + self.assertIn('Prod0', combiner.dependencies) + + # Should have 1 required dependency + self.assertEqual(len(combiner.required_dependencies), 1) + + # Required dependencies should be satisfied (bayeswave is present) + self.assertTrue(combiner.has_required_dependencies_satisfied) + + # Should NOT have the rift dependency (optional and not present) + # Dependencies list only includes what's actually matched + self.assertEqual(len(combiner.dependencies), 1) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_pesummary_subject_analysis.py b/tests/test_pesummary_subject_analysis.py new file mode 100644 index 00000000..ebe9bcf7 --- /dev/null +++ b/tests/test_pesummary_subject_analysis.py 
@@ -0,0 +1,132 @@ +""" +Tests for PESummary as a SubjectAnalysis. +""" +import os +import shutil +import unittest + +from asimov.ledger import YAMLLedger +from asimov.cli.project import make_project +from asimov.cli.application import apply_page + + +class PESummarySubjectAnalysisTests(unittest.TestCase): + """Tests for PESummary working as a SubjectAnalysis.""" + + @classmethod + def setUpClass(cls): + cls.cwd = os.getcwd() + + @classmethod + def tearDownClass(cls): + """Destroy all the products of this test.""" + os.chdir(cls.cwd) + + def setUp(self): + os.makedirs(f"{self.cwd}/tests/tmp/pesummary_subject_project") + os.chdir(f"{self.cwd}/tests/tmp/pesummary_subject_project") + make_project(name="Test project", root=f"{self.cwd}/tests/tmp/pesummary_subject_project") + self.ledger = YAMLLedger(f".asimov/ledger.yml") + apply_page(file=f"{self.cwd}/tests/test_data/testing_pe.yaml", event=None, ledger=self.ledger) + apply_page(file=f"{self.cwd}/tests/test_data/events_blueprint.yaml", ledger=self.ledger) + + def tearDown(self): + shutil.rmtree(f"{self.cwd}/tests/tmp/pesummary_subject_project") + + def test_pesummary_subject_analysis_creation(self): + """Test that a PESummary SubjectAnalysis can be created.""" + blueprint = """ +kind: analysis +name: Bilby1 +pipeline: bilby +status: finished +--- +kind: analysis +name: Bilby2 +pipeline: bilby +status: finished +--- +kind: analysis +name: CombinedPESummary +pipeline: pesummary +analyses: + - pipeline: bilby +""" + with open('test_pesummary_subject.yaml', 'w') as f: + f.write(blueprint) + + apply_page(file='test_pesummary_subject.yaml', event='GW150914_095045', ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + # Check that PESummary SubjectAnalysis was created + pesummary_analyses = [a for a in event.analyses if a.name == 'CombinedPESummary'] + self.assertEqual(len(pesummary_analyses), 1) + + pesummary = pesummary_analyses[0] + + # Check that it found the bilby dependencies + 
self.assertEqual(len(pesummary.dependencies), 0) # dependencies is only for needs + + # Check that the analyses attribute has the bilby runs + from asimov.analysis import SubjectAnalysis + self.assertIsInstance(pesummary, SubjectAnalysis) + + # Check that it has the right productions/analyses + if hasattr(pesummary, 'analyses'): + self.assertEqual(len(pesummary.analyses), 2) + analysis_names = [a.name for a in pesummary.analyses] + self.assertIn('Bilby1', analysis_names) + self.assertIn('Bilby2', analysis_names) + + def test_pesummary_with_required_dependencies(self): + """Test that PESummary won't run if required dependencies are missing.""" + blueprint = """ +kind: analysis +name: CombinedPESummary +pipeline: pesummary +analyses: + - pipeline: bilby +""" + with open('test_pesummary_no_deps.yaml', 'w') as f: + f.write(blueprint) + + apply_page(file='test_pesummary_no_deps.yaml', event='GW150914_095045', ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + pesummary = [a for a in event.analyses if a.name == 'CombinedPESummary'][0] + + # The analyses list should be empty since no bilby jobs exist + self.assertEqual(len(pesummary.analyses), 0) + + def test_pesummary_with_optional_dependencies(self): + """Test that PESummary can run with optional dependencies.""" + blueprint = """ +kind: analysis +name: Bilby1 +pipeline: bilby +status: finished +--- +kind: analysis +name: CombinedPESummary +pipeline: pesummary +analyses: + - pipeline: bilby + - optional: true + pipeline: rift +""" + with open('test_pesummary_optional.yaml', 'w') as f: + f.write(blueprint) + + apply_page(file='test_pesummary_optional.yaml', event='GW150914_095045', ledger=self.ledger) + event = self.ledger.get_event('GW150914_095045')[0] + + pesummary = [a for a in event.analyses if a.name == 'CombinedPESummary'][0] + + # Should have the bilby analysis + if hasattr(pesummary, 'analyses'): + self.assertEqual(len(pesummary.analyses), 1) + 
self.assertEqual(pesummary.analyses[0].name, 'Bilby1') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_pipelines/test_bayeswave.py b/tests/test_pipelines/test_bayeswave.py index 6429fb80..95204f46 100644 --- a/tests/test_pipelines/test_bayeswave.py +++ b/tests/test_pipelines/test_bayeswave.py @@ -40,7 +40,7 @@ def setUpClass(cls): cls.cwd = os.getcwd() repo = git.Repo.init(cls.cwd+"/tests/test_data/s000000xx/") os.chdir(cls.cwd+"/tests/test_data/s000000xx/") - os.system("git add C01_offline/Prod1_test.ini C01_offline/s000000xx_gpsTime.txt") + os.system("git add analyses/Prod0_test.ini analyses/s000000xx_gpsTime.txt") os.system("git commit -m 'test'") def setUp(self): @@ -193,5 +193,5 @@ def setUp(self): def test_dag(self): """Check that a DAG is actually produced.""" - print(f"{self.cwd}/tests/tmp/s000000xx/C01_offline/Prod1/lalinference_1248617392-1248617397.dag") - self.assertEqual(os.path.exists(f"{self.cwd}/tests/tmp/s000000xx/C01_offline/Prod1/lalinference_1248617392-1248617397.dag"), 1) + print(f"{self.cwd}/tests/tmp/s000000xx/analyses/Prod1/lalinference_1248617392-1248617397.dag") + self.assertEqual(os.path.exists(f"{self.cwd}/tests/tmp/s000000xx/analyses/Prod1/lalinference_1248617392-1248617397.dag"), 1) diff --git a/tests/test_pipelines/test_bilby.py b/tests/test_pipelines/test_bilby.py index ae300f00..58f84e06 100644 --- a/tests/test_pipelines/test_bilby.py +++ b/tests/test_pipelines/test_bilby.py @@ -17,6 +17,7 @@ from asimov.ledger import YAMLLedger import io import contextlib +from tests.blueprints import DEFAULTS_PE, DEFAULTS_PE_PRIORS, EVENTS as BLUEPRINT_EVENTS, PIPELINES class BilbyTests(unittest.TestCase): """Test bilby interface""" @@ -42,12 +43,12 @@ def tearDown(self): @unittest.skip("I need to get this to work properly.") def test_build_cli(self): """Check that a bilby config file can be built.""" - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe.yaml", event=None, 
ledger=self.ledger) - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe-priors.yaml", event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE, event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE_PRIORS, event=None, ledger=self.ledger) event = "GW150914_095045" pipeline = "bilby" - apply_page(file = f"https://git.ligo.org/asimov/data/-/raw/main/tests/{event}.yaml", event=None, ledger=self.ledger) - apply_page(file = f"https://git.ligo.org/asimov/data/-/raw/main/tests/{pipeline}.yaml", event=event, ledger=self.ledger) + apply_page(file=BLUEPRINT_EVENTS[event], event=None, ledger=self.ledger) + apply_page(file=PIPELINES[pipeline], event=event, ledger=self.ledger) runner = CliRunner() result = runner.invoke(manage.build, "--dryrun") @@ -55,12 +56,12 @@ def test_build_cli(self): def test_build_api(self): """Check that a bilby config file can be built.""" - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe.yaml", event=None, ledger=self.ledger) - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe-priors.yaml", event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE, event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE_PRIORS, event=None, ledger=self.ledger) event = "GW150914_095045" pipeline = "bilby" - apply_page(file = f"https://git.ligo.org/asimov/data/-/raw/main/tests/{event}.yaml", event=None, ledger=self.ledger) - apply_page(file = f"https://git.ligo.org/asimov/data/-/raw/main/tests/{pipeline}.yaml", event=event, ledger=self.ledger) + apply_page(file=BLUEPRINT_EVENTS[event], event=None, ledger=self.ledger) + apply_page(file=PIPELINES[pipeline], event=event, ledger=self.ledger) f = io.StringIO() with contextlib.redirect_stdout(f): diff --git a/tests/test_pipelines/test_lalinference.py b/tests/test_pipelines/test_lalinference.py index b7a156f3..81a35647 100644 --- a/tests/test_pipelines/test_lalinference.py +++ 
b/tests/test_pipelines/test_lalinference.py @@ -31,7 +31,7 @@ def setUpClass(cls): cls.cwd = os.getcwd() repo = git.Repo.init(cls.cwd+"/tests/test_data/s000000xx/") os.chdir(cls.cwd+"/tests/test_data/s000000xx/") - os.system("git add C01_offline/Prod0_test.ini C01_offline/s000000xx_gpsTime.txt") + os.system("git add analyses/Prod0_test.ini analyses/s000000xx_gpsTime.txt") os.system("git commit -m 'test'") os.chdir(cls.cwd) diff --git a/tests/test_pipelines/test_testing_pipelines.py b/tests/test_pipelines/test_testing_pipelines.py new file mode 100644 index 00000000..814d5a7a --- /dev/null +++ b/tests/test_pipelines/test_testing_pipelines.py @@ -0,0 +1,357 @@ +"""Tests for the testing pipelines.""" + +import unittest +import shutil +import os +import tempfile +from pathlib import Path +from unittest.mock import patch, MagicMock + +from asimov.pipelines.testing import ( + SimpleTestPipeline, + SubjectTestPipeline, + ProjectTestPipeline +) +from asimov.analysis import SimpleAnalysis, SubjectAnalysis, ProjectAnalysis + +from asimov.ledger import YAMLLedger + +from asimov.cli.application import apply_page + +from click.testing import CliRunner +from asimov.cli import project + + +class TestingPipelineTests(unittest.TestCase): + """Test the testing pipelines.""" + + @classmethod + def setUpClass(cls): + cls.cwd = os.getcwd() + + def setUp(self): + """Set up test environment.""" + self.test_dir = tempfile.mkdtemp() + os.chdir(self.test_dir) + + runner = CliRunner() + result = runner.invoke( + project.init, + ['Test Project', '--root', self.test_dir] + ) + self.assertEqual(result.exit_code, 0) + self.ledger = YAMLLedger(f"{self.test_dir}/.asimov/ledger.yml") + + def tearDown(self): + """Clean up test environment.""" + os.chdir(self.cwd) + shutil.rmtree(self.test_dir, ignore_errors=True) + + def test_simple_pipeline_instantiation(self): + """Test that SimpleTestPipeline can be instantiated.""" + # Load test data + apply_page( + 
file=f"{self.cwd}/tests/test_data/testing_pe.yaml", + event=None, + ledger=self.ledger + ) + apply_page( + file=f"{self.cwd}/tests/test_data/testing_events.yaml", + ledger=self.ledger + ) + + # Create a test event and analysis + event = self.ledger.get_event("GW150914_095045")[0] + + # Create a simple analysis with the test pipeline + analysis = SimpleAnalysis( + subject=event, + name="test-simple", + pipeline="simpletestpipeline", + status="ready", + ledger=self.ledger + ) + + # Check the pipeline was created correctly + self.assertIsInstance(analysis.pipeline, SimpleTestPipeline) + self.assertEqual(analysis.pipeline.name, "SimpleTestPipeline") + + @patch('subprocess.run') + def test_simple_pipeline_submit(self, mock_run): + """Test that SimpleTestPipeline can submit a job.""" + # Mock condor_submit_dag response + mock_result = MagicMock() + mock_result.stdout = "1 job(s) submitted to cluster 12345." + mock_result.returncode = 0 + mock_run.return_value = mock_result + + apply_page( + file=f"{self.cwd}/tests/test_data/testing_pe.yaml", + event=None, + ledger=self.ledger + ) + apply_page( + file=f"{self.cwd}/tests/test_data/testing_events.yaml", + ledger=self.ledger + ) + + event = self.ledger.get_event("GW150914_095045")[0] + + analysis = SimpleAnalysis( + subject=event, + name="test-simple", + pipeline="simpletestpipeline", + status="ready", + ledger=self.ledger, + rundir=os.path.join(self.test_dir, "simple_run") + ) + + # Submit the job + job_id = analysis.pipeline.submit_dag(dryrun=False) + + # Check job was submitted + self.assertEqual(job_id, 12345) + self.assertTrue(os.path.exists(analysis.rundir)) + self.assertTrue( + os.path.exists(os.path.join(analysis.rundir, "test_job.sh")) + ) + + def test_simple_pipeline_completion(self): + """Test that SimpleTestPipeline can detect completion.""" + apply_page( + file=f"{self.cwd}/tests/test_data/testing_pe.yaml", + event=None, + ledger=self.ledger + ) + apply_page( + 
file=f"{self.cwd}/tests/test_data/testing_events.yaml", + ledger=self.ledger + ) + + event = self.ledger.get_event("GW150914_095045")[0] + + analysis = SimpleAnalysis( + subject=event, + name="test-simple", + pipeline="simpletestpipeline", + status="ready", + ledger=self.ledger, + rundir=os.path.join(self.test_dir, "simple_run") + ) + + # Initially not complete + self.assertFalse(analysis.pipeline.detect_completion()) + + # Create results file + Path(analysis.rundir).mkdir(parents=True, exist_ok=True) + with open(os.path.join(analysis.rundir, "results.dat"), "w") as f: + f.write("test results\n") + + # Now should be complete + self.assertTrue(analysis.pipeline.detect_completion()) + + def test_simple_pipeline_samples(self): + """Test that SimpleTestPipeline can generate sample files.""" + apply_page( + file=f"{self.cwd}/tests/test_data/testing_pe.yaml", + event=None, + ledger=self.ledger + ) + apply_page( + file=f"{self.cwd}/tests/test_data/testing_events.yaml", + ledger=self.ledger + ) + + event = self.ledger.get_event("GW150914_095045")[0] + + analysis = SimpleAnalysis( + subject=event, + name="test-simple", + pipeline="simpletestpipeline", + status="ready", + ledger=self.ledger, + rundir=os.path.join(self.test_dir, "simple_run") + ) + + # Get samples (should create file) + samples = analysis.pipeline.samples(absolute=True) + + self.assertEqual(len(samples), 1) + self.assertTrue(os.path.exists(samples[0])) + self.assertTrue("posterior_samples.dat" in samples[0]) + + def test_pipeline_names(self): + """Test that all testing pipelines have the correct names.""" + apply_page( + file=f"{self.cwd}/tests/test_data/testing_pe.yaml", + event=None, + ledger=self.ledger + ) + apply_page( + file=f"{self.cwd}/tests/test_data/testing_events.yaml", + ledger=self.ledger + ) + + event = self.ledger.get_event("GW150914_095045")[0] + + # Test SimpleTestPipeline + simple = SimpleAnalysis( + subject=event, + name="test-simple", + pipeline="simpletestpipeline", + status="ready", + 
ledger=self.ledger + ) + self.assertEqual(simple.pipeline.name, "SimpleTestPipeline") + + # Test SubjectTestPipeline + subject = SubjectAnalysis( + subject=event, + name="test-subject", + pipeline="subjecttestpipeline", + status="ready" + ) + self.assertEqual(subject.pipeline.name, "SubjectTestPipeline") + + # Test ProjectTestPipeline + project_analysis = ProjectAnalysis( + name="test-project", + pipeline="projecttestpipeline", + status="ready", + subjects=["GW150914_095045"], + ledger=self.ledger + ) + self.assertEqual(project_analysis.pipeline.name, "ProjectTestPipeline") + + +class SubjectPipelineTests(unittest.TestCase): + """Test the SubjectTestPipeline specifically.""" + + @classmethod + def setUpClass(cls): + cls.cwd = os.getcwd() + + def setUp(self): + """Set up test environment.""" + self.test_dir = tempfile.mkdtemp() + os.chdir(self.test_dir) + + runner = CliRunner() + result = runner.invoke( + project.init, + ['Test Project', '--root', self.test_dir] + ) + self.assertEqual(result.exit_code, 0) + self.ledger = YAMLLedger(f"{self.test_dir}/.asimov/ledger.yml") + + def tearDown(self): + """Clean up test environment.""" + os.chdir(self.cwd) + shutil.rmtree(self.test_dir, ignore_errors=True) + + @patch('subprocess.run') + def test_subject_pipeline_submit(self, mock_run): + """Test that SubjectTestPipeline can submit a job.""" + # Mock condor_submit_dag response + mock_result = MagicMock() + mock_result.stdout = "1 job(s) submitted to cluster 23456." 
+ mock_result.returncode = 0 + mock_run.return_value = mock_result + + apply_page( + file=f"{self.cwd}/tests/test_data/testing_pe.yaml", + event=None, + ledger=self.ledger + ) + apply_page( + file=f"{self.cwd}/tests/test_data/testing_events.yaml", + ledger=self.ledger + ) + + event = self.ledger.get_event("GW150914_095045")[0] + + analysis = SubjectAnalysis( + subject=event, + name="test-subject", + pipeline="subjecttestpipeline", + status="ready", + rundir=os.path.join(self.test_dir, "subject_run") + ) + + # Submit the job + job_id = analysis.pipeline.submit_dag(dryrun=False) + + # Check job was submitted + self.assertEqual(job_id, 23456) + self.assertTrue(os.path.exists(analysis.rundir)) + self.assertTrue( + os.path.exists(os.path.join(analysis.rundir, "test_subject_job.sh")) + ) + + +class ProjectPipelineTests(unittest.TestCase): + """Test the ProjectTestPipeline specifically.""" + + @classmethod + def setUpClass(cls): + cls.cwd = os.getcwd() + + def setUp(self): + """Set up test environment.""" + self.test_dir = tempfile.mkdtemp() + os.chdir(self.test_dir) + + runner = CliRunner() + result = runner.invoke( + project.init, + ['Test Project', '--root', self.test_dir] + ) + self.assertEqual(result.exit_code, 0) + self.ledger = YAMLLedger(f"{self.test_dir}/.asimov/ledger.yml") + + def tearDown(self): + """Clean up test environment.""" + os.chdir(self.cwd) + shutil.rmtree(self.test_dir, ignore_errors=True) + + @patch('subprocess.run') + def test_project_pipeline_submit(self, mock_run): + """Test that ProjectTestPipeline can submit a job.""" + # Mock condor_submit_dag response + mock_result = MagicMock() + mock_result.stdout = "1 job(s) submitted to cluster 34567." 
+ mock_result.returncode = 0 + mock_run.return_value = mock_result + + apply_page( + file=f"{self.cwd}/tests/test_data/testing_pe.yaml", + event=None, + ledger=self.ledger + ) + apply_page( + file=f"{self.cwd}/tests/test_data/testing_events.yaml", + ledger=self.ledger + ) + + analysis = ProjectAnalysis( + name="test-project", + pipeline="projecttestpipeline", + status="ready", + subjects=["GW150914_095045"], + ledger=self.ledger, + working_directory=os.path.join(self.test_dir, "project_run") + ) + + # Submit the job + job_id = analysis.pipeline.submit_dag(dryrun=False) + + # Check job was submitted + self.assertEqual(job_id, 34567) + self.assertTrue(os.path.exists(analysis.rundir)) + self.assertTrue( + os.path.exists(os.path.join(analysis.rundir, "test_project_job.sh")) + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_priors.py b/tests/test_priors.py new file mode 100644 index 00000000..2feb7957 --- /dev/null +++ b/tests/test_priors.py @@ -0,0 +1,278 @@ +""" +Test suites for the prior specification and interface system. 
+""" + +import unittest +from asimov.priors import ( + PriorSpecification, + PriorDict, + PriorInterface, + Reparameterization, +) +# Import BilbyPriorInterface from its new location +try: + from asimov.pipelines.bilby import BilbyPriorInterface +except ImportError: + # For testing without full asimov environment + BilbyPriorInterface = None + + +class TestPriorSpecification(unittest.TestCase): + """Test the PriorSpecification model.""" + + def test_basic_prior(self): + """Test creating a basic prior specification.""" + prior = PriorSpecification(minimum=10, maximum=1000) + self.assertEqual(prior.minimum, 10) + self.assertEqual(prior.maximum, 1000) + self.assertIsNone(prior.type) + + def test_prior_with_type(self): + """Test creating a prior with a type.""" + prior = PriorSpecification( + minimum=0, + maximum=1, + type="Uniform", + boundary="periodic" + ) + self.assertEqual(prior.type, "Uniform") + self.assertEqual(prior.boundary, "periodic") + + def test_powerlaw_prior(self): + """Test creating a PowerLaw prior.""" + prior = PriorSpecification( + minimum=10, + maximum=1000, + type="PowerLaw", + alpha=2 + ) + self.assertEqual(prior.alpha, 2) + + def test_extra_fields_allowed(self): + """Test that extra fields are allowed.""" + prior = PriorSpecification( + minimum=10, + maximum=1000, + custom_field="custom_value" + ) + # Extra fields should be stored + self.assertTrue(hasattr(prior, '__pydantic_extra__')) + + +class TestPriorDict(unittest.TestCase): + """Test the PriorDict model.""" + + def test_empty_prior_dict(self): + """Test creating an empty prior dictionary.""" + priors = PriorDict() + self.assertIsNone(priors.default) + + def test_prior_dict_with_default(self): + """Test creating a prior dictionary with a default.""" + priors = PriorDict(default="BBHPriorDict") + self.assertEqual(priors.default, "BBHPriorDict") + + def test_prior_dict_from_dict(self): + """Test creating a PriorDict from a plain dictionary.""" + data = { + "default": "BBHPriorDict", + 
"luminosity distance": { + "minimum": 10, + "maximum": 1000, + "type": "PowerLaw", + "alpha": 2 + }, + "mass ratio": { + "minimum": 0.1, + "maximum": 1.0 + } + } + priors = PriorDict.from_dict(data) + self.assertEqual(priors.default, "BBHPriorDict") + + # Get a prior + lum_dist = priors.get_prior("luminosity distance") + self.assertIsNotNone(lum_dist) + self.assertEqual(lum_dist.minimum, 10) + self.assertEqual(lum_dist.maximum, 1000) + + def test_prior_dict_to_dict(self): + """Test converting a PriorDict back to a plain dictionary.""" + data = { + "default": "BBHPriorDict", + "mass ratio": { + "minimum": 0.1, + "maximum": 1.0 + } + } + priors = PriorDict.from_dict(data) + result = priors.to_dict() + + self.assertEqual(result["default"], "BBHPriorDict") + self.assertIn("mass ratio", result) + self.assertEqual(result["mass ratio"]["minimum"], 0.1) + + def test_get_nonexistent_prior(self): + """Test getting a prior that doesn't exist.""" + priors = PriorDict(default="BBHPriorDict") + result = priors.get_prior("nonexistent") + self.assertIsNone(result) + + +class TestReparameterization(unittest.TestCase): + """Test the Reparameterization model.""" + + def test_basic_reparameterization(self): + """Test creating a basic reparameterization.""" + reparam = Reparameterization( + from_parameters=["mass_1", "mass_2"], + to_parameters=["chirp_mass", "mass_ratio"] + ) + self.assertEqual(reparam.from_parameters, ["mass_1", "mass_2"]) + self.assertEqual(reparam.to_parameters, ["chirp_mass", "mass_ratio"]) + + def test_reparameterization_with_transform(self): + """Test creating a reparameterization with a transform.""" + reparam = Reparameterization( + from_parameters=["mass_1", "mass_2"], + to_parameters=["chirp_mass", "mass_ratio"], + transform="mass_to_chirp_mass_ratio" + ) + self.assertEqual(reparam.transform, "mass_to_chirp_mass_ratio") + + +class TestPriorInterface(unittest.TestCase): + """Test the PriorInterface base class.""" + + def test_interface_with_none(self): + 
"""Test creating an interface with no priors.""" + interface = PriorInterface(None) + self.assertIsNone(interface.prior_dict) + + def test_interface_with_dict(self): + """Test creating an interface with a dictionary.""" + data = { + "default": "BBHPriorDict", + "mass ratio": { + "minimum": 0.1, + "maximum": 1.0 + } + } + interface = PriorInterface(data) + self.assertIsInstance(interface.prior_dict, PriorDict) + self.assertEqual(interface.prior_dict.default, "BBHPriorDict") + + def test_interface_with_prior_dict(self): + """Test creating an interface with a PriorDict.""" + priors = PriorDict(default="BBHPriorDict") + interface = PriorInterface(priors) + self.assertIs(interface.prior_dict, priors) + + def test_interface_validate(self): + """Test the validate method.""" + interface = PriorInterface(None) + self.assertTrue(interface.validate()) + + def test_interface_convert_not_implemented(self): + """Test that convert raises NotImplementedError.""" + interface = PriorInterface(None) + with self.assertRaises(NotImplementedError): + interface.convert() + + +class TestBilbyPriorInterface(unittest.TestCase): + """Test the BilbyPriorInterface.""" + + def setUp(self): + """Skip tests if BilbyPriorInterface is not available.""" + if BilbyPriorInterface is None: + self.skipTest("BilbyPriorInterface not available") + + def test_bilby_interface_with_none(self): + """Test bilby interface with no priors.""" + interface = BilbyPriorInterface(None) + result = interface.convert() + self.assertEqual(result, {}) + + def test_bilby_interface_convert(self): + """Test bilby interface conversion.""" + data = { + "default": "BBHPriorDict", + "luminosity distance": { + "minimum": 10, + "maximum": 1000, + "type": "PowerLaw", + "alpha": 2 + } + } + interface = BilbyPriorInterface(data) + result = interface.convert() + + self.assertEqual(result["default"], "BBHPriorDict") + self.assertIn("luminosity distance", result) + + def test_bilby_default_prior(self): + """Test getting the default 
prior.""" + interface = BilbyPriorInterface(None) + default = interface.get_default_prior() + self.assertEqual(default, "BBHPriorDict") + + interface = BilbyPriorInterface({"default": "BNSPriorDict"}) + default = interface.get_default_prior() + self.assertEqual(default, "BNSPriorDict") + + +class TestBackwardCompatibility(unittest.TestCase): + """Test that the new system is backward compatible with existing blueprints.""" + + def test_simple_prior_dict(self): + """Test a simple prior dictionary like in existing blueprints.""" + data = { + "luminosity distance": { + "minimum": 10, + "maximum": 10000 + }, + "mass ratio": { + "minimum": 0.05, + "maximum": 1.0 + } + } + priors = PriorDict.from_dict(data) + result = priors.to_dict() + + # Should preserve the structure + self.assertIn("luminosity distance", result) + self.assertEqual(result["luminosity distance"]["minimum"], 10) + + def test_complex_prior_dict(self): + """Test a complex prior dictionary from actual blueprints.""" + data = { + "amplitude order": 1, + "chirp mass": { + "maximum": 41.97447913941358, + "minimum": 21.418182160215295 + }, + "luminosity distance": { + "maximum": 10000, + "minimum": 10 + }, + "mass 1": { + "maximum": 1000, + "minimum": 1 + }, + "mass ratio": { + "maximum": 1.0, + "minimum": 0.05 + } + } + # Should not raise an exception + priors = PriorDict.from_dict(data) + result = priors.to_dict() + + # Verify structure is preserved + self.assertIn("chirp mass", result) + self.assertEqual(result["chirp mass"]["minimum"], 21.418182160215295) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_project.py b/tests/test_project.py new file mode 100644 index 00000000..be310b53 --- /dev/null +++ b/tests/test_project.py @@ -0,0 +1,218 @@ +""" +Tests for the Project Python API. 
+""" + +import unittest +import os +import shutil +import tempfile +from asimov.project import Project +from asimov.event import Event + + +class TestProject(unittest.TestCase): + """Test the Project class.""" + + def setUp(self): + """Set up test fixtures.""" + self.test_dir = tempfile.mkdtemp() + self.project_name = "Test Project" + + def tearDown(self): + """Clean up test fixtures.""" + if os.path.exists(self.test_dir): + shutil.rmtree(self.test_dir) + + def test_project_creation(self): + """Test that a project can be created programmatically.""" + project = Project(self.project_name, location=self.test_dir) + + # Verify that the project object has the expected attributes + self.assertEqual(project.name, self.project_name) + self.assertEqual(project.location, self.test_dir) + + # Check that the project directory was created + self.assertTrue(os.path.exists(self.test_dir)) + + # Check that the config file was created + config_path = os.path.join(self.test_dir, ".asimov", "asimov.conf") + self.assertTrue(os.path.exists(config_path)) + + # Check that the ledger was created + ledger_path = os.path.join(self.test_dir, ".asimov", "ledger.yml") + self.assertTrue(os.path.exists(ledger_path)) + + # Check that subdirectories were created + self.assertTrue(os.path.exists(os.path.join(self.test_dir, "working"))) + self.assertTrue(os.path.exists(os.path.join(self.test_dir, "checkouts"))) + self.assertTrue(os.path.exists(os.path.join(self.test_dir, "results"))) + self.assertTrue(os.path.exists(os.path.join(self.test_dir, "logs"))) + + def test_project_load(self): + """Test that an existing project can be loaded.""" + # First create a project + Project(self.project_name, location=self.test_dir) + + # Now load it + project2 = Project.load(self.test_dir) + + # Check that the loaded project has the same name + self.assertEqual(project2.name, self.project_name) + self.assertEqual(project2.location, self.test_dir) + + def test_project_load_nonexistent(self): + """Test that loading 
a nonexistent project raises an error.""" + nonexistent_dir = os.path.join(self.test_dir, "nonexistent") + + with self.assertRaises(FileNotFoundError): + Project.load(nonexistent_dir) + + def test_project_context_manager(self): + """Test that the project works as a context manager.""" + project = Project(self.project_name, location=self.test_dir) + + # Should be able to use as a context manager + with project: + # The ledger should be accessible + self.assertIsNotNone(project.ledger) + + def test_add_subject(self): + """Test adding a subject to the project.""" + project = Project(self.project_name, location=self.test_dir) + + with project: + # Add a subject + subject = project.add_subject(name="GW150914") + + # Check that the subject was created + self.assertIsInstance(subject, Event) + self.assertEqual(subject.name, "GW150914") + + # After exiting the context, the subject should be in the ledger + events = project.get_event() + self.assertEqual(len(events), 1) + self.assertEqual(events[0].name, "GW150914") + + def test_add_subject_outside_context(self): + """Test that adding a subject outside a context manager raises an error.""" + project = Project(self.project_name, location=self.test_dir) + + # Should raise an error when not in a context + with self.assertRaises(RuntimeError): + project.add_subject(name="GW150914") + + def test_add_event_alias(self): + """Test that add_event is an alias for add_subject.""" + project = Project(self.project_name, location=self.test_dir) + + with project: + # Add an event + event = project.add_event(name="GW150914") + + # Check that the event was created + self.assertIsInstance(event, Event) + self.assertEqual(event.name, "GW150914") + + def test_project_repr(self): + """Test the string representation of a project.""" + project = Project(self.project_name, location=self.test_dir) + + repr_str = repr(project) + self.assertIn(self.project_name, repr_str) + self.assertIn(self.test_dir, repr_str) + + def 
test_add_multiple_subjects(self): + """Test adding multiple subjects to a project.""" + project = Project(self.project_name, location=self.test_dir) + + with project: + subject1 = project.add_subject(name="GW150914") + subject2 = project.add_subject(name="GW151226") + + # Check that the returned subjects are correct + self.assertIsInstance(subject1, Event) + self.assertIsInstance(subject2, Event) + self.assertEqual(subject1.name, "GW150914") + self.assertEqual(subject2.name, "GW151226") + + # Check that both subjects are in the ledger + events = project.get_event() + self.assertEqual(len(events), 2) + event_names = {event.name for event in events} + self.assertEqual(event_names, {"GW150914", "GW151226"}) + + def test_add_analysis_to_subject(self): + """Test adding an analysis to a subject within a project context.""" + project = Project(self.project_name, location=self.test_dir) + + with project: + subject = project.add_subject(name="GW150914") + # Add a production/analysis to the subject + from asimov.analysis import GravitationalWaveTransient + production = GravitationalWaveTransient( + subject=subject, + name="prod_bilby", + pipeline="bilby", + status="ready", + ledger=project.ledger + ) + subject.add_production(production) + project.ledger.update_event(subject) + + # Reload and check that the production was saved + events = project.get_event() + self.assertEqual(len(events), 1) + self.assertEqual(len(events[0].productions), 1) + self.assertEqual(events[0].productions[0].name, "prod_bilby") + + def test_project_creation_on_existing_fails(self): + """Test that creating a project on an existing project directory raises an error.""" + # First create a project + Project(self.project_name, location=self.test_dir) + + # Try to create another project in the same location + with self.assertRaises(RuntimeError) as context: + Project("Another Project", location=self.test_dir) + + self.assertIn("already contains an asimov project", str(context.exception)) + + def 
test_load_with_malformed_config(self): + """Test that loading a project with incomplete config raises a clear error.""" + # Create a directory with a malformed config + os.makedirs(os.path.join(self.test_dir, ".asimov")) + + # Create a config file with missing sections + import configparser + config = configparser.ConfigParser() + config.add_section("project") + config.set("project", "name", "Test") + # Missing other required sections + + config_path = os.path.join(self.test_dir, ".asimov", "asimov.conf") + with open(config_path, "w") as f: + config.write(f) + + # Try to load the project + with self.assertRaises(ValueError) as context: + Project.load(self.test_dir) + + self.assertIn("incomplete or malformed", str(context.exception)) + + def test_context_manager_exception_handling(self): + """Test that ledger is not saved when an exception occurs in context.""" + project = Project(self.project_name, location=self.test_dir) + + # Try to add a subject but raise an exception + with self.assertRaises(ValueError): + with project: + project.add_subject(name="GW150914") + # Raise an exception before exiting context + raise ValueError("Test exception") + + # Verify that the subject was not saved + events = project.get_event() + self.assertEqual(len(events), 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_review.py b/tests/test_review.py index dfc24d70..ce7e8409 100644 --- a/tests/test_review.py +++ b/tests/test_review.py @@ -18,6 +18,7 @@ from asimov.cli.project import make_project from asimov.cli.application import apply_page from asimov.ledger import YAMLLedger +from tests.blueprints import DEFAULTS_PE, DEFAULTS_PE_PRIORS, GWTC21_EVENTS, EVENTS as BLUEPRINT_EVENTS, PIPELINES EVENTS = ["GW150914_095045", "GW190924_021846", "GW190929_012149", "GW191109_010717"] pipelines = {"bayeswave"} @@ -79,8 +80,8 @@ def setUp(self): os.chdir(f"{self.cwd}/tests/tmp/project") make_project(name="Test project", root=f"{self.cwd}/tests/tmp/project") self.ledger 
= YAMLLedger(f".asimov/ledger.yml") - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe.yaml", event=None, ledger=self.ledger) - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/events/gwtc-2-1/GW150914_095045.yaml", event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE, event=None, ledger=self.ledger) + apply_page(file=GWTC21_EVENTS["GW150914_095045"], event=None, ledger=self.ledger) self.event = asimov.event.Event.from_yaml(TEST_YAML.format(self.cwd), ledger=self.ledger) @@ -139,12 +140,12 @@ def setUp(self): f = io.StringIO() with contextlib.redirect_stdout(f): - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe.yaml", event=None, ledger=self.ledger) - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe-priors.yaml", event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE, event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE_PRIORS, event=None, ledger=self.ledger) for event in EVENTS: for pipeline in pipelines: - apply_page(file = f"https://git.ligo.org/asimov/data/-/raw/main/tests/{event}.yaml", event=None, ledger=self.ledger) - apply_page(file = f"https://git.ligo.org/asimov/data/-/raw/main/tests/{pipeline}.yaml", event=event, ledger=self.ledger) + apply_page(file=BLUEPRINT_EVENTS[event], event=None, ledger=self.ledger) + apply_page(file=PIPELINES[pipeline], event=event, ledger=self.ledger) # def test_show_review_no_review(self): # """Check that the CLI can show a review report with no reviews""" diff --git a/tests/test_specific_events.py b/tests/test_specific_events.py index b37c2463..efe161da 100644 --- a/tests/test_specific_events.py +++ b/tests/test_specific_events.py @@ -1,43 +1,83 @@ """ -These tests are designed to verify that specific tests produce specific +These tests are designed to verify that specific events produce specific outputs for each pipeline. 
+ +There are two test classes: +- TestGravitationalWaveEventsLocal: Uses local copies of the blueprints (must pass). +- TestGravitationalWaveEventsExternal: Uses blueprints fetched from the external + asimov-data repository (allowed to fail in CI as external data may lag behind + code changes). This class is run as a separate CI job from + ``external_blueprint_compat.py``. """ import os -import unittest -import shutil -import git -import asimov.event -from asimov.cli.project import make_project + from asimov.cli.application import apply_page -from asimov.ledger import YAMLLedger +from asimov.testing import AsimovTestCase pipelines = {"bayeswave", "bilby", "rift"} EVENTS = {"GW150914_095045", "GW190924_021846", "GW190929_012149", "GW191109_010717"} -class TestGravitationalWaveEvents(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.cwd = os.getcwd() - git.Repo.init(cls.cwd+"/tests/test_data/s000000xx/") - - def setUp(self): - os.makedirs(f"{self.cwd}/tests/tmp/project") - os.chdir(f"{self.cwd}/tests/tmp/project") - make_project(name="Test project", root=f"{self.cwd}/tests/tmp/project") - self.ledger = YAMLLedger(f".asimov/ledger.yml") - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe.yaml", event=None, ledger=self.ledger) - - def tearDown(self): - os.chdir(self.cwd) - shutil.rmtree(f"{self.cwd}/tests/tmp/") +EXTERNAL_DEFAULTS_URL = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe.yaml" +EXTERNAL_TESTS_BASE_URL = "https://git.ligo.org/asimov/data/-/raw/main/tests" + +# Absolute path to the local blueprint data directory, computed from this file's +# location so that the tests are not sensitive to the current working directory. +_BLUEPRINTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_data", "blueprints") + + +class _GravitationalWaveEventsBase: + """Base class (mixin) providing the fiducial event test logic. 
+ + Not a TestCase itself - used as a mixin with AsimovTestCase in the + concrete test classes to prevent auto-discovery running it directly. + The setUp/tearDown/setUpClass lifecycle is inherited from AsimovTestCase. + """ + + def _apply_defaults(self): + raise NotImplementedError + + def _get_event_blueprint(self, event): + raise NotImplementedError + + def _get_pipeline_blueprint(self, pipeline): + raise NotImplementedError def test_fiducial_events(self): + self._apply_defaults() for event in EVENTS: for pipeline in pipelines: with self.subTest(event=event, pipeline=pipeline): - apply_page(file = f"https://git.ligo.org/asimov/data/-/raw/main/tests/{event}.yaml", event=None, ledger=self.ledger) - apply_page(file = f"https://git.ligo.org/asimov/data/-/raw/main/tests/{pipeline}.yaml", event=event, ledger=self.ledger) - + apply_page( + file=self._get_event_blueprint(event), + event=None, + ledger=self.ledger, + ) + apply_page( + file=self._get_pipeline_blueprint(pipeline), + event=event, + ledger=self.ledger, + ) event_o = self.ledger.get_event(event)[0] production = event_o.productions[0] production.make_config(f"{self.cwd}/tests/tmp/test_config.ini") + + +class TestGravitationalWaveEventsLocal(_GravitationalWaveEventsBase, AsimovTestCase): + """ + Tests using local copies of blueprints updated to conform to the v0.7 + requirement that minimum frequency lives in the 'waveform' section. + These tests must always pass. 
+ """ + + def _apply_defaults(self): + apply_page( + file=os.path.join(_BLUEPRINTS_DIR, "production-pe.yaml"), + event=None, + ledger=self.ledger, + ) + + def _get_event_blueprint(self, event): + return os.path.join(_BLUEPRINTS_DIR, f"{event}.yaml") + + def _get_pipeline_blueprint(self, pipeline): + return os.path.join(_BLUEPRINTS_DIR, f"{pipeline}.yaml") diff --git a/tests/test_strategies.py b/tests/test_strategies.py new file mode 100644 index 00000000..9b7a7ced --- /dev/null +++ b/tests/test_strategies.py @@ -0,0 +1,271 @@ +""" +Tests for strategy expansion functionality. +""" + +import unittest +from copy import deepcopy + +from asimov.strategies import expand_strategy, set_nested_value + + +class TestSetNestedValue(unittest.TestCase): + """Tests for the set_nested_value helper function.""" + + def test_simple_path(self): + """Test setting a simple (non-nested) value.""" + d = {} + set_nested_value(d, "key", "value") + self.assertEqual(d, {"key": "value"}) + + def test_nested_path(self): + """Test setting a nested value.""" + d = {} + set_nested_value(d, "waveform.approximant", "IMRPhenomXPHM") + self.assertEqual(d, {"waveform": {"approximant": "IMRPhenomXPHM"}}) + + def test_deeply_nested_path(self): + """Test setting a deeply nested value.""" + d = {} + set_nested_value(d, "a.b.c.d", "value") + self.assertEqual(d, {"a": {"b": {"c": {"d": "value"}}}}) + + def test_existing_structure(self): + """Test setting a value in an existing structure.""" + d = {"waveform": {"other": "data"}} + set_nested_value(d, "waveform.approximant", "IMRPhenomXPHM") + self.assertEqual(d, {"waveform": {"other": "data", "approximant": "IMRPhenomXPHM"}}) + + +class TestExpandStrategy(unittest.TestCase): + """Tests for the expand_strategy function.""" + + def test_no_strategy(self): + """Test that blueprints without a strategy are returned unchanged.""" + blueprint = { + "kind": "analysis", + "name": "test-analysis", + "pipeline": "bilby" + } + result = expand_strategy(blueprint) + 
self.assertEqual(len(result), 1) + self.assertEqual(result[0], blueprint) + + def test_single_parameter_strategy(self): + """Test strategy with a single parameter.""" + blueprint = { + "kind": "analysis", + "name": "bilby-{waveform.approximant}", + "pipeline": "bilby", + "strategy": { + "waveform.approximant": ["IMRPhenomXPHM", "SEOBNRv4PHM"] + } + } + result = expand_strategy(deepcopy(blueprint)) + + # Should create 2 analyses + self.assertEqual(len(result), 2) + + # Check first analysis + self.assertEqual(result[0]["name"], "bilby-IMRPhenomXPHM") + self.assertEqual(result[0]["pipeline"], "bilby") + self.assertEqual(result[0]["waveform"]["approximant"], "IMRPhenomXPHM") + self.assertNotIn("strategy", result[0]) + + # Check second analysis + self.assertEqual(result[1]["name"], "bilby-SEOBNRv4PHM") + self.assertEqual(result[1]["pipeline"], "bilby") + self.assertEqual(result[1]["waveform"]["approximant"], "SEOBNRv4PHM") + self.assertNotIn("strategy", result[1]) + + def test_multi_parameter_strategy_matrix(self): + """Test strategy with multiple parameters (matrix/cross-product).""" + blueprint = { + "kind": "analysis", + "name": "bilby-{waveform.approximant}-{sampler.sampler}", + "pipeline": "bilby", + "strategy": { + "waveform.approximant": ["IMRPhenomXPHM", "SEOBNRv4PHM"], + "sampler.sampler": ["dynesty", "emcee"] + } + } + result = expand_strategy(deepcopy(blueprint)) + + # Should create 4 analyses (2 x 2) + self.assertEqual(len(result), 4) + + # Check that all combinations are created + combinations = [ + ("IMRPhenomXPHM", "dynesty"), + ("IMRPhenomXPHM", "emcee"), + ("SEOBNRv4PHM", "dynesty"), + ("SEOBNRv4PHM", "emcee") + ] + + for i, (waveform, sampler) in enumerate(combinations): + self.assertEqual(result[i]["waveform"]["approximant"], waveform) + self.assertEqual(result[i]["sampler"]["sampler"], sampler) + self.assertEqual(result[i]["name"], f"bilby-{waveform}-{sampler}") + + def test_strategy_with_numeric_values(self): + """Test strategy with numeric 
parameter values.""" + blueprint = { + "kind": "analysis", + "name": "bilby-fref-{waveform.frequency}", + "pipeline": "bilby", + "strategy": { + "waveform.frequency": [20, 50, 100] + } + } + result = expand_strategy(deepcopy(blueprint)) + + self.assertEqual(len(result), 3) + self.assertEqual(result[0]["waveform"]["frequency"], 20) + self.assertEqual(result[1]["waveform"]["frequency"], 50) + self.assertEqual(result[2]["waveform"]["frequency"], 100) + + def test_strategy_preserves_other_fields(self): + """Test that strategy expansion preserves other blueprint fields.""" + blueprint = { + "kind": "analysis", + "name": "bilby-{waveform.approximant}", + "pipeline": "bilby", + "event": "GW150914", + "comment": "Test analysis", + "needs": ["generate-psd"], + "likelihood": { + "sample rate": 4096 + }, + "strategy": { + "waveform.approximant": ["IMRPhenomXPHM", "SEOBNRv4PHM"] + } + } + result = expand_strategy(deepcopy(blueprint)) + + self.assertEqual(len(result), 2) + + for analysis in result: + self.assertEqual(analysis["pipeline"], "bilby") + self.assertEqual(analysis["event"], "GW150914") + self.assertEqual(analysis["comment"], "Test analysis") + self.assertEqual(analysis["needs"], ["generate-psd"]) + self.assertEqual(analysis["likelihood"]["sample rate"], 4096) + + def test_name_without_template(self): + """Test that names without templates work correctly.""" + blueprint = { + "kind": "analysis", + "name": "bilby-analysis", + "pipeline": "bilby", + "strategy": { + "waveform.approximant": ["IMRPhenomXPHM", "SEOBNRv4PHM"] + } + } + result = expand_strategy(deepcopy(blueprint)) + + # Both should have the same name (this might create a conflict, + # but we let the user handle that) + self.assertEqual(len(result), 2) + self.assertEqual(result[0]["name"], "bilby-analysis") + self.assertEqual(result[1]["name"], "bilby-analysis") + + def test_strategy_with_single_value(self): + """Test strategy with only one value (edge case).""" + blueprint = { + "kind": "analysis", + "name": 
"bilby-{waveform.approximant}", + "pipeline": "bilby", + "strategy": { + "waveform.approximant": ["IMRPhenomXPHM"] + } + } + result = expand_strategy(deepcopy(blueprint)) + + self.assertEqual(len(result), 1) + self.assertEqual(result[0]["name"], "bilby-IMRPhenomXPHM") + self.assertEqual(result[0]["waveform"]["approximant"], "IMRPhenomXPHM") + + def test_complex_nested_values(self): + """Test strategy with complex nested parameter paths.""" + blueprint = { + "kind": "analysis", + "name": "test", + "pipeline": "bilby", + "strategy": { + "likelihood.marginalisation.distance": [True, False] + } + } + result = expand_strategy(deepcopy(blueprint)) + + self.assertEqual(len(result), 2) + self.assertEqual(result[0]["likelihood"]["marginalisation"]["distance"], True) + self.assertEqual(result[1]["likelihood"]["marginalisation"]["distance"], False) + + def test_boolean_values_in_name(self): + """Test that boolean values are converted to lowercase in names.""" + blueprint = { + "kind": "analysis", + "name": "test-{likelihood.marginalisation.distance}", + "pipeline": "bilby", + "strategy": { + "likelihood.marginalisation.distance": [True, False] + } + } + result = expand_strategy(deepcopy(blueprint)) + + self.assertEqual(len(result), 2) + self.assertEqual(result[0]["name"], "test-true") + self.assertEqual(result[1]["name"], "test-false") + + def test_empty_strategy(self): + """Test that an empty strategy raises an error.""" + blueprint = { + "kind": "analysis", + "name": "test", + "pipeline": "bilby", + "strategy": {} + } + with self.assertRaises(ValueError) as context: + expand_strategy(deepcopy(blueprint)) + self.assertIn("empty", str(context.exception).lower()) + + def test_empty_parameter_list(self): + """Test that empty parameter lists raise an error.""" + blueprint = { + "kind": "analysis", + "name": "test", + "pipeline": "bilby", + "strategy": { + "waveform.approximant": [] + } + } + with self.assertRaises(ValueError) as context: + expand_strategy(deepcopy(blueprint)) + 
self.assertIn("empty", str(context.exception).lower()) + self.assertIn("waveform.approximant", str(context.exception)) + + def test_non_list_parameter_value(self): + """Test that non-list parameter values raise an error.""" + blueprint = { + "kind": "analysis", + "name": "test", + "pipeline": "bilby", + "strategy": { + "waveform.approximant": "IMRPhenomXPHM" # Should be a list + } + } + with self.assertRaises(TypeError) as context: + expand_strategy(deepcopy(blueprint)) + self.assertIn("must be a list", str(context.exception)) + self.assertIn("waveform.approximant", str(context.exception)) + + def test_set_nested_value_with_non_dict_intermediate(self): + """Test that set_nested_value raises error for non-dict intermediate values.""" + d = {"waveform": "some_string"} + with self.assertRaises(TypeError) as context: + set_nested_value(d, "waveform.approximant", "IMRPhenomXPHM") + self.assertIn("intermediate key", str(context.exception).lower()) + self.assertIn("waveform", str(context.exception)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_yaml.py b/tests/test_yaml.py index 2d73843b..447d85b9 100644 --- a/tests/test_yaml.py +++ b/tests/test_yaml.py @@ -8,6 +8,7 @@ from asimov.cli.project import make_project from asimov.cli.application import apply_page from asimov.ledger import YAMLLedger +from tests.blueprints import DEFAULTS_PE, GWTC21_EVENTS TEST_YAML = """ name: S000000xx @@ -74,8 +75,8 @@ def setUp(self): os.chdir(f"{self.cwd}/tests/tmp/project") make_project(name="Test project", root=f"{self.cwd}/tests/tmp/project") self.ledger = YAMLLedger(f".asimov/ledger.yml") - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe.yaml", event=None, ledger=self.ledger) - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/events/gwtc-2-1/GW150914_095045.yaml", event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE, event=None, ledger=self.ledger) + apply_page(file=GWTC21_EVENTS["GW150914_095045"], 
event=None, ledger=self.ledger) self.event = asimov.event.Event.from_yaml(TEST_YAML.format(self.cwd), ledger=self.ledger) @@ -109,8 +110,8 @@ def setUp(self): os.chdir(f"{self.cwd}/tests/tmp/project") make_project(name="Test project", root=f"{self.cwd}/tests/tmp/project") self.ledger = YAMLLedger(f".asimov/ledger.yml") - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/defaults/production-pe.yaml", event=None, ledger=self.ledger) - apply_page(file = "https://git.ligo.org/asimov/data/-/raw/main/events/gwtc-2-1/GW150914_095045.yaml", event=None, ledger=self.ledger) + apply_page(file=DEFAULTS_PE, event=None, ledger=self.ledger) + apply_page(file=GWTC21_EVENTS["GW150914_095045"], event=None, ledger=self.ledger) self.event = asimov.event.Event("S000000xx", ledger=self.ledger) def tearDown(self):