Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 15 additions & 17 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,41 +1,39 @@
megatron/core/ @NVIDIA/core-adlr @NVIDIA/core-nemo

megatron/core/models/gpt/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/gpt
megatron/core/models/gpt/ @NVIDIA/gpt

megatron/core/models/multimodal/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/multi-modal
megatron/core/models/multimodal/ @NVIDIA/multi-modal

megatron/core/models/mamba/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/hybrid-mamba
megatron/core/ssm/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/hybrid-mamba
megatron/core/models/mamba/ @NVIDIA/hybrid-mamba
megatron/core/ssm/ @NVIDIA/hybrid-mamba

megatron/core/datasets/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/datasets
megatron/core/datasets/ @NVIDIA/datasets

megatron/core/distributed/fsdp/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/megatron-fsdp
megatron/core/distributed/fsdp/ @NVIDIA/megatron-fsdp

megatron/core/transformer/fsdp_dtensor_checkpoint.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/megatron-fsdp
megatron/core/transformer/fsdp_dtensor_checkpoint.py @NVIDIA/megatron-fsdp

megatron/core/dist_checkpointing/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/dist-checkpointing
megatron/core/dist_checkpointing/ @NVIDIA/dist-checkpointing

megatron/core/optimizer/distrib_optimizer/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/dist-optimizer
megatron/core/optimizer/distrib_optimizer/ @NVIDIA/dist-optimizer

# NOTE(review): a broader `megatron/core/inference/` rule appears further down in this file.
# If CODEOWNERS resolves overlapping patterns by letting the last match win, that later
# rule overrides this narrower one -- confirm the intended owners for modelopt_support.
megatron/core/inference/modelopt_support @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/quantization-and-inference
megatron/core/inference/modelopt_support @NVIDIA/quantization-and-inference

megatron/core/datasets/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/datasets

megatron/core/pipeline_parallel/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/pipeline-parallelism
megatron/core/pipeline_parallel/ @NVIDIA/pipeline-parallelism

megatron/core/transformer/ @NVIDIA/core-adlr @NVIDIA/core-nemo

megatron/core/transformer/moe/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/mixture-of-experts-adlr @NVIDIA/mixture-of-experts-devtech
megatron/core/transformer/moe/ @NVIDIA/mixture-of-experts-adlr @NVIDIA/mixture-of-experts-devtech

megatron/core/inference/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/inference
megatron/core/inference/ @NVIDIA/inference

megatron/core/parallel_state.py @NVIDIA/core-adlr @NVIDIA/core-nemo

megatron/core/post_training/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/post-training
megatron/core/post_training/ @NVIDIA/post-training

megatron/post_training/ @NVIDIA/post-training

megatron/core/transformer/cuda_graphs.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/cuda-graphs
megatron/core/transformer/cuda_graphs.py @NVIDIA/cuda-graphs

.gitlab/ @NVIDIA/ci
.github/ @NVIDIA/ci
Expand Down
97 changes: 97 additions & 0 deletions .github/workflows/multi-approval-bot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,103 @@ jobs:
if: startsWith(github.ref, 'refs/heads/pull-request/')
uses: nv-gha-runners/get-pr-info@main

- name: Check for complexity:low label and core paths
  id: check-conditions
  env:
    GH_TOKEN: ${{ secrets.PAT }}
    PR_INFO: ${{ steps.get-pr-info.outputs.pr-info }}
    PR_NUMBER: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
  run: |
    # Publishes two step outputs that gate the later review/approval steps:
    #   is_low_complexity - "true" when the PR carries the "complexity: low" label
    #   has_core_changes  - "true" when a changed file is under megatron/core/ or megatron/training/

    # Check for the low-complexity label. The name is matched exactly,
    # including the space after the colon.
    if printf '%s' "$PR_INFO" | jq -e '.labels[]? | select(.name == "complexity: low")' > /dev/null 2>&1; then
      echo "is_low_complexity=true" >> "$GITHUB_OUTPUT"
    else
      echo "is_low_complexity=false" >> "$GITHUB_OUTPUT"
    fi

    # Without a PR number the API path below would be ".../pulls//files";
    # stop here with an explicit message instead of an opaque API error.
    : "${PR_NUMBER:?PR number is empty - get-pr-info produced no PR information}"

    # Check if any changed files are in megatron/core or megatron/training.
    # --paginate walks every result page; a single page holds only part of the
    # file list for large PRs, which would let core changes go undetected.
    CHANGED_FILES=$(gh api --paginate "repos/${{ github.repository }}/pulls/$PR_NUMBER/files?per_page=100" --jq '.[].filename')
    if printf '%s\n' "$CHANGED_FILES" | grep -qE '^megatron/(core|training)/'; then
      echo "has_core_changes=true" >> "$GITHUB_OUTPUT"
      echo "PR has changes in megatron/core or megatron/training"
    else
      echo "has_core_changes=false" >> "$GITHUB_OUTPUT"
      echo "PR does not have changes in megatron/core or megatron/training"
    fi

- name: Request reviews from required teams
  if: steps.check-conditions.outputs.has_core_changes == 'true'
  env:
    GH_TOKEN: ${{ secrets.PAT }}
    PR_NUMBER: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
    IS_LOW_COMPLEXITY: ${{ steps.check-conditions.outputs.is_low_complexity }}
  run: |
    # Asks the required teams to review the PR. This is best-effort: a failed
    # request never fails the step, but it is logged so a bad token or team
    # slug does not go unnoticed.

    # Request a review from one NVIDIA team.
    #   $1 - team slug (for example "core-nemo")
    request_team_review() {
      local team=$1
      if ! gh api "repos/${{ github.repository }}/pulls/$PR_NUMBER/requested_reviewers" \
        --method POST -f "team_reviewers[]=${team}"; then
        echo "Warning: Could not request review from @NVIDIA/${team}; continuing"
      fi
    }

    # Request core-nemo for core changes
    request_team_review "core-nemo"

    # Request core-adlr unless complexity:low
    if [ "$IS_LOW_COMPLEXITY" != "true" ]; then
      request_team_review "core-adlr"
    fi

- name: Check core-nemo approval (required for core changes)
  if: steps.check-conditions.outputs.has_core_changes == 'true'
  env:
    GH_TOKEN: ${{ secrets.PAT }}
    PR_NUMBER: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
  run: |
    # Succeeds only when at least one member of @NVIDIA/core-nemo currently
    # approves the PR; otherwise the step exits 1.

    # Get team members. --paginate fetches every page so members beyond the
    # first page are not ignored.
    MEMBERS=$(gh api --paginate "orgs/NVIDIA/teams/core-nemo/members?per_page=100" --jq '.[].login' 2>/dev/null || echo "")
    if [ -z "$MEMBERS" ]; then
      echo "Warning: Could not fetch core-nemo team members"
      exit 1
    fi

    # Get every decisive review (login <TAB> state), all pages. Fetched on its
    # own so an API failure aborts the step rather than looking like
    # "no approvals".
    REVIEWS=$(gh api --paginate "repos/${{ github.repository }}/pulls/$PR_NUMBER/reviews?per_page=100" \
      --jq '.[] | select(.state == "APPROVED" or .state == "CHANGES_REQUESTED" or .state == "DISMISSED") | [.user.login, .state] | @tsv')

    # Keep only reviewers whose most recent decisive review is an approval, so
    # an approval followed by "changes requested" or a dismissal no longer
    # counts. Relies on the API listing reviews oldest-first.
    APPROVERS=$(printf '%s\n' "$REVIEWS" \
      | awk -F'\t' 'NF == 2 { latest[$1] = $2 } END { for (login in latest) if (latest[login] == "APPROVED") print login }')

    # Check if any team member approved. -x/-F compare the whole line as a
    # literal string, so a login is never interpreted as a regular expression.
    while IFS= read -r member; do
      [ -n "$member" ] || continue
      if printf '%s\n' "$APPROVERS" | grep -qxF -- "$member"; then
        echo "✅ core-nemo approval found from: $member"
        exit 0
      fi
    done <<< "$MEMBERS"

    echo "❌ Missing approval from @NVIDIA/core-nemo"
    exit 1

- name: Check core-adlr approval (required for core changes unless complexity:low)
  if: steps.check-conditions.outputs.has_core_changes == 'true' && steps.check-conditions.outputs.is_low_complexity != 'true'
  env:
    GH_TOKEN: ${{ secrets.PAT }}
    PR_NUMBER: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
  run: |
    # Succeeds only when at least one member of @NVIDIA/core-adlr currently
    # approves the PR; otherwise the step exits 1. The step is skipped when the
    # PR was detected as low complexity.

    # Get team members. --paginate fetches every page so members beyond the
    # first page are not ignored.
    MEMBERS=$(gh api --paginate "orgs/NVIDIA/teams/core-adlr/members?per_page=100" --jq '.[].login' 2>/dev/null || echo "")
    if [ -z "$MEMBERS" ]; then
      echo "Warning: Could not fetch core-adlr team members"
      exit 1
    fi

    # Get every decisive review (login <TAB> state), all pages. Fetched on its
    # own so an API failure aborts the step rather than looking like
    # "no approvals".
    REVIEWS=$(gh api --paginate "repos/${{ github.repository }}/pulls/$PR_NUMBER/reviews?per_page=100" \
      --jq '.[] | select(.state == "APPROVED" or .state == "CHANGES_REQUESTED" or .state == "DISMISSED") | [.user.login, .state] | @tsv')

    # Keep only reviewers whose most recent decisive review is an approval, so
    # an approval followed by "changes requested" or a dismissal no longer
    # counts. Relies on the API listing reviews oldest-first.
    APPROVERS=$(printf '%s\n' "$REVIEWS" \
      | awk -F'\t' 'NF == 2 { latest[$1] = $2 } END { for (login in latest) if (latest[login] == "APPROVED") print login }')

    # Check if any team member approved. -x/-F compare the whole line as a
    # literal string, so a login is never interpreted as a regular expression.
    while IFS= read -r member; do
      [ -n "$member" ] || continue
      if printf '%s\n' "$APPROVERS" | grep -qxF -- "$member"; then
        echo "✅ core-adlr approval found from: $member"
        exit 0
      fi
    done <<< "$MEMBERS"

    echo "❌ Missing approval from @NVIDIA/core-adlr"
    exit 1

- name: Checkout action
uses: actions/checkout@v3
with:
Expand Down