diff --git a/eng/common/pipelines/templates/steps/verify-links.yml b/eng/common/pipelines/templates/steps/verify-links.yml
index 896b30d0fe..595afc3456 100644
--- a/eng/common/pipelines/templates/steps/verify-links.yml
+++ b/eng/common/pipelines/templates/steps/verify-links.yml
@@ -31,4 +31,5 @@ steps:
         -checkLinkGuidance: ${{ parameters.CheckLinkGuidance }}
         -localBuildRepoName "$env:BUILD_REPOSITORY_NAME"
         -localBuildRepoPath $(Build.SourcesDirectory)
+        -localBuildTargetBranch "$env:SYSTEM_PULLREQUEST_TARGETBRANCH"
         -inputCacheFile "https://azuresdkartifacts.blob.core.windows.net/verify-links-cache/verify-links-cache.txt"
diff --git a/eng/common/scripts/Verify-Links.ps1 b/eng/common/scripts/Verify-Links.ps1
index 0eb1798da6..a003c98635 100644
--- a/eng/common/scripts/Verify-Links.ps1
+++ b/eng/common/scripts/Verify-Links.ps1
@@ -51,6 +51,9 @@
   .PARAMETER localBuildRepoPath
   The path to the local build repo. This is used to resolve links to local files in the repo instead of making web requests.
 
+  .PARAMETER localBuildTargetBranch
+  The target branch of the PR. This is used to resolve links to local files when the link points to this branch.
+
   .PARAMETER requestTimeoutSec
   The number of seconds before we timeout when sending an individual web request. Default is 15 seconds.
 
@@ -80,6 +83,7 @@ param (
   [string] $localGithubClonedRoot = "",
   [string] $localBuildRepoName = "",
   [string] $localBuildRepoPath = "",
+  [string] $localBuildTargetBranch = "",
   [string] $requestTimeoutSec = 15
 )
 
@@ -91,23 +95,80 @@ $ProgressPreference = "SilentlyContinue"; # Disable invoke-webrequest progress d
 
 function ProcessLink([System.Uri]$linkUri) {
   # To help improve performance and rate limiting issues with github links we try to resolve them based on a local clone if one exists.
-  if (($localGithubClonedRoot -or $localBuildRepoName) -and $linkUri -match '^https://github.com/(?<org>Azure)/(?<repo>[^/]+)/(?:blob|tree)/(main|.*_[^/]+|.*/v[^/]+)/(?<path>.*)$') {
-
-    if ($localBuildRepoName -eq ($matches['org'] + "/" + $matches['repo'])) {
-      # If the link is to the current repo, use the local build path
-      $localPath = Join-Path $localBuildRepoPath $matches['path']
-    }
-    else {
-      # Otherwise use the local github clone path
-      $localPath = Join-Path $localGithubClonedRoot $matches['repo'] $matches['path']
+  # Match github.com blob/tree URLs into the named groups org, repo, branch and path (path excludes fragments and query params to prevent ReDoS)
+  $githubPattern = '^https://github\.com/(?<org>[^/]+)/(?<repo>[^/]+)/(?:blob|tree)/(?<branch>[^/]+)/(?<path>[^#?]*)$'
+  # Match raw.githubusercontent.com URLs into the same named groups (path excludes fragments and query params to prevent ReDoS)
+  $rawPattern = '^https://raw\.githubusercontent\.com/(?<org>[^/]+)/(?<repo>[^/]+)/(?<branch>[^/]+)/(?<path>[^#?]*)$'
+
+  $matchedPattern = $false
+  $org = $null
+  $repo = $null
+  $branch = $null
+  $path = $null
+
+  if ($linkUri -match $githubPattern) {
+    $matchedPattern = $true
+    $org = $matches['org']
+    $repo = $matches['repo']
+    $branch = $matches['branch']
+    $path = $matches['path']
+  }
+  elseif ($linkUri -match $rawPattern) {
+    $matchedPattern = $true
+    $org = $matches['org']
+    $repo = $matches['repo']
+    $branch = $matches['branch']
+    $path = $matches['path']
+  }
+
+  if ($matchedPattern -and ($localGithubClonedRoot -or $localBuildRepoName)) {
+    $repoFullName = "$org/$repo"
+
+    # Legacy branch pattern for Azure org backward compatibility
+    $legacyAzureBranchPattern = '^(main|.*_[^/]+|.*/v[^/]+)$'
+
+    # Check if this link points to the current repo
+    if ($localBuildRepoName -eq $repoFullName) {
+      # Check if the link points to the target branch (if specified)
+      # If no target branch is specified, fall back to checking any Azure org link with specific branch patterns (legacy behavior)
+      $shouldCheckLocalFile = $false
+
+      if ($localBuildTargetBranch -and $branch -eq $localBuildTargetBranch) {
+        # Link points to current repo and target branch - check local filesystem
+        $shouldCheckLocalFile = $true
+      }
+      elseif (!$localBuildTargetBranch -and $org -eq "Azure" -and $branch -match $legacyAzureBranchPattern) {
+        # Legacy behavior: check local files for Azure org with specific branch patterns
+        $shouldCheckLocalFile = $true
+      }
+
+      if ($shouldCheckLocalFile) {
+        $localPath = Join-Path $localBuildRepoPath $path
+
+        if (Test-Path $localPath) {
+          # File exists locally - link will be valid
+          return $true
+        }
+        else {
+          # File does not exist locally - this PR would break the link
+          # Log only the relative path to avoid exposing full filesystem structure
+          LogError "Link points to file that does not exist in local repo: $linkUri (path: $path)"
+          return $false
+        }
+      }
     }
-
-    if (Test-Path $localPath) {
-      return $true
+    elseif ($localGithubClonedRoot -and $org -eq "Azure") {
+      # For other Azure repos, use the local github clone path (legacy behavior)
+      $localPath = Join-Path $localGithubClonedRoot $repo $path
+      if (Test-Path $localPath) {
+        return $true
+      }
     }
+
+    # If we didn't return above, fall through to standard link checking
     return ProcessStandardLink $linkUri
   }
-  if ($linkUri -match '^https?://?github\.com/(?)[^/]+/(?)[^/]+/wiki/.+') {
+  if ($linkUri -match '^https?://github\.com/[^/]+/[^/]+/wiki/.+') {
     # in an unauthenticated session, urls for missing pages will redirect to the wiki root
     return ProcessRedirectLink $linkUri -invalidStatusCodes 302
   }