From 616292c7f133cc7def3ff763e0a2989b9a79d868 Mon Sep 17 00:00:00 2001
From: web-dev0521
Date: Fri, 22 May 2026 09:15:42 -0600
Subject: [PATCH 1/2] fix: add word boundary to LINK_REGEX to prevent false positive issue links

Closes #137
---
 src/lib/pr-linking.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib/pr-linking.ts b/src/lib/pr-linking.ts
index f57f8a6..aafe875 100644
--- a/src/lib/pr-linking.ts
+++ b/src/lib/pr-linking.ts
@@ -1,7 +1,7 @@
 import type { Pull } from '@/types/entities';
 
 const LINK_REGEX =
-  /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*:?\s*(?:(?:https?:\/\/github\.com\/)?([\w.-]+\/[\w.-]+))?#(\d+)/gi;
+  /\b(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*:?\s*(?:(?:https?:\/\/github\.com\/)?([\w.-]+\/[\w.-]+))?#(\d+)/gi;
 
 export interface LinkedIssueRef {
   repo: string | null;

From 0f0da830038cd14f09edc57a44c2985a3e88128e Mon Sep 17 00:00:00 2001
From: web-dev0521
Date: Sun, 24 May 2026 19:50:11 -0600
Subject: [PATCH 2/2] fix: purge already-persisted false-positive pr_issue_links (#137)

The word-boundary regex fix only stops NEW bad links; existing rows
created by the old boundaryless pattern (e.g. "bugfix #42",
"discloses #42") persist because pr_issue_links is append-only and the
per-repo backfill is gated on the table being empty.

Add a one-shot data migration in getDb(), guarded by PRAGMA
user_version so it runs exactly once per database file. For every
cached PR it recomputes same-repo links under both the old
(boundaryless) and new (fixed) patterns and deletes only the
difference.

GraphQL/sidebar links are left intact unless they coincide with a
boundaryless-only regex match (rare; accepted per #137): a PR with no
closing keyword yields an empty old-match set and is skipped entirely,
and any link still produced by the fixed regex is retained.
---
 src/lib/db.ts | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/src/lib/db.ts b/src/lib/db.ts
index b57b8ef..685a909 100644
--- a/src/lib/db.ts
+++ b/src/lib/db.ts
@@ -10,6 +10,68 @@ const DB_PATH = path.join(DATA_DIR, 'cache.db');
 let _db: Database.Database | null = null;
 let _readDb: Database.Database | null = null;
 
+// --- One-shot data migration: purge false-positive pr_issue_links (issue #137) ---
+// The old link regex lacked a word boundary, so substrings like "bugfix #42"
+// or "discloses #42" were persisted as real PR->issue links. The extractor was
+// fixed to require `\b`, but `pr_issue_links` is append-only, so the bad rows
+// linger. This migration recomputes each cached PR's same-repo links under both
+// the old (boundaryless) and new (fixed) patterns and deletes only the
+// difference — links the old pattern produced that the fixed one does not.
+// Links from GraphQL/sidebar sources are left intact unless they happen to
+// coincide with a boundaryless-only regex match (rare; accepted per #137).
+const PR_ISSUE_LINKS_SCHEMA_VERSION = 1;
+const OLD_LINK_REGEX_NO_BOUNDARY =
+  /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*:?\s*(?:(?:https?:\/\/github\.com\/)?([\w.-]+\/[\w.-]+))?#(\d+)/gi;
+const NEW_LINK_REGEX_WITH_BOUNDARY =
+  /\b(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*:?\s*(?:(?:https?:\/\/github\.com\/)?([\w.-]+\/[\w.-]+))?#(\d+)/gi;
+
+/**
+ * Collect the issue numbers that `pattern` matches in a PR's title and body
+ * (joined by a newline) and that refer to `repoFullName`. A match without an
+ * explicit `owner/repo` capture is treated as same-repo; others are skipped.
+ */
+function sameRepoIssueNumbers(pattern: RegExp, repoFullName: string, title: string, body: string | null): Set<number> {
+  const text = `${title}\n${body ?? ''}`;
+  const out = new Set<number>();
+  for (const m of text.matchAll(pattern)) {
+    const repo = m[1] || repoFullName;
+    if (repo !== repoFullName) continue; // mirror the stored same-repo-only filter
+    const n = parseInt(m[2], 10);
+    if (Number.isFinite(n)) out.add(n);
+  }
+  return out;
+}
+
+/**
+ * For every row in `pulls`, delete the `pr_issue_links` rows whose issue
+ * number the boundaryless pattern matches but the fixed pattern does not.
+ * All deletes run inside a single transaction.
+ *
+ * @returns the total number of rows deleted
+ */
+function purgeBoundarylessPrIssueLinks(db: Database.Database): number {
+  const pulls = db
+    .prepare('SELECT repo_full_name, number, title, body FROM pulls')
+    .all() as Array<{ repo_full_name: string; number: number; title: string; body: string | null }>;
+  const del = db.prepare(
+    'DELETE FROM pr_issue_links WHERE repo_full_name = ? AND pr_number = ? AND issue_number = ?',
+  );
+  let removed = 0;
+  const tx = db.transaction(() => {
+    for (const pr of pulls) {
+      const oldIssues = sameRepoIssueNumbers(OLD_LINK_REGEX_NO_BOUNDARY, pr.repo_full_name, pr.title, pr.body);
+      if (oldIssues.size === 0) continue; // no old-pattern match, so nothing to delete for this PR
+      const newIssues = sameRepoIssueNumbers(NEW_LINK_REGEX_WITH_BOUNDARY, pr.repo_full_name, pr.title, pr.body);
+      for (const issueNum of oldIssues) {
+        if (newIssues.has(issueNum)) continue; // still valid under the fixed regex
+        removed += del.run(pr.repo_full_name, pr.number, issueNum).changes;
+      }
+    }
+  });
+  tx();
+  return removed;
+}
+
 /**
  * Separate read-only handle so foreground GET routes don't queue behind the
  * poller's big upsert transactions on the writer connection. Both handles
@@ -258,6 +320,17 @@ export function getDb(): Database.Database {
     db.exec('ALTER TABLE pulls ADD COLUMN author_association TEXT');
   }
 
+  // One-shot purge of false-positive pr_issue_links (issue #137). Guarded by
+  // PRAGMA user_version so it runs exactly once per database file.
+  const schemaVersion = (db.prepare('PRAGMA user_version').get() as { user_version: number }).user_version;
+  if (schemaVersion < PR_ISSUE_LINKS_SCHEMA_VERSION) {
+    const removed = purgeBoundarylessPrIssueLinks(db);
+    db.exec(`PRAGMA user_version = ${PR_ISSUE_LINKS_SCHEMA_VERSION}`);
+    if (removed > 0) {
+      console.log(`[migration] purged ${removed} false-positive pr_issue_links (issue #137)`);
+    }
+  }
+
   _db = db;
   return db;
 }