File tree: 2 files changed, +37 -1 lines changed
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,12 @@ import type {
3535 Redirects ,
3636} from '@/search/scripts/scrape/types'
3737
38+ // GitHub-style alert markers (> [!NOTE], > [!TIP], etc.) that appear in
39+ // markdown returned by the Article API. The rehype alerts plugin only runs
40+ // in the HTML pipeline, so these leak through as literal text when we index
41+ // the markdown-only output. Strip them so they don't appear in search results.
42+ const ALERT_MARKER_REGEXP = /\[!(NOTE|TIP|WARNING|IMPORTANT|CAUTION)\]\n?/gi
43+
3844// Same ignored headings as the HTML scraping approach
3945const IGNORED_HEADING_SLUGS = new Set ( [ 'in-this-article' , 'further-reading' , 'prerequisites' ] )
4046
@@ -190,7 +196,7 @@ export function extractFromMarkdown(markdown: string): { headings: string; conte
190196
191197 // 2. Convert full AST to plain text (code blocks are kept so that terms
192198 // appearing only in code examples remain searchable).
193- const content = astToPlainText ( ast )
199+ const content = astToPlainText ( ast ) . replace ( ALERT_MARKER_REGEXP , '' )
194200
195201 return { headings : headings . join ( '\n' ) , content }
196202}
Original file line number Diff line number Diff line change @@ -212,6 +212,36 @@ More text.
212212 expect ( text ) . toContain ( 'Second paragraph in blockquote.' )
213213 } )
214214
215+ test ( 'strips GitHub alert markers from plain text' , ( ) => {
216+ const markdown = `> [!NOTE]
217+ > This is a note.
218+
219+ > [!TIP]
220+ > This is a tip.
221+
222+ > [!WARNING]
223+ > This is a warning.
224+
225+ > [!IMPORTANT]
226+ > This is important.
227+
228+ > [!CAUTION]
229+ > This is a caution.
230+ `
231+ const text = markdownToPlainText ( markdown )
232+ expect ( text ) . not . toContain ( '[!NOTE]' )
233+ expect ( text ) . not . toContain ( '[!TIP]' )
234+ expect ( text ) . not . toContain ( '[!WARNING]' )
235+ expect ( text ) . not . toContain ( '[!IMPORTANT]' )
236+ expect ( text ) . not . toContain ( '[!CAUTION]' )
237+ // The alert body text should still be present
238+ expect ( text ) . toContain ( 'This is a note.' )
239+ expect ( text ) . toContain ( 'This is a tip.' )
240+ expect ( text ) . toContain ( 'This is a warning.' )
241+ expect ( text ) . toContain ( 'This is important.' )
242+ expect ( text ) . toContain ( 'This is a caution.' )
243+ } )
244+
215245 test ( 'handles GFM tables cleanly' , ( ) => {
216246 const markdown = `Some intro.
217247
You can’t perform that action at this time.
0 commit comments