diff --git a/apps/pages/templatetags/__init__.py b/apps/pages/templatetags/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/apps/pages/templatetags/page_tags.py b/apps/pages/templatetags/page_tags.py new file mode 100644 index 000000000..21e27bf7c --- /dev/null +++ b/apps/pages/templatetags/page_tags.py @@ -0,0 +1,87 @@ +"""Custom template tags and filters for the pages app.""" + +import re + +from django import template +from django.utils.safestring import mark_safe +from django.utils.text import slugify + +register = template.Library() + +# Match h1–h4 elements; capture tag name, existing attributes, and inner HTML. +# Using DOTALL so inner content can span multiple lines. +_HEADING_RE = re.compile(r"<(h[1-4])([^>]*)>(.*?)", re.IGNORECASE | re.DOTALL) + +# Extract the value of an existing id attribute, e.g. id="my-section". +_EXISTING_ID_RE = re.compile(r'\bid\s*=\s*["\'](.*?)["\']', re.IGNORECASE) + + +@register.filter(is_safe=True) +def add_heading_anchors(html): + """Add self-link anchors to h1\u2013h4 headings. + + Given the rendered HTML of a CMS page, this filter finds every ``

<h1>``, + ``<h2>``, ``<h3>``, and ``<h4>

`` element and injects a pilcrow (\u00b6) + anchor inside it so visitors can copy a direct link to any section. + + Two cases are handled: + + * **Heading already has an ``id``** (common for RST-generated content where + docutils injects ids automatically): the existing id is reused as the + anchor target and a pilcrow link is appended. The heading is otherwise + left intact. + * **Heading has no ``id``**: a URL-safe id is derived from the heading's + plain text via :func:`django.utils.text.slugify`, a ``-N`` suffix is + appended for duplicates, and both the id and the pilcrow link are added. + + Headings whose text produces an empty slug *and* that carry no existing id + are left completely untouched. The filter is idempotent: headings that + already contain a ``headerlink`` anchor are skipped. + + Usage in a template:: + + {% load page_tags %} + {{ page.content|add_heading_anchors }} + """ + seen_slugs: dict[str, int] = {} + + def _replace(match: re.Match) -> str: + tag = match.group(1).lower() + attrs = match.group(2) + inner = match.group(3) + + # Idempotency: skip headings that already have a pilcrow link. + if "headerlink" in inner: + return match.group(0) + + # If the heading already carries an id (e.g. from RST/docutils), + # reuse it for the pilcrow link rather than skipping the heading. + existing = _EXISTING_ID_RE.search(attrs) + if existing: + anchor_id = existing.group(1) + link = ( + f'\u00b6' + ) + return f'<{tag}{attrs}>{inner} {link}' + + # Derive a slug from the plain text (strip any nested HTML tags). + plain_text = re.sub(r"<[^>]+>", "", inner).strip() + base_slug = slugify(plain_text) + + if not base_slug: + return match.group(0) + + # Deduplicate: first occurrence keeps the bare slug; subsequent + # occurrences become slug-2, slug-3, ... 
+ count = seen_slugs.get(base_slug, 0) + 1 + seen_slugs[base_slug] = count + anchor_id = base_slug if count == 1 else f"{base_slug}-{count}" + + link = ( + f'\u00b6' + ) + return f'<{tag} id="{anchor_id}"{attrs}>{inner} {link}' + + return mark_safe(_HEADING_RE.sub(_replace, str(html))) diff --git a/apps/pages/tests/test_templatetags.py b/apps/pages/tests/test_templatetags.py new file mode 100644 index 000000000..71501ee84 --- /dev/null +++ b/apps/pages/tests/test_templatetags.py @@ -0,0 +1,97 @@ +"""Tests for apps/pages/templatetags/page_tags.py.""" + +from django.test import SimpleTestCase + +from apps.pages.templatetags.page_tags import add_heading_anchors + + +class AddHeadingAnchorsFilterTests(SimpleTestCase): + """Tests for the ``add_heading_anchors`` template filter.""" + + def test_h2_gets_id_and_anchor_link(self): + """An h2 heading receives an id attribute and a pilcrow anchor link.""" + html = "

2023

" + result = add_heading_anchors(html) + self.assertIn('id="2023"', result) + self.assertIn('href="#2023"', result) + self.assertIn("¶", result) + + def test_h1_h3_h4_also_processed(self): + """h1, h3, and h4 headings are also processed.""" + for tag in ("h1", "h3", "h4"): + html = f"<{tag}>Section Title" + result = add_heading_anchors(html) + self.assertIn('id="section-title"', result) + self.assertIn('href="#section-title"', result) + + def test_h5_is_not_changed(self): + """h5 headings are left untouched.""" + html = "
Title
" + result = add_heading_anchors(html) + self.assertNotIn("id=", result) + self.assertNotIn("href=", result) + + def test_duplicate_headings_get_unique_ids(self): + """Duplicate heading text produces unique, numbered ids.""" + html = "

Board Resolution

Board Resolution

" + result = add_heading_anchors(html) + self.assertIn('id="board-resolution"', result) + self.assertIn('id="board-resolution-2"', result) + + def test_heading_with_existing_id_gets_pilcrow_link(self): + """A heading with an existing id (e.g. from RST/docutils) gets a pilcrow + link using that id, without the id being changed or duplicated.""" + html = '

My Section

' + result = str(add_heading_anchors(html)) + # Original id is preserved and not duplicated. + self.assertIn('id="custom-id"', result) + self.assertEqual(result.count('id="'), 1) + # Pilcrow link is injected using the existing id. + self.assertIn('href="#custom-id"', result) + self.assertIn("headerlink", result) + + def test_rst_generated_headings_get_pilcrow_links(self): + """RST/docutils headings that already carry ids get pilcrow links added.""" + html = ( + '

Board Resolutions

' + '

Resolution 1: Budget

' + ) + result = str(add_heading_anchors(html)) + self.assertIn('href="#board-resolutions"', result) + self.assertIn('href="#resolution-1-budget"', result) + self.assertEqual(result.count("headerlink"), 2) + + def test_filter_is_idempotent(self): + """Running the filter twice does not add duplicate pilcrow links.""" + html = "

Section

" + once = str(add_heading_anchors(html)) + twice = str(add_heading_anchors(once)) + self.assertEqual(once, twice) + + def test_heading_with_nested_html_tags(self): + """Plain text is extracted from headings that contain nested tags.""" + html = "

Nested Heading

" + result = add_heading_anchors(html) + self.assertIn('id="nested-heading"', result) + + def test_non_heading_html_is_unchanged(self): + """Non-heading elements are passed through unmodified.""" + html = "

Some paragraph

" + result = add_heading_anchors(html) + self.assertEqual(str(result), html) + + def test_empty_string_returns_empty_string(self): + """Passing an empty string returns an empty string.""" + self.assertEqual(str(add_heading_anchors("")), "") + + def test_heading_with_empty_text_is_unchanged(self): + """A heading whose text slugifies to an empty string is left alone.""" + html = "

" + result = add_heading_anchors(html) + self.assertNotIn("id=", result) + + def test_anchor_link_is_inside_heading(self): + """The pilcrow anchor link appears inside the heading element.""" + html = "

Resolutions 2022

" + result = str(add_heading_anchors(html)) + self.assertIn("¶", result) diff --git a/templates/psf/default.html b/templates/psf/default.html index 10cf0377d..0ce1f37d0 100644 --- a/templates/psf/default.html +++ b/templates/psf/default.html @@ -3,6 +3,7 @@ {% extends "base.html" %} {% load boxes %} {% load banners %} +{% load page_tags %} {# TODO: Try to deduplicate this and templates/pages/default.html. #} {% block page_title %}{{ page.title }} | Python Software Foundation{% endblock %} @@ -52,7 +53,7 @@

{{ page.title }}

- {{ page.content }} + {{ page.content|add_heading_anchors }} {% endblock content %} @@ -71,3 +72,4 @@

{{ page.title }}

{% endblock left_sidebar %} +