Skip to content

Bump markdownify from 0.14.1 to 1.1.0 #3286

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 15 additions & 16 deletions bot/exts/info/doc/_markdown.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
import re
from urllib.parse import urljoin

import markdownify
from bs4.element import PageElement

# See https://github.com/matthewwithanm/python-markdownify/issues/31
markdownify.whitespace_re = re.compile(r"[\r\n\s\t ]+")


class DocMarkdownConverter(markdownify.MarkdownConverter):
"""Subclass markdownify's MarkdownConverter to provide custom conversion methods."""

def __init__(self, *, page_url: str, **options):
super().__init__(**options)
# Reflow text to avoid unwanted line breaks.
default_options = {"wrap": True, "wrap_width": None}

super().__init__(**default_options | options)
self.page_url = page_url

def convert_li(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
def convert_li(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
"""Fix markdownify's erroneous indexing in ol tags."""
parent = el.parent
if parent is not None and parent.name == "ol":
Expand All @@ -31,38 +30,38 @@ def convert_li(self, el: PageElement, text: str, convert_as_inline: bool) -> str
bullet = bullets[depth % len(bullets)]
return f"{bullet} {text}\n"

def _convert_hn(self, _n: int, el: PageElement, text: str, convert_as_inline: bool) -> str:
def _convert_hn(self, _n: int, el: PageElement, text: str, parent_tags: set[str]) -> str:
"""Convert h tags to bold text with ** instead of adding #."""
if convert_as_inline:
if "_inline" in parent_tags:
return text
return f"**{text}**\n\n"

def convert_code(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
def convert_code(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
"""Undo `markdownify`'s underscore escaping."""
return f"`{text}`".replace("\\", "")

def convert_pre(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
def convert_pre(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
"""Wrap any codeblocks in `py` for syntax highlighting."""
code = "".join(el.strings)
return f"```py\n{code}```"

def convert_a(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
def convert_a(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
"""Resolve relative URLs to `self.page_url`."""
el["href"] = urljoin(self.page_url, el["href"])
# Discord doesn't handle titles properly, showing links with them as raw text.
el["title"] = None
return super().convert_a(el, text, convert_as_inline)
return super().convert_a(el, text, parent_tags)

def convert_p(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
def convert_p(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
"""Include only one newline instead of two when the parent is a li tag."""
if convert_as_inline:
if "_inline" in parent_tags:
return text

parent = el.parent
if parent is not None and parent.name == "li":
return f"{text}\n"
return super().convert_p(el, text, convert_as_inline)
return super().convert_p(el, text, parent_tags)

def convert_hr(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
def convert_hr(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
"""Ignore `hr` tag."""
return ""
2 changes: 1 addition & 1 deletion bot/exts/info/doc/_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def _get_truncated_description(

if rendered_length + element_length < max_length:
if is_tag:
element_markdown = markdown_converter.process_tag(element, convert_as_inline=False)
element_markdown = markdown_converter.process_tag(element)
else:
element_markdown = markdown_converter.process_text(element)

Expand Down
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ deepdiff = "7.0.1"
emoji = "2.14.1"
feedparser = "6.0.11"
lxml = "5.3.1"
markdownify = "0.14.1"
markdownify = "1.1.0"
pydantic = "2.10.6"
pydantic-settings = "2.8.1"
python-dateutil = "2.9.0.post0"
Expand Down