From 92745534a6b8e69ddadaf22e1ee09906f3bba349 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 8 Mar 2025 05:02:33 +0000 Subject: [PATCH 1/3] Bump markdownify from 0.14.1 to 1.1.0 Bumps [markdownify](https://github.com/matthewwithanm/python-markdownify) from 0.14.1 to 1.1.0. - [Release notes](https://github.com/matthewwithanm/python-markdownify/releases) - [Commits](https://github.com/matthewwithanm/python-markdownify/compare/0.14.1...1.1.0) --- updated-dependencies: - dependency-name: markdownify dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 88bbc19042..1977ede8e5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1191,14 +1191,14 @@ source = ["Cython (>=3.0.11,<3.1.0)"] [[package]] name = "markdownify" -version = "0.14.1" +version = "1.1.0" description = "Convert HTML to markdown." optional = false python-versions = "*" groups = ["main"] files = [ - {file = "markdownify-0.14.1-py3-none-any.whl", hash = "sha256:4c46a6c0c12c6005ddcd49b45a5a890398b002ef51380cd319db62df5e09bc2a"}, - {file = "markdownify-0.14.1.tar.gz", hash = "sha256:a62a7a216947ed0b8dafb95b99b2ef4a0edd1e18d5653c656f68f03db2bfb2f1"}, + {file = "markdownify-1.1.0-py3-none-any.whl", hash = "sha256:32a5a08e9af02c8a6528942224c91b933b4bd2c7d078f9012943776fc313eeef"}, + {file = "markdownify-1.1.0.tar.gz", hash = "sha256:449c0bbbf1401c5112379619524f33b63490a8fa479456d41de9dc9e37560ebd"}, ] [package.dependencies] @@ -2758,4 +2758,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.1" python-versions = "3.12.*" -content-hash = "ab706b41230a6e46d4aaa098f0fc65de8802889995d047a6f8df61cacdceb5df" +content-hash = "fba27b9411ee45b438bdecca84881f55f0d72f9933ecee31469e516dcfbe8d45" diff --git a/pyproject.toml b/pyproject.toml index c8858057d1..6b4b44a0ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ deepdiff = "7.0.1" emoji = "2.14.1" feedparser = "6.0.11" lxml = "5.3.1" -markdownify = "0.14.1" +markdownify = "1.1.0" pydantic = "2.10.6" pydantic-settings = "2.8.1" python-dateutil = "2.9.0.post0" From 0a30daf906841ec136114e1bc8688ef1c6898c84 Mon Sep 17 00:00:00 2001 From: wookie184 Date: Tue, 8 Apr 2025 16:53:46 +0100 Subject: [PATCH 2/3] Fix markdownify breaking change: convert_as_inline->parent_tags Now _inline is set as a pseudo value in parent_tags --- bot/exts/info/doc/_markdown.py | 22 +++++++++++----------- bot/exts/info/doc/_parsing.py | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bot/exts/info/doc/_markdown.py b/bot/exts/info/doc/_markdown.py index a030903ed6..1ff50ee95e 100644 --- a/bot/exts/info/doc/_markdown.py +++ b/bot/exts/info/doc/_markdown.py @@ -15,7 +15,7 @@ def __init__(self, *, page_url: str, **options): super().__init__(**options) self.page_url = page_url - def convert_li(self, el: PageElement, text: str, convert_as_inline: bool) -> str: + def convert_li(self, el: PageElement, text: str, parent_tags: set[str]) -> str: """Fix markdownify's erroneous indexing in ol tags.""" parent = el.parent if parent is not None and parent.name == "ol": @@ -31,38 +31,38 @@ def convert_li(self, el: PageElement, text: str, convert_as_inline: bool) -> str bullet = bullets[depth % len(bullets)] return f"{bullet} {text}\n" - def _convert_hn(self, _n: int, el: PageElement, text: str, convert_as_inline: bool) -> str: + def _convert_hn(self, _n: int, el: PageElement, text: str, parent_tags: set[str]) -> str: """Convert h tags to bold text with ** instead of adding #.""" - if convert_as_inline: + if "_inline" in parent_tags: return text return f"**{text}**\n\n" - def convert_code(self, el: PageElement, text: str, convert_as_inline: bool) -> str: + def convert_code(self, el: PageElement, text: str, parent_tags: set[str]) -> str: """Undo `markdownify`s underscore escaping.""" return f"`{text}`".replace("\\", "") - def convert_pre(self, el: PageElement, text: str, convert_as_inline: bool) -> str: + def convert_pre(self, el: PageElement, text: str, parent_tags: set[str]) -> str: """Wrap any codeblocks in `py` for syntax highlighting.""" code = "".join(el.strings) return f"```py\n{code}```" - def convert_a(self, el: PageElement, text: str, convert_as_inline: bool) -> str: + def convert_a(self, el: PageElement, text: str, parent_tags: set[str]) -> str: """Resolve relative URLs to `self.page_url`.""" el["href"] = urljoin(self.page_url, el["href"]) # Discord doesn't handle titles properly, showing links with them as raw text. el["title"] = None - return super().convert_a(el, text, convert_as_inline) + return super().convert_a(el, text, parent_tags) - def convert_p(self, el: PageElement, text: str, convert_as_inline: bool) -> str: + def convert_p(self, el: PageElement, text: str, parent_tags: set[str]) -> str: """Include only one newline instead of two when the parent is a li tag.""" - if convert_as_inline: + if "_inline" in parent_tags: return text parent = el.parent if parent is not None and parent.name == "li": return f"{text}\n" - return super().convert_p(el, text, convert_as_inline) + return super().convert_p(el, text, parent_tags) - def convert_hr(self, el: PageElement, text: str, convert_as_inline: bool) -> str: + def convert_hr(self, el: PageElement, text: str, parent_tags: set[str]) -> str: """Ignore `hr` tag.""" return "" diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py index bc5a5bd31f..0f5734a152 100644 --- a/bot/exts/info/doc/_parsing.py +++ b/bot/exts/info/doc/_parsing.py @@ -159,7 +159,7 @@ def _get_truncated_description( if rendered_length + element_length < max_length: if is_tag: - element_markdown = markdown_converter.process_tag(element, convert_as_inline=False) + element_markdown = markdown_converter.process_tag(element) else: element_markdown = markdown_converter.process_text(element) From 4092ce8fbf073d968878f7cda9ece7845ed1dbb7 Mon Sep 17 00:00:00 2001 From: wookie184 Date: Tue, 8 Apr 2025 16:54:22 +0100 Subject: [PATCH 3/3] Remove broken hack for reflowing markdown text --- bot/exts/info/doc/_markdown.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/bot/exts/info/doc/_markdown.py b/bot/exts/info/doc/_markdown.py index 1ff50ee95e..52e00c2f3d 100644 --- a/bot/exts/info/doc/_markdown.py +++ b/bot/exts/info/doc/_markdown.py @@ -1,18 +1,17 @@ -import re from urllib.parse import urljoin import markdownify from bs4.element import PageElement -# See https://github.com/matthewwithanm/python-markdownify/issues/31 -markdownify.whitespace_re = re.compile(r"[\r\n\s\t ]+") - class DocMarkdownConverter(markdownify.MarkdownConverter): """Subclass markdownify's MarkdownCoverter to provide custom conversion methods.""" def __init__(self, *, page_url: str, **options): - super().__init__(**options) + # Reflow text to avoid unwanted line breaks. + default_options = {"wrap": True, "wrap_width": None} + + super().__init__(**default_options | options) self.page_url = page_url def convert_li(self, el: PageElement, text: str, parent_tags: set[str]) -> str: