diff --git a/packages/markitdown/src/markitdown/converters/_exiftool.py b/packages/markitdown/src/markitdown/converters/_exiftool.py index f605024fd..d684d9e1d 100644 --- a/packages/markitdown/src/markitdown/converters/_exiftool.py +++ b/packages/markitdown/src/markitdown/converters/_exiftool.py @@ -1,5 +1,4 @@ import json -import locale import subprocess from typing import Any, BinaryIO, Union @@ -45,8 +44,6 @@ def exiftool_metadata( text=False, ).stdout - return json.loads( - output.decode(locale.getpreferredencoding(False)), - )[0] + return json.loads(output.decode("utf-8"))[0] finally: file_stream.seek(cur_pos) diff --git a/packages/markitdown/src/markitdown/converters/_plain_text_converter.py b/packages/markitdown/src/markitdown/converters/_plain_text_converter.py index 6f1306fe8..b5114fc5d 100644 --- a/packages/markitdown/src/markitdown/converters/_plain_text_converter.py +++ b/packages/markitdown/src/markitdown/converters/_plain_text_converter.py @@ -8,11 +8,6 @@ # Try loading optional (but in this case, required) dependencies # Save reporting of any exceptions for later _dependency_exc_info = None -try: - import mammoth # noqa: F401 -except ImportError: - # Preserve the error and stack trace for later - _dependency_exc_info = sys.exc_info() ACCEPTED_MIME_TYPE_PREFIXES = [ "text/", diff --git a/packages/markitdown/src/markitdown/converters/_rss_converter.py b/packages/markitdown/src/markitdown/converters/_rss_converter.py index bec42484f..e36b2b31e 100644 --- a/packages/markitdown/src/markitdown/converters/_rss_converter.py +++ b/packages/markitdown/src/markitdown/converters/_rss_converter.py @@ -143,6 +143,7 @@ def _parse_rss_type(self, doc: Document) -> DocumentConverterResult: channel_title = self._get_data_by_tag_name(channel, "title") channel_description = self._get_data_by_tag_name(channel, "description") items = channel.getElementsByTagName("item") + md_text = "" if channel_title: md_text = f"# {channel_title}\n" if channel_description: diff --git a/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py b/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py index c20018659..3692a3793 100644 --- a/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py +++ b/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py @@ -75,7 +75,8 @@ def convert( main_title = title_elm.string # Convert the page - webpage_text = f"# {main_title}\n\n" + _CustomMarkdownify( + title_prefix = f"# {main_title}\n\n" if main_title else "" + webpage_text = title_prefix + _CustomMarkdownify( **kwargs ).convert_soup(body_elm) else: