Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix: Proper WMF/EMF image handling in PPTXReader #17819

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
"""

import os
import sys
import shutil
import tempfile
import subprocess
from pathlib import Path
from typing import Dict, List, Optional
from fsspec import AbstractFileSystem
Expand Down Expand Up @@ -56,9 +59,52 @@ def __init__(self) -> None:
"tokenizer": tokenizer,
}

def find_libreoffice(self) -> str:
    """Locate the LibreOffice executable.

    Lookup order:
      1. ``soffice`` on the system PATH.
      2. ``libreoffice`` on the system PATH.
      3. Well-known default install locations for the current platform
         (Windows and macOS).

    Returns:
        Path to the LibreOffice executable.

    Raises:
        OSError: If no LibreOffice executable can be found.
    """
    # `soffice` stays first so existing setups resolve exactly as before;
    # `libreoffice` is the alternative launcher name some installations
    # put on PATH instead.
    for command in ("soffice", "libreoffice"):
        found = shutil.which(command)
        if found:
            return found

    # Default installers on these platforms do not add LibreOffice to PATH,
    # so probe the standard install locations directly.
    if sys.platform == "win32":
        candidates = [
            r"C:\Program Files\LibreOffice\program\soffice.exe",
            r"C:\Program Files (x86)\LibreOffice\program\soffice.exe",
        ]
    elif sys.platform == "darwin":
        candidates = ["/Applications/LibreOffice.app/Contents/MacOS/soffice"]
    else:
        candidates = []

    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate

    raise OSError(
        "LibreOffice (soffice) not found. Please install LibreOffice or add it to your system PATH."
    )

def convert_wmf_to_png(self, input_path: str) -> str:
    """Convert a WMF/EMF image to PNG using LibreOffice.

    The PNG is written into the same directory as the input file, using
    the input file's name with its suffix replaced by ``.png``.

    Args:
        input_path: Path to the WMF/EMF file to convert.

    Returns:
        Path to the generated PNG file.

    Raises:
        OSError: If the LibreOffice executable cannot be located.
        subprocess.CalledProcessError: If LibreOffice exits with a
            non-zero status.
        FileNotFoundError: If LibreOffice exits successfully but the
            expected PNG file was not produced.
    """
    source = Path(input_path)
    target = source.with_suffix(".png")

    soffice = self.find_libreoffice()

    # Arguments are passed as a list (no shell), so paths containing
    # spaces or shell metacharacters are handled safely.
    subprocess.run(
        [
            soffice,
            "--headless",
            "--convert-to",
            "png",
            "--outdir",
            str(source.parent),
            str(source),
        ],
        check=True,
    )

    # A zero exit status does not guarantee that the output was written;
    # fail here rather than hand the caller a path that does not exist.
    if not target.is_file():
        raise FileNotFoundError(
            f"LibreOffice did not produce the expected output file: {target}"
        )

    return str(target)

def caption_image(self, tmp_image_file: str) -> str:
"""Generate text caption of image."""
from PIL import Image
from PIL import Image, UnidentifiedImageError

model = self.parser_config["model"]
feature_extractor = self.parser_config["feature_extractor"]
Expand All @@ -71,7 +117,20 @@ def caption_image(self, tmp_image_file: str) -> str:
num_beams = 4
gen_kwargs = {"max_length": max_length, "num_beams": num_beams}

i_image = Image.open(tmp_image_file)
try:
i_image = Image.open(tmp_image_file)
image_format = i_image.format
except UnidentifiedImageError:
return "Error opening image file."

if image_format in ["WMF", "EMF"]:
try:
converted_path = self.convert_wmf_to_png(tmp_image_file)
i_image = Image.open(converted_path)
except Exception as e:
print(f"Error converting WMF/EMF image: {e}")
return f"Error converting WMF/EMF image"

if i_image.mode != "RGB":
i_image = i_image.convert(mode="RGB")

Expand Down