Skip to content

pdfload: control region to be rendered via page_box (pdfium only) #4605

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ master
- add vips__worker_exit(): enables fast threadpool shutdown
- larger mmap windows on 64-bit machines improve random access mode for many
file formats
- pdfload: control region to be rendered via `page_box` [lovell]

7/7/25 8.17.1

Expand Down
3 changes: 3 additions & 0 deletions cplusplus/include/vips/VImage8.h
Original file line number Diff line number Diff line change
Expand Up @@ -4860,6 +4860,7 @@ class VImage : public VObject {
* - **scale** -- Factor to scale by, double.
* - **background** -- Background colour, std::vector<double>.
* - **password** -- Password to decrypt with, const char *.
* - **page_box** -- The region of the page to render, VipsForeignPdfPageBox.
* - **memory** -- Force open via memory, bool.
* - **access** -- Required access pattern for this file, VipsAccess.
* - **fail_on** -- Error level to fail on, VipsFailOn.
Expand All @@ -4881,6 +4882,7 @@ class VImage : public VObject {
* - **scale** -- Factor to scale by, double.
* - **background** -- Background colour, std::vector<double>.
* - **password** -- Password to decrypt with, const char *.
* - **page_box** -- The region of the page to render, VipsForeignPdfPageBox.
* - **memory** -- Force open via memory, bool.
* - **access** -- Required access pattern for this file, VipsAccess.
* - **fail_on** -- Error level to fail on, VipsFailOn.
Expand All @@ -4902,6 +4904,7 @@ class VImage : public VObject {
* - **scale** -- Factor to scale by, double.
* - **background** -- Background colour, std::vector<double>.
* - **password** -- Password to decrypt with, const char *.
* - **page_box** -- The region of the page to render, VipsForeignPdfPageBox.
* - **memory** -- Force open via memory, bool.
* - **access** -- Required access pattern for this file, VipsAccess.
* - **fail_on** -- Error level to fail on, VipsFailOn.
Expand Down
6 changes: 6 additions & 0 deletions libvips/foreign/foreign.c
Original file line number Diff line number Diff line change
Expand Up @@ -2721,6 +2721,9 @@ vips_jxlsave_target(VipsImage *in, VipsTarget *target, ...)
*
* Use @password to supply a decryption password.
*
* When using pdfium, the region of a page to render can be selected with
* @page_box, defaulting to the crop box.
*
* The operation fills a number of header fields with metadata, for example
* "pdf-author". They may be useful.
*
Expand All @@ -2733,6 +2736,7 @@ vips_jxlsave_target(VipsImage *in, VipsTarget *target, ...)
* * @dpi: `gdouble`, render at this DPI
* * @scale: `gdouble`, scale render by this factor
* * @background: [struct@ArrayDouble], background colour
* * @page_box: [enum@ForeignPdfPageBox], use this page box (pdfium only)
*
* ::: seealso
* [ctor@Image.new_from_file], [ctor@Image.magickload].
Expand Down Expand Up @@ -2771,6 +2775,7 @@ vips_pdfload(const char *filename, VipsImage **out, ...)
* * @dpi: `gdouble`, render at this DPI
* * @scale: `gdouble`, scale render by this factor
* * @background: [struct@ArrayDouble], background colour
* * @page_box: [enum@ForeignPdfPageBox], use this page box (pdfium only)
*
* ::: seealso
* [ctor@Image.pdfload].
Expand Down Expand Up @@ -2811,6 +2816,7 @@ vips_pdfload_buffer(void *buf, size_t len, VipsImage **out, ...)
* * @dpi: `gdouble`, render at this DPI
* * @scale: `gdouble`, scale render by this factor
* * @background: [struct@ArrayDouble], background colour
* * @page_box: [enum@ForeignPdfPageBox], use this page box (pdfium only)
*
* ::: seealso
* [ctor@Image.pdfload]
Expand Down
69 changes: 66 additions & 3 deletions libvips/foreign/pdfiumload.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ EOF
#include <fpdf_doc.h>
#include <fpdf_edit.h>
#include <fpdf_formfill.h>
#include <fpdf_transformpage.h>

#define TILE_SIZE (4000)

Expand Down Expand Up @@ -165,6 +166,10 @@ typedef struct _VipsForeignLoadPdf {
*/
VipsPel *ink;

/* Render this page box.
*/
VipsForeignPdfPageBox page_box;

} VipsForeignLoadPdf;

typedef VipsForeignLoadClass VipsForeignLoadPdfClass;
Expand Down Expand Up @@ -448,6 +453,49 @@ vips_foreign_load_pdf_set_image(VipsForeignLoadPdf *pdf, VipsImage *out)
return 0;
}

/* Overwrite the crop box of @page with the rectangle of the page box
 * selected by @box.
 *
 * Asking for the crop box is a no-op. If the page does not define the
 * requested box, a warning is logged and the crop box is left as it was.
 * Any other value of @box leaves the page unchanged.
 */
static void
vips_foreign_load_pdf_apply_page_box(FPDF_PAGE page, VipsForeignPdfPageBox box)
{
	float left, bottom, right, top;
	const char *missing;
	int found;

	/* The crop box needs no change, so return before taking the
	 * pdfium lock.
	 */
	if (box == VIPS_FOREIGN_PDF_PAGE_BOX_CROP)
		return;

	g_mutex_lock(&vips_pdfium_mutex);

	/* Fetch the rectangle of the requested box, and note the warning
	 * to issue if the page turns out not to have one.
	 */
	if (box == VIPS_FOREIGN_PDF_PAGE_BOX_MEDIA) {
		found = FPDFPage_GetMediaBox(page, &left, &bottom, &right, &top);
		missing = "missing media box, using default crop box";
	}
	else if (box == VIPS_FOREIGN_PDF_PAGE_BOX_TRIM) {
		found = FPDFPage_GetTrimBox(page, &left, &bottom, &right, &top);
		missing = "missing trim box, using default crop box";
	}
	else if (box == VIPS_FOREIGN_PDF_PAGE_BOX_BLEED) {
		found = FPDFPage_GetBleedBox(page, &left, &bottom, &right, &top);
		missing = "missing bleed box, using default crop box";
	}
	else if (box == VIPS_FOREIGN_PDF_PAGE_BOX_ART) {
		found = FPDFPage_GetArtBox(page, &left, &bottom, &right, &top);
		missing = "missing art box, using default crop box";
	}
	else {
		/* Not a box we know how to apply: release the lock and
		 * leave the page untouched.
		 */
		g_mutex_unlock(&vips_pdfium_mutex);
		return;
	}

	if (found)
		FPDFPage_SetCropBox(page, left, bottom, right, top);
	else
		g_warning("%s", missing);

	g_mutex_unlock(&vips_pdfium_mutex);
}

static int
vips_foreign_load_pdf_header(VipsForeignLoad *load)
{
Expand Down Expand Up @@ -492,6 +540,12 @@ vips_foreign_load_pdf_header(VipsForeignLoad *load)
return -1;
pdf->pages[i].left = 0;
pdf->pages[i].top = top;

/* Attempt to apply selected page box using the page coordinate
* system (bottom left) before calculating render dimensions
* using the client coordinate system (top left). */
vips_foreign_load_pdf_apply_page_box(pdf->page, pdf->page_box);

/* We do round to nearest, in the same way that vips_resize()
* does round to nearest. Without this, things like
* shrink-on-load will break.
Expand Down Expand Up @@ -736,6 +790,14 @@ vips_foreign_load_pdf_class_init(VipsForeignLoadPdfClass *class)
VIPS_ARGUMENT_OPTIONAL_INPUT,
G_STRUCT_OFFSET(VipsForeignLoadPdf, password),
NULL);

VIPS_ARG_ENUM(class, "page_box", 26,
_("Page box"),
_("The region of the page to render"),
VIPS_ARGUMENT_OPTIONAL_INPUT,
G_STRUCT_OFFSET(VipsForeignLoadPdf, page_box),
VIPS_TYPE_FOREIGN_PDF_PAGE_BOX,
VIPS_FOREIGN_PDF_PAGE_BOX_CROP);
}

static void
Expand All @@ -746,6 +808,7 @@ vips_foreign_load_pdf_init(VipsForeignLoadPdf *pdf)
pdf->n = 1;
pdf->current_page = -1;
pdf->background = vips_array_double_newv(1, 255.0);
pdf->page_box = VIPS_FOREIGN_PDF_PAGE_BOX_CROP;
}

typedef struct _VipsForeignLoadPdfFile {
Expand Down Expand Up @@ -804,7 +867,7 @@ vips_foreign_load_pdf_file_class_init(
gobject_class->get_property = vips_object_get_property;

object_class->nickname = "pdfload";
object_class->description = _("load PDF from file");
object_class->description = _("load PDF from file (pdfium)");
object_class->build = vips_foreign_load_pdf_file_build;

foreign_class->suffs = vips__pdf_suffs;
Expand Down Expand Up @@ -867,7 +930,7 @@ vips_foreign_load_pdf_buffer_class_init(
gobject_class->get_property = vips_object_get_property;

object_class->nickname = "pdfload_buffer";
object_class->description = _("load PDF from buffer");
object_class->description = _("load PDF from buffer (pdfium)");
object_class->build = vips_foreign_load_pdf_buffer_build;

load_class->is_a_buffer = vips__pdf_is_a_buffer;
Expand Down Expand Up @@ -925,7 +988,7 @@ vips_foreign_load_pdf_source_class_init(
gobject_class->get_property = vips_object_get_property;

object_class->nickname = "pdfload_source";
object_class->description = _("load PDF from source");
object_class->description = _("load PDF from source (pdfium)");
object_class->build = vips_foreign_load_pdf_source_build;

operation_class->flags |= VIPS_OPERATION_NOCACHE;
Expand Down
22 changes: 19 additions & 3 deletions libvips/foreign/popplerload.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ typedef struct _VipsForeignLoadPdf {
*/
VipsPel *ink;

/* Render this page box, currently only crop is supported.
*/
VipsForeignPdfPageBox page_box;

} VipsForeignLoadPdf;

typedef struct _VipsForeignLoadPdfClass {
Expand Down Expand Up @@ -337,6 +341,9 @@ vips_foreign_load_pdf_header(VipsForeignLoad *load)
if (!(pdf->pages = VIPS_ARRAY(pdf, pdf->n, VipsRect)))
return -1;

if (pdf->page_box != VIPS_FOREIGN_PDF_PAGE_BOX_CROP)
g_warning("only crop page box is supported");

top = 0;
pdf->image.left = 0;
pdf->image.top = 0;
Expand Down Expand Up @@ -580,6 +587,14 @@ vips_foreign_load_pdf_class_init(VipsForeignLoadPdfClass *class)
VIPS_ARGUMENT_OPTIONAL_INPUT,
G_STRUCT_OFFSET(VipsForeignLoadPdf, password),
NULL);

VIPS_ARG_ENUM(class, "page_box", 26,
_("Page box"),
_("The region of the page to render, only crop is supported"),
VIPS_ARGUMENT_OPTIONAL_INPUT,
G_STRUCT_OFFSET(VipsForeignLoadPdf, page_box),
VIPS_TYPE_FOREIGN_PDF_PAGE_BOX,
VIPS_FOREIGN_PDF_PAGE_BOX_CROP);
}

static void
Expand All @@ -590,6 +605,7 @@ vips_foreign_load_pdf_init(VipsForeignLoadPdf *pdf)
pdf->n = 1;
pdf->current_page = -1;
pdf->background = vips_array_double_newv(1, 255.0);
pdf->page_box = VIPS_FOREIGN_PDF_PAGE_BOX_CROP;
}

typedef struct _VipsForeignLoadPdfFile {
Expand Down Expand Up @@ -675,7 +691,7 @@ vips_foreign_load_pdf_file_class_init(
gobject_class->get_property = vips_object_get_property;

object_class->nickname = "pdfload";
object_class->description = _("load PDF from file");
object_class->description = _("load PDF from file (poppler)");
object_class->build = vips_foreign_load_pdf_file_build;

foreign_class->suffs = vips__pdf_suffs;
Expand Down Expand Up @@ -738,7 +754,7 @@ vips_foreign_load_pdf_buffer_class_init(
gobject_class->get_property = vips_object_get_property;

object_class->nickname = "pdfload_buffer";
object_class->description = _("load PDF from buffer");
object_class->description = _("load PDF from buffer (poppler)");
object_class->build = vips_foreign_load_pdf_buffer_build;

load_class->is_a_buffer = vips__pdf_is_a_buffer;
Expand Down Expand Up @@ -796,7 +812,7 @@ vips_foreign_load_pdf_source_class_init(
gobject_class->get_property = vips_object_get_property;

object_class->nickname = "pdfload_source";
object_class->description = _("load PDF from source");
object_class->description = _("load PDF from source (poppler)");
object_class->build = vips_foreign_load_pdf_source_build;

operation_class->flags |= VIPS_OPERATION_NOCACHE;
Expand Down
23 changes: 23 additions & 0 deletions libvips/include/vips/foreign.h
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,29 @@ VIPS_API
int vips_radsave_target(VipsImage *in, VipsTarget *target, ...)
G_GNUC_NULL_TERMINATED;

/**
* VipsForeignPdfPageBox:
* @VIPS_FOREIGN_PDF_PAGE_BOX_MEDIA: render the media box
* @VIPS_FOREIGN_PDF_PAGE_BOX_CROP: render the crop box (the default)
* @VIPS_FOREIGN_PDF_PAGE_BOX_TRIM: render the trim box
* @VIPS_FOREIGN_PDF_PAGE_BOX_BLEED: render the bleed box
* @VIPS_FOREIGN_PDF_PAGE_BOX_ART: render the art box
*
* Each page of a PDF document can contain multiple page boxes,
* also known as boundary boxes or print marks.
*
* Each page box defines a region of the complete page that
* should be rendered. The default region is the crop box.
*
* Selecting a box other than the crop box only has an effect with the
* pdfium loader. The poppler loader logs a warning and renders the crop
* box. With pdfium, a requested box that is missing from the page also
* falls back to the crop box, with a warning.
*/
typedef enum {
VIPS_FOREIGN_PDF_PAGE_BOX_MEDIA,
VIPS_FOREIGN_PDF_PAGE_BOX_CROP,
VIPS_FOREIGN_PDF_PAGE_BOX_TRIM,
VIPS_FOREIGN_PDF_PAGE_BOX_BLEED,
VIPS_FOREIGN_PDF_PAGE_BOX_ART,
VIPS_FOREIGN_PDF_PAGE_BOX_LAST /*< skip >*/
} VipsForeignPdfPageBox;

VIPS_API
int vips_pdfload(const char *filename, VipsImage **out, ...)
G_GNUC_NULL_TERMINATED;
Expand Down
1 change: 1 addition & 0 deletions test/test-suite/helpers/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
FITS_FILE = os.path.join(IMAGES, "WFPC2u5780205r_c0fx.fits")
OPENSLIDE_FILE = os.path.join(IMAGES, "CMU-1-Small-Region.svs")
PDF_FILE = os.path.join(IMAGES, "ISO_12233-reschart.pdf")
PDF_PAGE_BOX_FILE = os.path.join(IMAGES, "page-box.pdf")
CMYK_PDF_FILE = os.path.join(IMAGES, "cmyktest.pdf")
SVG_FILE = os.path.join(IMAGES, "logo.svg")
SVGZ_FILE = os.path.join(IMAGES, "logo.svgz")
Expand Down
Binary file added test/test-suite/images/page-box.pdf
Binary file not shown.
40 changes: 40 additions & 0 deletions test/test-suite/test_foreign.py
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,46 @@ def pdf_valid(im):
assert abs(im.width * 2 - x.width) < 2
assert abs(im.height * 2 - x.height) < 2

im = pyvips.Image.new_from_file(PDF_PAGE_BOX_FILE)
assert im.width == 709
assert im.height == 955
assert im.get("pdf-creator") == "Adobe InDesign 20.4 (Windows)"
assert im.get("pdf-producer") == "Adobe PDF Library 17.0"

pdfloadOp = pyvips.Operation.new_from_name("pdfload").get_description()

if "poppler" in pdfloadOp:
# only crop is implemented, ignore requested page box
im = pyvips.Image.new_from_file(PDF_FILE, page_box="art")
assert im.width == 1134
assert im.height == 680
im = pyvips.Image.new_from_file(PDF_PAGE_BOX_FILE, page_box="art")
assert im.width == 709
assert im.height == 955

if "pdfium" in pdfloadOp:
im = pyvips.Image.new_from_file(PDF_FILE, page_box="art")
assert im.width == 1121
assert im.height == 680
im = pyvips.Image.new_from_file(PDF_FILE, page_box="trim") # missing, will fallback to crop
assert im.width == 1134
assert im.height == 680
im = pyvips.Image.new_from_file(PDF_PAGE_BOX_FILE, page_box="media")
assert im.width == 822
assert im.height == 1069
im = pyvips.Image.new_from_file(PDF_PAGE_BOX_FILE, page_box="crop")
assert im.width == 709
assert im.height == 955
im = pyvips.Image.new_from_file(PDF_PAGE_BOX_FILE, page_box="bleed")
assert im.width == 652
assert im.height == 899
im = pyvips.Image.new_from_file(PDF_PAGE_BOX_FILE, page_box="trim")
assert im.width == 595
assert im.height == 842
im = pyvips.Image.new_from_file(PDF_PAGE_BOX_FILE, page_box="art")
assert im.width == 539
assert im.height == 785

@skip_if_no("gifload")
def test_gifload(self):
def gif_valid(im):
Expand Down
Loading