Skip to content
This repository was archived by the owner on Jul 7, 2020. It is now read-only.

Minor fixes and cleanup #1

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions page.go
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,23 @@ type gstate struct {

// Content returns the page's content.
func (p Page) Content() Content {
strm := p.V.Key("Contents")
switch v := p.V.Key("Contents"); v.Kind() {
case Stream:
return p.contentForStream(v)
case Array:
var c Content
for i := 0; i < v.Len(); i++ {
cfs := p.contentForStream(v.Index(i))
c.Text = append(c.Text, cfs.Text...)
c.Rect = append(c.Rect, cfs.Rect...)
}
return c
default:
panic("bad content kind")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this panic here might cause undocumented and unexpected problems for users. I was using the package earlier on the bus back from school and wrote code that invoked p.Content() without explicitly checking the Kind(). Perhaps we can just make an explicit check for Array then let the contentForStream handle be the catch all?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there other known cases? Would contentForStream handle them correctly?

It's been ages since I've looked at this code.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question @josharian: I am not sure. I am not well versed in the PDF spec, but I thought it was a little unexpected to panic on other types. So far I've only seen mentions of Stream and Array for "Contents" in the spec.
Perhaps: I think it would handle the "other cases" properly because it contains almost the same code as before this patch, and that has worked in the past.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or it was silently returning junk before this patch. I'd rather be explicit, all else being equal.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha, yap that makes sense to me now.

}
}

func (p Page) contentForStream(strm Value) Content {
var enc TextEncoding = &nopEncoder{}

var g = gstate{
Expand Down Expand Up @@ -484,9 +500,12 @@ func (p Page) Content() Content {
gstack = append(gstack, g)

case "Q": // restore graphics state
n := len(gstack) - 1
g = gstack[n]
gstack = gstack[:n]
// gstack should not be empty...but sometimes it is
if len(gstack) > 0 {
n := len(gstack) - 1
g = gstack[n]
gstack = gstack[:n]
}

case "BT": // begin text (reset text matrix and line matrix)
g.Tm = ident
Expand Down
11 changes: 5 additions & 6 deletions read.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ import (
"crypto/cipher"
"crypto/md5"
"crypto/rc4"
"errors"
"fmt"
"io"
"io/ioutil"
Expand Down Expand Up @@ -123,13 +124,11 @@ func NewReader(f io.ReaderAt, size int64) (*Reader, error) {
const endChunk = 100
buf = make([]byte, endChunk)
f.ReadAt(buf, end-endChunk)
for len(buf) > 0 && buf[len(buf)-1] == '\n' || buf[len(buf)-1] == '\r' {
buf = buf[:len(buf)-1]
}
buf = bytes.TrimRight(buf, "\r\n\t ")
if !bytes.HasSuffix(buf, []byte("%%EOF")) {
return nil, fmt.Errorf("not a PDF file: missing %%%%EOF")
eof := bytes.LastIndex(buf, []byte("%%EOF"))
if eof == -1 {
return nil, errors.New("not a PDF file: missing %%EOF")
}
buf = buf[:eof]
i := findLastLine(buf, "startxref")
if i < 0 {
return nil, fmt.Errorf("malformed PDF file: missing final startxref")
Expand Down
21 changes: 21 additions & 0 deletions valuekind_string.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// generated by stringer -type=ValueKind; DO NOT EDIT

package pdf

import "fmt"

// _ValueKind_name is the text that ValueKind.String slices its results from:
// every kind name concatenated back to back with no separators.
const _ValueKind_name = "NullBoolIntegerRealStringNameDictArrayStream"

// _ValueKind_index holds, for each position, the offset in _ValueKind_name
// just past the end of that position's name (4 ends "Null", 8 ends "Bool",
// and so on). ValueKind.String uses entry i as the end of the slice and
// entry i-1 (or 0 for the first entry) as its start.
var _ValueKind_index = [...]uint8{4, 8, 15, 19, 25, 29, 33, 38, 44}

// String returns the name of k as stored in _ValueKind_name. Values that
// fall outside the _ValueKind_index table are rendered as "ValueKind(N)",
// where N is the numeric value of k.
func (k ValueKind) String() string {
	count := ValueKind(len(_ValueKind_index))
	if k >= 0 && k < count {
		// The table stores only end offsets; a name starts where the
		// previous one ended, and the first name starts at offset 0.
		start := uint8(0)
		if k != 0 {
			start = _ValueKind_index[k-1]
		}
		return _ValueKind_name[start:_ValueKind_index[k]]
	}
	return fmt.Sprintf("ValueKind(%d)", k)
}