Skip to content

Commit 1db97bf

Browse files
committed
add pdf_img
1 parent 6e7a317 commit 1db97bf

File tree

2 files changed

+54
-0
lines changed

2 files changed

+54
-0
lines changed

pdf/img/test.pdf_img1.png

31.2 KB
Loading

pdf/pdf_img.py

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/usr/bin/env python3
2+
3+
import fitz #pip install pymupdf
4+
import re
5+
import os
6+
7+
8+
def find_imag(path,img_path):
    """Extract every embedded image of a PDF and save each one as a PNG file.

    Every cross-reference (xref) entry of the document is scanned; an entry
    is treated as an image when its object definition contains both
    ``/Type /XObject`` and ``/Subtype /Image``.

    Args:
        path: Path of the PDF file to read.
        img_path: Directory the PNG files are written to. It is created
            when it does not exist yet.

    Returns:
        The number of images that were written.
    """
    # Lookaheads: the key only counts when followed by the expected name.
    # Compiled once because they are applied to every xref entry.
    check_xobject = re.compile(r"/Type(?= */XObject)")
    check_image = re.compile(r"/Subtype(?= */Image)")

    # Output names are derived from the input path. Both kinds of path
    # separator are flattened so the result is always a plain file name
    # inside img_path (a '/' would otherwise let os.path.join leave it).
    base_name = path.replace('\\', '_').replace('/', '_').replace(':', '')

    pdf = fitz.open(path)
    try:
        if img_path:
            os.makedirs(img_path, exist_ok=True)

        # Prefer the current PyMuPDF names; fall back to the legacy ones
        # used by older releases.
        xref_length = getattr(pdf, "xref_length", None) or pdf._getXrefLength
        xref_object = getattr(pdf, "xref_object", None) or pdf._getXrefString

        img_count = 0
        len_xref = xref_length()

        print("文件名:{}, 页数: {}, 对象: {}".format(path, len(pdf), len_xref - 1))

        # xref 0 is skipped, as in the original loop.
        for xref in range(1, len_xref):
            text = xref_object(xref)

            # Skip anything that is not an image XObject.
            if not check_xobject.search(text) or not check_image.search(text):
                continue

            img_count += 1
            # Build the pixmap from the xref number of the image.
            pix = fitz.Pixmap(pdf, xref)

            # PNG only stores gray or RGB data: four or more colour
            # components (alpha excluded) means CMYK, so convert to RGB.
            if pix.n - pix.alpha >= 4:
                pix = fitz.Pixmap(fitz.csRGB, pix)

            new_name = "{}_img{}.png".format(base_name, img_count)
            save = getattr(pix, "save", None) or pix.writePNG
            save(os.path.join(img_path, new_name))

            # Drop the reference so the pixmap memory can be released
            # before the next image is loaded.
            pix = None
    finally:
        pdf.close()

    print("提取了{}张图片".format(img_count))
    return img_count
49+
50+
51+
if __name__ == '__main__':
    # Script entry point: extract the images of test.pdf into the img
    # directory. m keeps whatever find_imag returns.
    pdf_path, img_path = r'test.pdf', r'img'
    m = find_imag(pdf_path, img_path)

0 commit comments

Comments
 (0)