Skip to content

Commit 804c106

Browse files
committed
Explicitly close pdfium doc obj after use (#4362)
1 parent 30e6713 commit 804c106

File tree

3 files changed

+51
-41
lines changed

3 files changed

+51
-41
lines changed

paddlex/inference/models/formula_recognition/result.py

Lines changed: 22 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -256,25 +256,28 @@ def pdf2img(pdf_path: str, img_path: str, is_padding: bool = False):
256256
np.ndarray: The resulting image as a NumPy array, or None if the PDF is not single-page.
257257
"""
258258
pdfDoc = pdfium.PdfDocument(pdf_path)
259-
if len(pdfDoc) != 1:
260-
return None
261-
for page in pdfDoc:
262-
rotate = int(0)
263-
zoom = 2
264-
img = page.render(scale=zoom, rotation=rotate).to_pil()
265-
img = img.convert("RGB")
266-
img = np.array(img)
267-
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
268-
xywh = crop_white_area(img)
269-
270-
if xywh is not None:
271-
x, y, w, h = xywh
272-
img = img[y : y + h, x : x + w]
273-
if is_padding:
274-
img = cv2.copyMakeBorder(
275-
img, 30, 30, 30, 30, cv2.BORDER_CONSTANT, value=(255, 255, 255)
276-
)
277-
return img
259+
try:
260+
if len(pdfDoc) != 1:
261+
return None
262+
for page in pdfDoc:
263+
rotate = int(0)
264+
zoom = 2
265+
img = page.render(scale=zoom, rotation=rotate).to_pil()
266+
img = img.convert("RGB")
267+
img = np.array(img)
268+
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
269+
xywh = crop_white_area(img)
270+
271+
if xywh is not None:
272+
x, y, w, h = xywh
273+
img = img[y : y + h, x : x + w]
274+
if is_padding:
275+
img = cv2.copyMakeBorder(
276+
img, 30, 30, 30, 30, cv2.BORDER_CONSTANT, value=(255, 255, 255)
277+
)
278+
return img
279+
finally:
280+
pdfDoc.close()
278281
return None
279282

280283

paddlex/inference/serving/infra/utils.py

Lines changed: 19 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -188,22 +188,25 @@ def read_pdf(
188188
page_info_list: List[PDFPageInfo] = []
189189
with _lock:
190190
doc = pdfium.PdfDocument(bytes_)
191-
for page in doc:
192-
if max_num_imgs is not None and len(images) >= max_num_imgs:
193-
break
194-
# TODO: Do not always use zoom=2.0
195-
zoom = 2.0
196-
deg = 0
197-
image = page.render(scale=zoom, rotation=deg).to_pil()
198-
image = image.convert("RGB")
199-
image = np.array(image)
200-
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
201-
images.append(image)
202-
page_info = PDFPageInfo(
203-
width=image.shape[1],
204-
height=image.shape[0],
205-
)
206-
page_info_list.append(page_info)
191+
try:
192+
for page in doc:
193+
if max_num_imgs is not None and len(images) >= max_num_imgs:
194+
break
195+
# TODO: Do not always use zoom=2.0
196+
zoom = 2.0
197+
deg = 0
198+
image = page.render(scale=zoom, rotation=deg).to_pil()
199+
image = image.convert("RGB")
200+
image = np.array(image)
201+
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
202+
images.append(image)
203+
page_info = PDFPageInfo(
204+
width=image.shape[1],
205+
height=image.shape[0],
206+
)
207+
page_info_list.append(page_info)
208+
finally:
209+
doc.close()
207210
pdf_info = PDFInfo(
208211
numPages=len(page_info_list),
209212
pages=page_info_list,

paddlex/inference/utils/io/readers.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -290,12 +290,16 @@ def __init__(self, rotate=0, zoom=2.0):
290290
self._scale = zoom
291291

292292
def read_file(self, in_path):
293-
for page in pdfium.PdfDocument(in_path):
294-
image = page.render(scale=self._scale, rotation=self._rotation).to_pil()
295-
image = image.convert("RGB")
296-
img_cv = np.array(image)
297-
img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
298-
yield img_cv
293+
doc = pdfium.PdfDocument(in_path)
294+
try:
295+
for page in doc:
296+
image = page.render(scale=self._scale, rotation=self._rotation).to_pil()
297+
image = image.convert("RGB")
298+
img_cv = np.array(image)
299+
img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
300+
yield img_cv
301+
finally:
302+
doc.close()
299303

300304

301305
class TXTReaderBackend(_BaseReaderBackend):

0 commit comments

Comments
 (0)