ok, we are exporting cropped PDFs now

master
bel 2023-02-21 12:48:50 -07:00
parent 1119c46b97
commit ca2f4c0fac
1 changed file with 23 additions and 6 deletions

View File

@ -1,4 +1,6 @@
import config
import pypdf
import pdfplumber
class Chars:
def __init__(self, path, chars, page):
@ -75,7 +77,9 @@ class Chars:
result2.append(sub)
result = result2
j = 0
for i in result:
j += 1
i.merge()
assert(len(i.chars) == 1)
i.chars[0]["x0"] -= median_height
@ -83,12 +87,25 @@ class Chars:
i.chars[0]["y0"] -= median_height
i.chars[0]["y1"] += median_height
bounds = i._box()
i.page = self.page.crop((
bounds.x0,
self.page.height - bounds.y1,
bounds.x1,
self.page.height - bounds.y0,
), relative=True)
original_reader = pypdf.PdfReader(self.path)
modified_writer = pypdf.PdfWriter()
modified_page = original_reader.pages[self.page.page_number-1]
modified_page.mediabox.upper_right = (bounds.x0, bounds.y0)
modified_page.mediabox.upper_left = (bounds.x1, bounds.y0)
modified_page.mediabox.lower_right = (bounds.x0, bounds.y1)
modified_page.mediabox.lower_left = (bounds.x1, bounds.y1)
modified_writer.add_page(modified_page)
modified_path = "/tmp/{}-{:03d}-{}.modified.pdf".format(
self.path.split("/")[-1],
self.page.page_number,
j,
)
with open(modified_path, "wb") as mwf:
modified_writer.write(mwf)
with pdfplumber.open(modified_path) as modified_pdf:
i.path = modified_path
i.page = modified_pdf.pages[0]
return result