ok we exporting cropped pdfs ok

master
bel 2023-02-21 12:48:50 -07:00
parent 1119c46b97
commit ca2f4c0fac
1 changed file with 23 additions and 6 deletions

View File

@@ -1,4 +1,6 @@
import config import config
import pypdf
import pdfplumber
class Chars: class Chars:
def __init__(self, path, chars, page): def __init__(self, path, chars, page):
@@ -75,7 +77,9 @@ class Chars:
result2.append(sub) result2.append(sub)
result = result2 result = result2
j = 0
for i in result: for i in result:
j += 1
i.merge() i.merge()
assert(len(i.chars) == 1) assert(len(i.chars) == 1)
i.chars[0]["x0"] -= median_height i.chars[0]["x0"] -= median_height
@@ -83,12 +87,25 @@ class Chars:
i.chars[0]["y0"] -= median_height i.chars[0]["y0"] -= median_height
i.chars[0]["y1"] += median_height i.chars[0]["y1"] += median_height
bounds = i._box() bounds = i._box()
i.page = self.page.crop((
bounds.x0, original_reader = pypdf.PdfReader(self.path)
self.page.height - bounds.y1, modified_writer = pypdf.PdfWriter()
bounds.x1, modified_page = original_reader.pages[self.page.page_number-1]
self.page.height - bounds.y0, modified_page.mediabox.upper_right = (bounds.x0, bounds.y0)
), relative=True) modified_page.mediabox.upper_left = (bounds.x1, bounds.y0)
modified_page.mediabox.lower_right = (bounds.x0, bounds.y1)
modified_page.mediabox.lower_left = (bounds.x1, bounds.y1)
modified_writer.add_page(modified_page)
modified_path = "/tmp/{}-{:03d}-{}.modified.pdf".format(
self.path.split("/")[-1],
self.page.page_number,
j,
)
with open(modified_path, "wb") as mwf:
modified_writer.write(mwf)
with pdfplumber.open(modified_path) as modified_pdf:
i.path = modified_path
i.page = modified_pdf.pages[0]
return result return result