little
parent
7e93ba51aa
commit
64324508b8
12
cluster.py
12
cluster.py
|
|
@ -91,10 +91,14 @@ class Chars:
|
|||
original_reader = pypdf.PdfReader(self.path)
|
||||
modified_writer = pypdf.PdfWriter()
|
||||
modified_page = original_reader.pages[self.page.page_number-1]
|
||||
modified_page.mediabox.upper_right = (bounds.x0, bounds.y0)
|
||||
modified_page.mediabox.upper_left = (bounds.x1, bounds.y0)
|
||||
modified_page.mediabox.lower_right = (bounds.x0, bounds.y1)
|
||||
modified_page.mediabox.lower_left = (bounds.x1, bounds.y1)
|
||||
modified_page.trimbox.upper_right = (bounds.x0, bounds.y0)
|
||||
modified_page.trimbox.upper_left = (bounds.x1, bounds.y0)
|
||||
modified_page.trimbox.lower_right = (bounds.x0, bounds.y1)
|
||||
modified_page.trimbox.lower_left = (bounds.x1, bounds.y1)
|
||||
modified_page.cropbox.upper_right = (bounds.x0, bounds.y0)
|
||||
modified_page.cropbox.upper_left = (bounds.x1, bounds.y0)
|
||||
modified_page.cropbox.lower_right = (bounds.x0, bounds.y1)
|
||||
modified_page.cropbox.lower_left = (bounds.x1, bounds.y1)
|
||||
modified_writer.add_page(modified_page)
|
||||
modified_path = "{}/{}-{:03d}-{}.modified.pdf".format(
|
||||
config.TEMP_DIR,
|
||||
|
|
|
|||
10
main.py
10
main.py
|
|
@ -5,6 +5,7 @@ import pdfplumber
|
|||
from multiprocessing.pool import ThreadPool
|
||||
import pypdf
|
||||
import queue
|
||||
import subprocess
|
||||
|
||||
def main():
|
||||
de_column_ify()
|
||||
|
|
@ -30,14 +31,21 @@ def de_column_ify():
|
|||
#if got_i.chars:
|
||||
# debug.debug_show(debug.debug_im(got_i.page))
|
||||
|
||||
log("merging", len(cropped_pages), "de-column-ified pages")
|
||||
cropped_pages = sorted(cropped_pages)
|
||||
writer = pypdf.PdfWriter()
|
||||
for cropped_page in cropped_pages:
|
||||
with open(cropped_page[1], "rb") as f:
|
||||
reader = pypdf.PdfReader(f)
|
||||
writer.add_page(reader.pages[0])
|
||||
with open(config.INPUT + ".de_column_ified.pdf", "wb") as f:
|
||||
log("dumping de-column-ified pages")
|
||||
fat_output = f'{config.TEMP_DIR}/{config.INPUT.split("/")[-1]}.de_column_ified.pdf'
|
||||
with open(fat_output, "wb") as f:
|
||||
writer.write(f)
|
||||
output = f'{config.INPUT}.de-column-ified.pdf'
|
||||
log("shrinking de-column-ified pages")
|
||||
subprocess.run(f"gs -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -sOutputFile='{output}' '{fat_output}'", shell=True)
|
||||
log(output)
|
||||
|
||||
def de_columnify_page(q, path, page):
|
||||
for _ in range(3):
|
||||
|
|
|
|||
Loading…
Reference in New Issue