it outputs...

master
bel 2023-02-21 12:55:00 -07:00
parent 006a66941f
commit ab878e9de8
1 changed files with 10 additions and 3 deletions

13
main.py
View File

@@ -3,6 +3,7 @@ import cluster
import config
import pdfplumber
from multiprocessing.pool import ThreadPool
import pypdf
import queue
def main():
@@ -25,9 +26,15 @@ def de_column_ify():
cropped_pages.append((got[0], got_i.path))
#if got_i.chars:
# debug.debug_show(debug.debug_im(got_i.page))
cropped_pages = sorted(cropped_pages)
for cropped_page in cropped_pages:
print(cropped_page)
cropped_pages = sorted(cropped_pages)
writer = pypdf.PdfWriter()
for cropped_page in cropped_pages:
with open(cropped_page[1], "rb") as f:
reader = pypdf.PdfReader(f)
writer.add_page(reader.pages[0])
with open(config.INPUT + ".de_column_ified.pdf", "wb") as f:
writer.write(f)
def de_columnify_page(q, path, page):
result = cluster.Chars(path, page.chars, page).divide_into_columns()