diff --git a/main.py b/main.py index b63afef..1b86da9 100644 --- a/main.py +++ b/main.py @@ -3,6 +3,7 @@ import cluster import config import pdfplumber from multiprocessing.pool import ThreadPool +import pypdf import queue def main(): @@ -25,9 +26,15 @@ def de_column_ify(): cropped_pages.append((got[0], got_i.path)) #if got_i.chars: # debug.debug_show(debug.debug_im(got_i.page)) - cropped_pages = sorted(cropped_pages) - for cropped_page in cropped_pages: - print(cropped_page) + + cropped_pages = sorted(cropped_pages) + writer = pypdf.PdfWriter() + for cropped_page in cropped_pages: + with open(cropped_page[1], "rb") as f: + reader = pypdf.PdfReader(f) + writer.add_page(reader.pages[0]) + with open(config.INPUT + ".de_column_ified.pdf", "wb") as f: + writer.write(f) def de_columnify_page(q, path, page): result = cluster.Chars(path, page.chars, page).divide_into_columns()