From 006a66941f8e4f73740e247faf74177d37269a83 Mon Sep 17 00:00:00 2001 From: bel Date: Tue, 21 Feb 2023 12:52:57 -0700 Subject: [PATCH] gather pages --- main.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index f9b09fd..b63afef 100644 --- a/main.py +++ b/main.py @@ -14,6 +14,7 @@ def log(*args): def de_column_ify(): q = queue.Queue(maxsize=4) with pdfplumber.open(config.INPUT) as pdf: + cropped_pages = [] with ThreadPool(4) as pool: for i in range(len(pdf.pages)): pool.apply_async(de_columnify_page, (q, config.INPUT, pdf.pages[i], )) @@ -21,8 +22,12 @@ def de_column_ify(): log("getting", i, "of", len(pdf.pages)) got = q.get() for got_i in got[1]: - if got_i.chars: - debug.debug_show(debug.debug_im(got_i.page)) + cropped_pages.append((got[0], got_i.path)) + #if got_i.chars: + # debug.debug_show(debug.debug_im(got_i.page)) + cropped_pages = sorted(cropped_pages) + for cropped_page in cropped_pages: + print(cropped_page) def de_columnify_page(q, path, page): result = cluster.Chars(path, page.chars, page).divide_into_columns()