diff --git a/main.py b/main.py index f9b09fd..b63afef 100644 --- a/main.py +++ b/main.py @@ -14,6 +14,7 @@ def log(*args): def de_column_ify(): q = queue.Queue(maxsize=4) with pdfplumber.open(config.INPUT) as pdf: + cropped_pages = [] with ThreadPool(4) as pool: for i in range(len(pdf.pages)): pool.apply_async(de_columnify_page, (q, config.INPUT, pdf.pages[i], )) @@ -21,8 +22,12 @@ def de_column_ify(): log("getting", i, "of", len(pdf.pages)) got = q.get() for got_i in got[1]: - if got_i.chars: - debug.debug_show(debug.debug_im(got_i.page)) + cropped_pages.append((got[0], got_i.path)) + #if got_i.chars: + # debug.debug_show(debug.debug_im(got_i.page)) + cropped_pages = sorted(cropped_pages) + for cropped_page in cropped_pages: + print(cropped_page) def de_columnify_page(q, path, page): result = cluster.Chars(path, page.chars, page).divide_into_columns()