From f54dbfbeec1f56fa6f0d6f37e519f5ddd4fe4931 Mon Sep 17 00:00:00 2001 From: bel Date: Tue, 21 Feb 2023 11:52:16 -0700 Subject: [PATCH] whee --- main.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/main.py b/main.py index 127b3d3..3982fb9 100644 --- a/main.py +++ b/main.py @@ -15,14 +15,12 @@ def de_column_ify(): q = queue.Queue(maxsize=4) with pdfplumber.open(config.INPUT) as pdf: with ThreadPool(4) as pool: - n = 0 for i in range(len(pdf.pages)): - pool.apply_async(de_columnify_page, (q, n, pdf.pages[i], )) - n += 1 - for i in range(n): - log("getting", i, "of", n) + pool.apply_async(de_columnify_page, (q, pdf.pages[i], )) + for i in range(len(pdf.pages)): + log("getting", i, "of", len(pdf.pages)) got = q.get() - debug.draw_boxes(got[1] , [ + debug.draw_boxes(got[0] , [ { "x0": i.chars[0]["x0"], "x1": i.chars[0]["x1"], @@ -30,13 +28,13 @@ def de_column_ify(): "y1": i.chars[0]["y1"], "debug_label": i.n, } - for i in got[2] if i.chars + for i in got[1] if i.chars ]) -def de_columnify_page(q, idx, page): +def de_columnify_page(q, page): result = cluster.Chars(page.chars, page).divide_into_columns() - log("putting", idx) - q.put((idx, page, result)) + log("putting", page.page_number) + q.put((page, result)) if __name__ == "__main__": main()