diff --git a/main.py b/main.py index 127b3d3..3982fb9 100644 --- a/main.py +++ b/main.py @@ -15,14 +15,12 @@ def de_column_ify(): q = queue.Queue(maxsize=4) with pdfplumber.open(config.INPUT) as pdf: with ThreadPool(4) as pool: - n = 0 for i in range(len(pdf.pages)): - pool.apply_async(de_columnify_page, (q, n, pdf.pages[i], )) - n += 1 - for i in range(n): - log("getting", i, "of", n) + pool.apply_async(de_columnify_page, (q, pdf.pages[i], )) + for i in range(len(pdf.pages)): + log("getting", i, "of", len(pdf.pages)) got = q.get() - debug.draw_boxes(got[1] , [ + debug.draw_boxes(got[0] , [ { "x0": i.chars[0]["x0"], "x1": i.chars[0]["x1"], @@ -30,13 +28,13 @@ def de_column_ify(): "y1": i.chars[0]["y1"], "debug_label": i.n, } - for i in got[2] if i.chars + for i in got[1] if i.chars ]) -def de_columnify_page(q, idx, page): +def de_columnify_page(q, page): result = cluster.Chars(page.chars, page).divide_into_columns() - log("putting", idx) - q.put((idx, page, result)) + log("putting", page.page_number) + q.put((page, result)) if __name__ == "__main__": main()