whee
parent
a08cb2927a
commit
f54dbfbeec
18
main.py
18
main.py
|
|
@ -15,14 +15,12 @@ def de_column_ify():
|
|||
q = queue.Queue(maxsize=4)
|
||||
with pdfplumber.open(config.INPUT) as pdf:
|
||||
with ThreadPool(4) as pool:
|
||||
n = 0
|
||||
for i in range(len(pdf.pages)):
|
||||
pool.apply_async(de_columnify_page, (q, n, pdf.pages[i], ))
|
||||
n += 1
|
||||
for i in range(n):
|
||||
log("getting", i, "of", n)
|
||||
pool.apply_async(de_columnify_page, (q, pdf.pages[i], ))
|
||||
for i in range(len(pdf.pages)):
|
||||
log("getting", i, "of", len(pdf.pages))
|
||||
got = q.get()
|
||||
debug.draw_boxes(got[1] , [
|
||||
debug.draw_boxes(got[0] , [
|
||||
{
|
||||
"x0": i.chars[0]["x0"],
|
||||
"x1": i.chars[0]["x1"],
|
||||
|
|
@ -30,13 +28,13 @@ def de_column_ify():
|
|||
"y1": i.chars[0]["y1"],
|
||||
"debug_label": i.n,
|
||||
}
|
||||
for i in got[2] if i.chars
|
||||
for i in got[1] if i.chars
|
||||
])
|
||||
|
||||
def de_columnify_page(q, idx, page):
|
||||
def de_columnify_page(q, page):
|
||||
result = cluster.Chars(page.chars, page).divide_into_columns()
|
||||
log("putting", idx)
|
||||
q.put((idx, page, result))
|
||||
log("putting", page.page_number)
|
||||
q.put((page, result))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
Loading…
Reference in New Issue