threading is cool
parent
f940365111
commit
a08cb2927a
28
main.py
28
main.py
|
|
@ -2,12 +2,27 @@ import debug
|
||||||
import cluster
|
import cluster
|
||||||
import config
|
import config
|
||||||
import pdfplumber
|
import pdfplumber
|
||||||
|
from multiprocessing.pool import ThreadPool
|
||||||
|
import queue
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
de_column_ify()
|
||||||
|
|
||||||
|
def log(*args):
|
||||||
|
print(*args, flush=True)
|
||||||
|
|
||||||
|
def de_column_ify():
|
||||||
|
q = queue.Queue(maxsize=4)
|
||||||
with pdfplumber.open(config.INPUT) as pdf:
|
with pdfplumber.open(config.INPUT) as pdf:
|
||||||
for page in pdf.pages:
|
with ThreadPool(4) as pool:
|
||||||
got = cluster.Chars(page.chars, page).divide_into_columns()
|
n = 0
|
||||||
debug.draw_boxes(page, [
|
for i in range(len(pdf.pages)):
|
||||||
|
pool.apply_async(de_columnify_page, (q, n, pdf.pages[i], ))
|
||||||
|
n += 1
|
||||||
|
for i in range(n):
|
||||||
|
log("getting", i, "of", n)
|
||||||
|
got = q.get()
|
||||||
|
debug.draw_boxes(got[1] , [
|
||||||
{
|
{
|
||||||
"x0": i.chars[0]["x0"],
|
"x0": i.chars[0]["x0"],
|
||||||
"x1": i.chars[0]["x1"],
|
"x1": i.chars[0]["x1"],
|
||||||
|
|
@ -15,8 +30,13 @@ def main():
|
||||||
"y1": i.chars[0]["y1"],
|
"y1": i.chars[0]["y1"],
|
||||||
"debug_label": i.n,
|
"debug_label": i.n,
|
||||||
}
|
}
|
||||||
for i in got if i.chars
|
for i in got[2] if i.chars
|
||||||
])
|
])
|
||||||
|
|
||||||
|
def de_columnify_page(q, idx, page):
|
||||||
|
result = cluster.Chars(page.chars, page).divide_into_columns()
|
||||||
|
log("putting", idx)
|
||||||
|
q.put((idx, page, result))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue