gather pages
parent
689998d71f
commit
006a66941f
9
main.py
9
main.py
|
|
@ -14,6 +14,7 @@ def log(*args):
|
||||||
def de_column_ify():
|
def de_column_ify():
|
||||||
q = queue.Queue(maxsize=4)
|
q = queue.Queue(maxsize=4)
|
||||||
with pdfplumber.open(config.INPUT) as pdf:
|
with pdfplumber.open(config.INPUT) as pdf:
|
||||||
|
cropped_pages = []
|
||||||
with ThreadPool(4) as pool:
|
with ThreadPool(4) as pool:
|
||||||
for i in range(len(pdf.pages)):
|
for i in range(len(pdf.pages)):
|
||||||
pool.apply_async(de_columnify_page, (q, config.INPUT, pdf.pages[i], ))
|
pool.apply_async(de_columnify_page, (q, config.INPUT, pdf.pages[i], ))
|
||||||
|
|
@ -21,8 +22,12 @@ def de_column_ify():
|
||||||
log("getting", i, "of", len(pdf.pages))
|
log("getting", i, "of", len(pdf.pages))
|
||||||
got = q.get()
|
got = q.get()
|
||||||
for got_i in got[1]:
|
for got_i in got[1]:
|
||||||
if got_i.chars:
|
cropped_pages.append((got[0], got_i.path))
|
||||||
debug.debug_show(debug.debug_im(got_i.page))
|
#if got_i.chars:
|
||||||
|
# debug.debug_show(debug.debug_im(got_i.page))
|
||||||
|
cropped_pages = sorted(cropped_pages)
|
||||||
|
for cropped_page in cropped_pages:
|
||||||
|
print(cropped_page)
|
||||||
|
|
||||||
def de_columnify_page(q, path, page):
|
def de_columnify_page(q, path, page):
|
||||||
result = cluster.Chars(path, page.chars, page).divide_into_columns()
|
result = cluster.Chars(path, page.chars, page).divide_into_columns()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue