it outputs...
parent
006a66941f
commit
ab878e9de8
13
main.py
13
main.py
|
|
@ -3,6 +3,7 @@ import cluster
|
||||||
import config
|
import config
|
||||||
import pdfplumber
|
import pdfplumber
|
||||||
from multiprocessing.pool import ThreadPool
|
from multiprocessing.pool import ThreadPool
|
||||||
|
import pypdf
|
||||||
import queue
|
import queue
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
@ -25,9 +26,15 @@ def de_column_ify():
|
||||||
cropped_pages.append((got[0], got_i.path))
|
cropped_pages.append((got[0], got_i.path))
|
||||||
#if got_i.chars:
|
#if got_i.chars:
|
||||||
# debug.debug_show(debug.debug_im(got_i.page))
|
# debug.debug_show(debug.debug_im(got_i.page))
|
||||||
cropped_pages = sorted(cropped_pages)
|
|
||||||
for cropped_page in cropped_pages:
|
cropped_pages = sorted(cropped_pages)
|
||||||
print(cropped_page)
|
writer = pypdf.PdfWriter()
|
||||||
|
for cropped_page in cropped_pages:
|
||||||
|
with open(cropped_page[1], "rb") as f:
|
||||||
|
reader = pypdf.PdfReader(f)
|
||||||
|
writer.add_page(reader.pages[0])
|
||||||
|
with open(config.INPUT + ".de_column_ified.pdf", "wb") as f:
|
||||||
|
writer.write(f)
|
||||||
|
|
||||||
def de_columnify_page(q, path, page):
|
def de_columnify_page(q, path, page):
|
||||||
result = cluster.Chars(path, page.chars, page).divide_into_columns()
|
result = cluster.Chars(path, page.chars, page).divide_into_columns()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue