wheeeeee
parent
1d73f84172
commit
0c1fee915a
24
main.py
24
main.py
|
|
@ -4,6 +4,7 @@ import time
|
|||
import subprocess
|
||||
|
||||
# Runtime switches, all read from the environment; unset variables fall back
# to the defaults given here.

# Any non-empty value enables the debug image drawing guarded by `if DEBUG:`.
DEBUG = os.environ.get("DEBUG", "")
# Any non-empty value makes debug_show() save the image without previewing it.
DEBUG_NO_SHOW = os.environ.get("DEBUG_NO_SHOW", "")
# Input path; presumably the PDF to process (its use is outside this view).
INPUT = os.environ.get("INPUT", "./testdata/input.pdf")
|
||||
|
||||
def main():
|
||||
|
|
@ -14,24 +15,13 @@ def main():
|
|||
print(splitpage.extract_text())
|
||||
print("/main")
|
||||
|
||||
def crop(page, x0, y0, x1, y1):
    """Return ``page`` cropped to the bounding box ``(x0, y0, x1, y1)``.

    When the module-level ``DEBUG`` flag is truthy, the outline of the box is
    first drawn on an image of the page (from ``debug_im``) and that image is
    passed to ``debug_show``.

    Args:
        page: Object exposing ``crop(bbox)``; presumably a pdfplumber page
            (confirm against the caller).
        x0, y0, x1, y1: Corner coordinates of the crop box, forwarded
            unchanged as a 4-tuple to ``page.crop``.

    Returns:
        Whatever ``page.crop((x0, y0, x1, y1))`` returns.
    """
    if DEBUG:
        im = debug_im(page)
        # The four edges of the crop rectangle, each as a (start, end) pair.
        im.draw_lines([
            ((x0, y0), (x0, y1)),
            ((x0, y1), (x1, y1)),
            ((x1, y1), (x1, y0)),
            ((x1, y0), (x0, y0)),
        ], stroke_width=5)
        debug_show(im)
    return page.crop((x0, y0, x1, y1))
|
||||
|
||||
def debug_im(page):
    """Return an image of ``page`` produced by ``page.to_image(height=800)``."""
    return page.to_image(height=800)
|
||||
|
||||
def debug_show(im):
    """Save ``im`` to ``/tmp/out.jpg`` and pass a ``qlmanage -p`` command to ``go``.

    ``go`` is defined outside this view; it presumably runs the command in a
    shell so the saved image is previewed (confirm).

    NOTE(review): a second ``debug_show`` accepting an optional ``name``
    follows later in this diff view and would shadow this definition if both
    lived in one module. This one looks like the pre-change side of the
    diff -- confirm before keeping both.
    """
    im.save("/tmp/out.jpg")
    go("qlmanage -p /tmp/out.jpg &> /dev/null")
|
||||
def debug_show(im, name=None):
    """Save a debug image under ``/tmp`` and optionally preview it.

    The image is written to ``/tmp/dnd-pdf-to-txt.jpg``, or to
    ``/tmp/dnd-pdf-to-txt-<name>.jpg`` when ``name`` is truthy. Unless the
    module-level ``DEBUG_NO_SHOW`` flag is truthy, a ``qlmanage -p`` command
    for that file is then passed to ``go`` (defined outside this view;
    presumably it runs the command in a shell -- confirm).

    Args:
        im: Object exposing ``save(path)``.
        name: Optional string appended to the file name after a ``-``.
    """
    # Build the path once so the saved file and the previewed file cannot drift.
    suffix = ("-" + name) if name else ""
    path = f"/tmp/dnd-pdf-to-txt{suffix}.jpg"
    im.save(path)
    if not DEBUG_NO_SHOW:
        go(f"qlmanage -p {path} &> /dev/null")
|
||||
|
||||
def v_split(page):
|
||||
clusters = cluster(page)
|
||||
|
|
@ -51,7 +41,7 @@ def v_split(page):
|
|||
im = debug_im(page)
|
||||
for x_cluster in x_clusters:
|
||||
im.draw_line(((x_cluster, 0), (x_cluster, page.height)))
|
||||
debug_show(im)
|
||||
debug_show(im, name=f'v-split-xclusters-{page.page_number}')
|
||||
if len(x_clusters) != 2:
|
||||
return [page]
|
||||
x = sum(x_clusters) / len(x_clusters)
|
||||
|
|
@ -115,7 +105,7 @@ def cluster(page):
|
|||
((i.x1, page.height-i.y1), (i.x1, page.height-i.y0)),
|
||||
((i.x1, page.height-i.y0), (i.x0, page.height-i.y0)),
|
||||
], stroke_width=5)
|
||||
debug_show(im)
|
||||
debug_show(im, name=f'cluster-{page.page_number}')
|
||||
return clusters
|
||||
|
||||
__subprocesses__ = []
|
||||
|
|
|
|||
Loading…
Reference in New Issue