diff --git a/main.py b/main.py index 773a67a..dd2af93 100644 --- a/main.py +++ b/main.py @@ -4,6 +4,7 @@ import time import subprocess DEBUG = os.environ.get("DEBUG", "") +DEBUG_NO_SHOW = os.environ.get("DEBUG_NO_SHOW", "") INPUT = os.environ.get("INPUT", "./testdata/input.pdf") def main(): @@ -14,24 +15,13 @@ def main(): print(splitpage.extract_text()) print("/main") -def crop(page, x0, y0, x1, y1): - if DEBUG: - im = debug_im(page) - im.draw_lines([ - ((x0, y0), (x0, y1)), - ((x0, y1), (x1, y1)), - ((x1, y1), (x1, y0)), - ((x1, y0), (x0, y0)), - ], stroke_width=5) - debug_show(im) - return page.crop((x0, y0, x1, y1)) - def debug_im(page): return page.to_image(height=800) -def debug_show(im): - im.save("/tmp/out.jpg") - go("qlmanage -p /tmp/out.jpg &> /dev/null") +def debug_show(im, name=None): + im.save(f"/tmp/dnd-pdf-to-txt{'' if not name else '-'+name}.jpg") + if not DEBUG_NO_SHOW: + go(f"qlmanage -p /tmp/dnd-pdf-to-txt{'' if not name else '-'+name}.jpg &> /dev/null") def v_split(page): clusters = cluster(page) @@ -51,7 +41,7 @@ def v_split(page): im = debug_im(page) for x_cluster in x_clusters: im.draw_line(((x_cluster, 0), (x_cluster, page.height))) - debug_show(im) + debug_show(im, name=f'v-split-xclusters-{page.page_number}') if len(x_clusters) != 2: return [page] x = sum(x_clusters) / len(x_clusters) @@ -115,7 +105,7 @@ def cluster(page): ((i.x1, page.height-i.y1), (i.x1, page.height-i.y0)), ((i.x1, page.height-i.y0), (i.x0, page.height-i.y0)), ], stroke_width=5) - debug_show(im) + debug_show(im, name=f'cluster-{page.page_number}') return clusters __subprocesses__ = []