wheeeeee
parent
1d73f84172
commit
0c1fee915a
24
main.py
24
main.py
|
|
@ -4,6 +4,7 @@ import time
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
DEBUG = os.environ.get("DEBUG", "")
|
DEBUG = os.environ.get("DEBUG", "")
|
||||||
|
DEBUG_NO_SHOW = os.environ.get("DEBUG_NO_SHOW", "")
|
||||||
INPUT = os.environ.get("INPUT", "./testdata/input.pdf")
|
INPUT = os.environ.get("INPUT", "./testdata/input.pdf")
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
@ -14,24 +15,13 @@ def main():
|
||||||
print(splitpage.extract_text())
|
print(splitpage.extract_text())
|
||||||
print("/main")
|
print("/main")
|
||||||
|
|
||||||
def crop(page, x0, y0, x1, y1):
|
|
||||||
if DEBUG:
|
|
||||||
im = debug_im(page)
|
|
||||||
im.draw_lines([
|
|
||||||
((x0, y0), (x0, y1)),
|
|
||||||
((x0, y1), (x1, y1)),
|
|
||||||
((x1, y1), (x1, y0)),
|
|
||||||
((x1, y0), (x0, y0)),
|
|
||||||
], stroke_width=5)
|
|
||||||
debug_show(im)
|
|
||||||
return page.crop((x0, y0, x1, y1))
|
|
||||||
|
|
||||||
def debug_im(page):
|
def debug_im(page):
|
||||||
return page.to_image(height=800)
|
return page.to_image(height=800)
|
||||||
|
|
||||||
def debug_show(im):
|
def debug_show(im, name=None):
|
||||||
im.save("/tmp/out.jpg")
|
im.save(f"/tmp/dnd-pdf-to-txt{'' if not name else '-'+name}.jpg")
|
||||||
go("qlmanage -p /tmp/out.jpg &> /dev/null")
|
if not DEBUG_NO_SHOW:
|
||||||
|
go(f"qlmanage -p /tmp/dnd-pdf-to-txt{'' if not name else '-'+name}.jpg &> /dev/null")
|
||||||
|
|
||||||
def v_split(page):
|
def v_split(page):
|
||||||
clusters = cluster(page)
|
clusters = cluster(page)
|
||||||
|
|
@ -51,7 +41,7 @@ def v_split(page):
|
||||||
im = debug_im(page)
|
im = debug_im(page)
|
||||||
for x_cluster in x_clusters:
|
for x_cluster in x_clusters:
|
||||||
im.draw_line(((x_cluster, 0), (x_cluster, page.height)))
|
im.draw_line(((x_cluster, 0), (x_cluster, page.height)))
|
||||||
debug_show(im)
|
debug_show(im, name=f'v-split-xclusters-{page.page_number}')
|
||||||
if len(x_clusters) != 2:
|
if len(x_clusters) != 2:
|
||||||
return [page]
|
return [page]
|
||||||
x = sum(x_clusters) / len(x_clusters)
|
x = sum(x_clusters) / len(x_clusters)
|
||||||
|
|
@ -115,7 +105,7 @@ def cluster(page):
|
||||||
((i.x1, page.height-i.y1), (i.x1, page.height-i.y0)),
|
((i.x1, page.height-i.y1), (i.x1, page.height-i.y0)),
|
||||||
((i.x1, page.height-i.y0), (i.x0, page.height-i.y0)),
|
((i.x1, page.height-i.y0), (i.x0, page.height-i.y0)),
|
||||||
], stroke_width=5)
|
], stroke_width=5)
|
||||||
debug_show(im)
|
debug_show(im, name=f'cluster-{page.page_number}')
|
||||||
return clusters
|
return clusters
|
||||||
|
|
||||||
__subprocesses__ = []
|
__subprocesses__ = []
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue