whee crops

master
bel 2023-02-21 12:05:16 -07:00
parent c22168c565
commit 4a5bf4c4e1
3 changed files with 31 additions and 12 deletions

View File

@@ -76,11 +76,18 @@ class Chars:
for i in result: for i in result:
i.merge() i.merge()
for j in range(len(i.chars)): assert(len(i.chars) == 1)
i.chars[j]["x0"] -= median_height i.chars[0]["x0"] -= median_height
i.chars[j]["x1"] += median_height i.chars[0]["x1"] += median_height
i.chars[j]["y0"] -= median_height * 3 i.chars[0]["y0"] -= median_height
i.chars[j]["y1"] += median_height * 3 i.chars[0]["y1"] += median_height
bounds = i._box()
i.page = self.page.crop((
bounds.x0,
self.page.height - bounds.y1,
bounds.x1,
self.page.height - bounds.y0,
), relative=True)
return result return result

15
main.py
View File

@@ -22,13 +22,14 @@ def de_column_ify():
got = q.get() got = q.get()
for got_i in got[1]: for got_i in got[1]:
if got_i.chars: if got_i.chars:
debug.draw_boxes(got_i.page, [{ debug.debug_show(debug.debug_im(got_i.page))
"x0": got_i.chars[0]["x0"], #debug.draw_boxes(got_i.page, [{
"x1": got_i.chars[0]["x1"], # "x0": got_i.chars[0]["x0"],
"y0": got_i.chars[0]["y0"], # "x1": got_i.chars[0]["x1"],
"y1": got_i.chars[0]["y1"], # "y0": got_i.chars[0]["y0"],
"debug_label": got_i.n, # "y1": got_i.chars[0]["y1"],
}]) # "debug_label": got_i.n,
#}])
def de_columnify_page(q, page): def de_columnify_page(q, page):
result = cluster.Chars(page.chars, page).divide_into_columns() result = cluster.Chars(page.chars, page).divide_into_columns()

View File

@@ -40,6 +40,17 @@ class TestChars(unittest.TestCase):
} }
for i in got for i in got
]) ])
for i in got:
print(i.page.height, i.page.width, i._box())
debug.draw_boxes(i.page, [
{
"x0": i.chars[0]["x0"],
"x1": i.chars[0]["x1"],
"y0": i.chars[0]["y0"],
"y1": i.chars[0]["y1"],
"debug_label": i.n,
}
])
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()