whee crops

master
bel 2023-02-21 12:05:16 -07:00
parent c22168c565
commit 4a5bf4c4e1
3 changed files with 31 additions and 12 deletions

View File

@ -76,11 +76,18 @@ class Chars:
for i in result:
i.merge()
for j in range(len(i.chars)):
i.chars[j]["x0"] -= median_height
i.chars[j]["x1"] += median_height
i.chars[j]["y0"] -= median_height * 3
i.chars[j]["y1"] += median_height * 3
assert(len(i.chars) == 1)
i.chars[0]["x0"] -= median_height
i.chars[0]["x1"] += median_height
i.chars[0]["y0"] -= median_height
i.chars[0]["y1"] += median_height
bounds = i._box()
i.page = self.page.crop((
bounds.x0,
self.page.height - bounds.y1,
bounds.x1,
self.page.height - bounds.y0,
), relative=True)
return result

15
main.py
View File

@ -22,13 +22,14 @@ def de_column_ify():
got = q.get()
for got_i in got[1]:
if got_i.chars:
debug.draw_boxes(got_i.page, [{
"x0": got_i.chars[0]["x0"],
"x1": got_i.chars[0]["x1"],
"y0": got_i.chars[0]["y0"],
"y1": got_i.chars[0]["y1"],
"debug_label": got_i.n,
}])
debug.debug_show(debug.debug_im(got_i.page))
#debug.draw_boxes(got_i.page, [{
# "x0": got_i.chars[0]["x0"],
# "x1": got_i.chars[0]["x1"],
# "y0": got_i.chars[0]["y0"],
# "y1": got_i.chars[0]["y1"],
# "debug_label": got_i.n,
#}])
def de_columnify_page(q, page):
result = cluster.Chars(page.chars, page).divide_into_columns()

View File

@ -40,6 +40,17 @@ class TestChars(unittest.TestCase):
}
for i in got
])
for i in got:
print(i.page.height, i.page.width, i._box())
debug.draw_boxes(i.page, [
{
"x0": i.chars[0]["x0"],
"x1": i.chars[0]["x1"],
"y0": i.chars[0]["y0"],
"y1": i.chars[0]["y1"],
"debug_label": i.n,
}
])
# Script entry point: when this test module is executed directly (rather than
# imported), hand control to unittest's command-line runner.
if __name__ == "__main__":
unittest.main()