diff --git a/cluster.py b/cluster.py index 4598a43..4319e76 100644 --- a/cluster.py +++ b/cluster.py @@ -76,11 +76,18 @@ class Chars: for i in result: i.merge() - for j in range(len(i.chars)): - i.chars[j]["x0"] -= median_height - i.chars[j]["x1"] += median_height - i.chars[j]["y0"] -= median_height * 3 - i.chars[j]["y1"] += median_height * 3 + assert(len(i.chars) == 1) + i.chars[0]["x0"] -= median_height + i.chars[0]["x1"] += median_height + i.chars[0]["y0"] -= median_height + i.chars[0]["y1"] += median_height + bounds = i._box() + i.page = self.page.crop(( + bounds.x0, + self.page.height - bounds.y1, + bounds.x1, + self.page.height - bounds.y0, + ), relative=True) return result diff --git a/main.py b/main.py index 17418f0..724467b 100644 --- a/main.py +++ b/main.py @@ -22,13 +22,14 @@ def de_column_ify(): got = q.get() for got_i in got[1]: if got_i.chars: - debug.draw_boxes(got_i.page, [{ - "x0": got_i.chars[0]["x0"], - "x1": got_i.chars[0]["x1"], - "y0": got_i.chars[0]["y0"], - "y1": got_i.chars[0]["y1"], - "debug_label": got_i.n, - }]) + debug.debug_show(debug.debug_im(got_i.page)) + #debug.draw_boxes(got_i.page, [{ + # "x0": got_i.chars[0]["x0"], + # "x1": got_i.chars[0]["x1"], + # "y0": got_i.chars[0]["y0"], + # "y1": got_i.chars[0]["y1"], + # "debug_label": got_i.n, + #}]) def de_columnify_page(q, page): result = cluster.Chars(page.chars, page).divide_into_columns() diff --git a/test_cluster.py b/test_cluster.py index 797b9db..ffac0d9 100644 --- a/test_cluster.py +++ b/test_cluster.py @@ -40,6 +40,17 @@ class TestChars(unittest.TestCase): } for i in got ]) + for i in got: + print(i.page.height, i.page.width, i._box()) + debug.draw_boxes(i.page, [ + { + "x0": i.chars[0]["x0"], + "x1": i.chars[0]["x1"], + "y0": i.chars[0]["y0"], + "y1": i.chars[0]["y1"], + "debug_label": i.n, + } + ]) if __name__ == "__main__": unittest.main()