diff --git a/cluster.py b/cluster.py index 5d65604..4e3af2e 100644 --- a/cluster.py +++ b/cluster.py @@ -36,6 +36,7 @@ class Chars: result[-1].merge_in(char) else: result.append(Chars([char], self.page)) + # TODO remove headers, footers; maybe median font size vs. max of cluster? result = [i for i in result if i.n > 2] # merge all vertically overlapping boxes