diff --git a/cluster.py b/cluster.py index 5ddf250..66ab95f 100644 --- a/cluster.py +++ b/cluster.py @@ -82,10 +82,10 @@ class Chars: j += 1 i.merge() assert(len(i.chars) == 1) - i.chars[0]["x0"] -= median_height - i.chars[0]["x1"] += median_height - i.chars[0]["y0"] -= median_height - i.chars[0]["y1"] += median_height + #i.chars[0]["x0"] -= median_height + #i.chars[0]["x1"] += median_height + #i.chars[0]["y0"] -= median_height + #i.chars[0]["y1"] += median_height bounds = i._box() original_reader = pypdf.PdfReader(self.path) @@ -95,10 +95,10 @@ class Chars: modified_page.trimbox.upper_left = (bounds.x1, bounds.y0) modified_page.trimbox.lower_right = (bounds.x0, bounds.y1) modified_page.trimbox.lower_left = (bounds.x1, bounds.y1) - modified_page.cropbox.upper_right = (bounds.x0, bounds.y0) - modified_page.cropbox.upper_left = (bounds.x1, bounds.y0) - modified_page.cropbox.lower_right = (bounds.x0, bounds.y1) - modified_page.cropbox.lower_left = (bounds.x1, bounds.y1) + modified_page.cropbox.upper_right = (bounds.x0, bounds.y0-median_height) + modified_page.cropbox.upper_left = (bounds.x1, bounds.y0-median_height) + modified_page.cropbox.lower_right = (bounds.x0, bounds.y1+median_height) + modified_page.cropbox.lower_left = (bounds.x1, bounds.y1+median_height) modified_writer.add_page(modified_page) modified_path = "{}/{}-{:03d}-{}.modified.pdf".format( config.TEMP_DIR,