import debug
import cluster
import config
import pdfplumber


def main():
    """Draw a labelled debug box per character group on every page.

    Opens the PDF at ``config.INPUT`` and, for each page, passes the page's
    chars to ``cluster.Chars(...).divide_into_columns()``. For every
    returned group that has at least one char, a box is built from the
    coordinates of the group's *first* char only (not the whole group) and
    labelled with the group's ``n`` attribute. The boxes for a page are
    handed to ``debug.draw_boxes`` in a single call.
    """
    with pdfplumber.open(config.INPUT) as document:
        for page in document.pages:
            groups = cluster.Chars(page.chars, page).divide_into_columns()

            boxes = []
            for group in groups:
                # Groups without chars have nothing to anchor a box on.
                if not group.chars:
                    continue
                anchor = group.chars[0]
                boxes.append(
                    {
                        "x0": anchor["x0"],
                        "x1": anchor["x1"],
                        "y0": anchor["y0"],
                        "y1": anchor["y1"],
                        "debug_label": group.n,
                    }
                )

            debug.draw_boxes(page, boxes)


if __name__ == "__main__":
    main()