23 lines
556 B
Python
23 lines
556 B
Python
import debug
|
|
import cluster
|
|
import config
|
|
import pdfplumber
|
|
|
|
def main():
    """Draw one debug box per clustered group on every page of the input PDF.

    Opens the PDF at ``config.INPUT`` and, for each page, runs
    ``cluster.Chars(page.chars, page).divide_into_columns()``. Every
    returned item with a non-empty ``chars`` sequence contributes one box
    whose coordinates are copied from its first character and whose label
    is the item's ``n`` attribute. The boxes are handed to
    ``debug.draw_boxes`` once per page.
    """
    with pdfplumber.open(config.INPUT) as pdf:
        for page in pdf.pages:
            # NOTE(review): presumably each returned item is one text
            # column -- confirm against cluster.Chars.
            columns = cluster.Chars(page.chars, page).divide_into_columns()

            boxes = []
            for column in columns:
                # Items without characters have nothing to anchor a box to.
                if not column.chars:
                    continue
                # Only the first character's bounding box is drawn.
                first_char = column.chars[0]
                boxes.append(
                    {
                        "x0": first_char["x0"],
                        "x1": first_char["x1"],
                        "y0": first_char["y0"],
                        "y1": first_char["y1"],
                        "debug_label": column.n,
                    }
                )

            # Called for every page, even when no boxes were collected.
            debug.draw_boxes(page, boxes)
|
|
|
|
# Run the debug drawing only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|