diff --git a/pdf_parser.py b/pdf_parser.py index 0d5dd66352511dd8a57979d6ce53dc41a279f88f..08c5b1d2929488b6d20eb9138c0215234d4bf718 100644 --- a/pdf_parser.py +++ b/pdf_parser.py @@ -118,7 +118,7 @@ def read_template(p_template, p_text_element_list): """ Analysis all text elements according to the given template. """ - for key, pos in p_template.iteritems(): + for key, pos in p_template.items(): key_box = None for element in p_text_element_list: if key in element.get_text(): diff --git a/pdf_parser_test.py b/pdf_parser_test.py index d2afc0194dc18424e31ae3739ed4f3a4f1e69445..8f9792a15386919ddf9050dc0f2986a4ec271633 100644 --- a/pdf_parser_test.py +++ b/pdf_parser_test.py @@ -1,15 +1,43 @@ # from pdf_parser import get_text_elements, generate_template, read_template -from template_manager import create_template +from template_manager import * -# path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2017/Eingangsrechnungen/2017-08-12 rabe getraenke 33971.pdf" -path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2016/Eingangsrechnungen/2016-09-01 sipgate EB589544.pdf" -path = "pdf/2016-09-01 sipgate EB589544.pdf" -# e = get_text_elements(path) +# =========== create templates =========== -# examples = {"Rechnungsbetrag": "183,43", "Datum": "12.8.2017", "RECHNUNGSNr": "33971"} +# path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2016/Eingangsrechnungen/2016-09-01 sipgate EB589544.pdf" +path = "pdf/2016-09-01 sipgate EB589544.pdf" examples = {"Rechnung": "EB589544", "Rechnungsdatum": "01.09.2016", "Mandatsreferenz": "8ADB836C18A8491092EA62F7F35F8A28"} create_template("sipgate", path, examples) +# path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2017/Eingangsrechnungen/2017-08-12 rabe getraenke 33971.pdf" +path = "pdf/2017-08-12 rabe getränke 33971.pdf" +examples = {"Rechnungsbetrag": "183,43", "Datum": "12.8.2017", "RECHNUNGSNr": "33971"} +create_template("rabe", path, examples) + +# =========== 
use templates to analyse pdfs and read data =========== + +evaluate_pdf(p_template_name='sipgate', p_path='pdf/2016-09-01 sipgate EB589544.pdf') +evaluate_pdf(p_template_name='sipgate', p_path='pdf/2017-01-01 sipgate EB744727.pdf') +evaluate_pdf(p_template_name='sipgate', p_path='pdf/2017-03-01 sipgate EB817766.pdf') +evaluate_pdf(p_template_name='sipgate', p_path='pdf/2017-05-01 sipgate EB892848.pdf') +evaluate_pdf(p_template_name='sipgate', p_path='pdf/2017-07-01 sipgate EB967525.pdf') +evaluate_pdf(p_template_name='sipgate', p_path='pdf/2017-09-01 sipgate EB1040838.pdf') + +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-01-21 rabe 32834.pdf') +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-03-22 rabe 33162.pdf') +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-05-25 rabe Getraenke 33527.pdf') +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-07-12 rabe getränke 33811.pdf') +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-08-12 rabe getränke 33971.pdf') +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-10-29 Rabe Getränke 34383.pdf') +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-12-08 Rabe Getränke 34589.pdf') + + + + + + + +# e = get_text_elements(path) + # g = generate_template(examples, e[0]) # path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2016/Eingangsrechnungen/2016-07-27 sipgate EB540908.pdf"