From cdb28bac6bd6d8f66480f173f2d1bf8c9b271864 Mon Sep 17 00:00:00 2001 From: larsm <a@a.a> Date: Wed, 27 Jun 2018 22:39:33 +0200 Subject: [PATCH] test verbessert --- pdf_parser.py | 2 +- pdf_parser_test.py | 40 ++++++++++++++++++++++++++++++++++------ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/pdf_parser.py b/pdf_parser.py index 0d5dd66..08c5b1d 100644 --- a/pdf_parser.py +++ b/pdf_parser.py @@ -118,7 +118,7 @@ def read_template(p_template, p_text_element_list): """ Analysis all text elements according to the given template. """ - for key, pos in p_template.iteritems(): + for key, pos in p_template.items(): key_box = None for element in p_text_element_list: if key in element.get_text(): diff --git a/pdf_parser_test.py b/pdf_parser_test.py index d2afc01..8f9792a 100644 --- a/pdf_parser_test.py +++ b/pdf_parser_test.py @@ -1,15 +1,43 @@ # from pdf_parser import get_text_elements, generate_template, read_template -from template_manager import create_template +from template_manager import * -# path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2017/Eingangsrechnungen/2017-08-12 rabe getraenke 33971.pdf" -path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2016/Eingangsrechnungen/2016-09-01 sipgate EB589544.pdf" -path = "pdf/2016-09-01 sipgate EB589544.pdf" -# e = get_text_elements(path) +# =========== create templates =========== -# examples = {"Rechnungsbetrag": "183,43", "Datum": "12.8.2017", "RECHNUNGSNr": "33971"} +# path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2016/Eingangsrechnungen/2016-09-01 sipgate EB589544.pdf" +path = "pdf/2016-09-01 sipgate EB589544.pdf" examples = {"Rechnung": "EB589544", "Rechnungsdatum": "01.09.2016", "Mandatsreferenz": "8ADB836C18A8491092EA62F7F35F8A28"} create_template("sipgate", path, examples) +# path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2017/Eingangsrechnungen/2017-08-12 rabe getraenke 33971.pdf" +path = "pdf/2017-08-12 
rabe getränke 33971.pdf" +examples = {"Rechnungsbetrag": "183,43", "Datum": "12.8.2017", "RECHNUNGSNr": "33971"} +create_template("rabe", path, examples) + +# =========== use templates to analyse pdfs and read data =========== + +evaluate_pdf(p_template_name='sipgate', p_path='pdf/2016-09-01 sipgate EB589544.pdf') +evaluate_pdf(p_template_name='sipgate', p_path='pdf/2017-01-01 sipgate EB744727.pdf') +evaluate_pdf(p_template_name='sipgate', p_path='pdf/2017-03-01 sipgate EB817766.pdf') +evaluate_pdf(p_template_name='sipgate', p_path='pdf/2017-05-01 sipgate EB892848.pdf') +evaluate_pdf(p_template_name='sipgate', p_path='pdf/2017-07-01 sipgate EB967525.pdf') +evaluate_pdf(p_template_name='sipgate', p_path='pdf/2017-09-01 sipgate EB1040838.pdf') + +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-01-21 rabe 32834.pdf') +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-03-22 rabe 33162.pdf') +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-05-25 rabe Getraenke 33527.pdf') +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-07-12 rabe getränke 33811.pdf') +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-08-12 rabe getränke 33971.pdf') +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-10-29 Rabe Getränke 34383.pdf') +evaluate_pdf(p_template_name='rabe', p_path='pdf/2017-12-08 Rabe Getränke 34589.pdf') + + + + + + + +# e = get_text_elements(path) + # g = generate_template(examples, e[0]) # path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2016/Eingangsrechnungen/2016-07-27 sipgate EB540908.pdf" -- GitLab