Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
W
warpDF-analyser
Manage
Activity
Members
Labels
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container Registry
Model registry
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
leo
warpDF-analyser
Commits
cdb28bac
Commit
cdb28bac
authored
6 years ago
by
larsm
Browse files
Options
Downloads
Patches
Plain Diff
test verbessert
parent
a7fc7a95
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
pdf_parser.py
+1
-1
1 addition, 1 deletion
pdf_parser.py
pdf_parser_test.py
+34
-6
34 additions, 6 deletions
pdf_parser_test.py
with
35 additions
and
7 deletions
pdf_parser.py
+
1
−
1
View file @
cdb28bac
...
@@ -118,7 +118,7 @@ def read_template(p_template, p_text_element_list):
...
@@ -118,7 +118,7 @@ def read_template(p_template, p_text_element_list):
"""
"""
Analyses all text elements according to the given template.
Analyses all text elements according to the given template.
"""
"""
for
key
,
pos
in
p_template
.
iter
items
():
for
key
,
pos
in
p_template
.
items
():
key_box
=
None
key_box
=
None
for
element
in
p_text_element_list
:
for
element
in
p_text_element_list
:
if
key
in
element
.
get_text
():
if
key
in
element
.
get_text
():
...
...
This diff is collapsed.
Click to expand it.
pdf_parser_test.py
+
34
−
6
View file @
cdb28bac
# from pdf_parser import get_text_elements, generate_template, read_template
# from pdf_parser import get_text_elements, generate_template, read_template
from
template_manager
import
create_template
from
template_manager
import
*
# path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2017/Eingangsrechnungen/2017-08-12 rabe getraenke 33971.pdf"
# =========== create templates ===========
path
=
"
/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2016/Eingangsrechnungen/2016-09-01 sipgate EB589544.pdf
"
path
=
"
pdf/2016-09-01 sipgate EB589544.pdf
"
# e = get_text_elements(path)
# examples = {"Rechnungsbetrag": "183,43", "Datum": "12.8.2017", "RECHNUNGSNr": "33971"}
# path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2016/Eingangsrechnungen/2016-09-01 sipgate EB589544.pdf"
path
=
"
pdf/2016-09-01 sipgate EB589544.pdf
"
examples
=
{
"
Rechnung
"
:
"
EB589544
"
,
"
Rechnungsdatum
"
:
"
01.09.2016
"
,
"
Mandatsreferenz
"
:
"
8ADB836C18A8491092EA62F7F35F8A28
"
}
examples
=
{
"
Rechnung
"
:
"
EB589544
"
,
"
Rechnungsdatum
"
:
"
01.09.2016
"
,
"
Mandatsreferenz
"
:
"
8ADB836C18A8491092EA62F7F35F8A28
"
}
create_template
(
"
sipgate
"
,
path
,
examples
)
create_template
(
"
sipgate
"
,
path
,
examples
)
# path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2017/Eingangsrechnungen/2017-08-12 rabe getraenke 33971.pdf"
path
=
"
pdf/2017-08-12 rabe getränke 33971.pdf
"
examples
=
{
"
Rechnungsbetrag
"
:
"
183,43
"
,
"
Datum
"
:
"
12.8.2017
"
,
"
RECHNUNGSNr
"
:
"
33971
"
}
create_template
(
"
rabe
"
,
path
,
examples
)
# =========== use templates to analyse pdfs and read data ===========
evaluate_pdf
(
p_template_name
=
'
sipgate
'
,
p_path
=
'
pdf/2016-09-01 sipgate EB589544.pdf
'
)
evaluate_pdf
(
p_template_name
=
'
sipgate
'
,
p_path
=
'
pdf/2017-01-01 sipgate EB744727.pdf
'
)
evaluate_pdf
(
p_template_name
=
'
sipgate
'
,
p_path
=
'
pdf/2017-03-01 sipgate EB817766.pdf
'
)
evaluate_pdf
(
p_template_name
=
'
sipgate
'
,
p_path
=
'
pdf/2017-05-01 sipgate EB892848.pdf
'
)
evaluate_pdf
(
p_template_name
=
'
sipgate
'
,
p_path
=
'
pdf/2017-07-01 sipgate EB967525.pdf
'
)
evaluate_pdf
(
p_template_name
=
'
sipgate
'
,
p_path
=
'
pdf/2017-09-01 sipgate EB1040838.pdf
'
)
evaluate_pdf
(
p_template_name
=
'
rabe
'
,
p_path
=
'
pdf/2017-01-21 rabe 32834.pdf
'
)
evaluate_pdf
(
p_template_name
=
'
rabe
'
,
p_path
=
'
pdf/2017-03-22 rabe 33162.pdf
'
)
evaluate_pdf
(
p_template_name
=
'
rabe
'
,
p_path
=
'
pdf/2017-05-25 rabe Getraenke 33527.pdf
'
)
evaluate_pdf
(
p_template_name
=
'
rabe
'
,
p_path
=
'
pdf/2017-07-12 rabe getränke 33811.pdf
'
)
evaluate_pdf
(
p_template_name
=
'
rabe
'
,
p_path
=
'
pdf/2017-08-12 rabe getränke 33971.pdf
'
)
evaluate_pdf
(
p_template_name
=
'
rabe
'
,
p_path
=
'
pdf/2017-10-29 Rabe Getränke 34383.pdf
'
)
evaluate_pdf
(
p_template_name
=
'
rabe
'
,
p_path
=
'
pdf/2017-12-08 Rabe Getränke 34589.pdf
'
)
# e = get_text_elements(path)
# g = generate_template(examples, e[0])
# g = generate_template(examples, e[0])
# path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2016/Eingangsrechnungen/2016-07-27 sipgate EB540908.pdf"
# path = "/home/reverend/Programmierung/git_projects/warpzone-rechnungen/2016/Eingangsrechnungen/2016-07-27 sipgate EB540908.pdf"
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment