From a7fc7a956ca0d3c30a15b824bed1ca80022119d4 Mon Sep 17 00:00:00 2001
From: leo <Leo.Strohmidel@gmx.net>
Date: Wed, 27 Jun 2018 20:20:56 +0200
Subject: [PATCH] +comments

---
 pdf_parser.py       | 22 ++++++++++++++++++++++
 template_manager.py |  7 +++++++
 2 files changed, 29 insertions(+)

diff --git a/pdf_parser.py b/pdf_parser.py
index d1727ba..0d5dd66 100644
--- a/pdf_parser.py
+++ b/pdf_parser.py
@@ -12,6 +12,9 @@ from pdfminer.layout import LTTextBoxHorizontal
 
 
 def get_text_elements(p_path):
+    """
+    Gets all text elements of the given document as a list.
+    """
     fp = open(p_path, 'rb')
     parser = PDFParser(fp)
     document = PDFDocument(parser)
@@ -37,6 +40,11 @@ def get_text_elements(p_path):
 
 
 def find_relative_position(p_key_box, p_value_box):
+    """
+    Calculates the distance from key_box to value_box in order to provide a
+    position relative to key_box. Example:
+    key_box contains "Total" and value_box contains "34.42€".
+    """
     kx0, ky0, kx1, ky1 = p_key_box
     vx0, vy0, vx1, vy1 = p_value_box
     center_value_x = (abs(vx0 - vx1) / 2.0) + vx0
@@ -45,6 +53,9 @@ def find_relative_position(p_key_box, p_value_box):
 
 
 def get_word_offset(p_sentence, p_word, p_seperator=" "):
+    """
+    Gets the count (index) of a word in a sentence/string
+    """
     # das wievielte wort eines satzes
     p_sentence = str(p_sentence)
     p_sentence = p_sentence.replace("\n", " ")
@@ -58,6 +69,11 @@ def get_word_offset(p_sentence, p_word, p_seperator=" "):
 
 
 def generate_template(p_example_dict, p_text_element_list):
+    """
+    Generates a template from examples.
+    The example is a dict containing key/value pairs that are found in the
+    element list.
+    """
     template = {}
     for key, value in p_example_dict.items():
         key_box = None
@@ -86,6 +102,9 @@ def generate_template(p_example_dict, p_text_element_list):
 
 
 def is_point_in_box(p_point, p_box, p_padding=5):
+    """
+    Checks whether a point is in a specific box, padded by p_padding.
+    """
     x0, y0, x1, y1 = p_box
     x, y = p_point
     x0 -= p_padding
@@ -96,6 +115,9 @@ def is_point_in_box(p_point, p_box, p_padding=5):
 
 
 def read_template(p_template, p_text_element_list):
+    """
+    Analyzes all text elements according to the given template.
+    """
     for key, pos in p_template.iteritems():
         key_box = None
         for element in p_text_element_list:
diff --git a/template_manager.py b/template_manager.py
index 574ea0d..8e18e49 100644
--- a/template_manager.py
+++ b/template_manager.py
@@ -3,6 +3,10 @@ import json
 
 
 def create_template(p_name, p_path, p_examples):
+    """
+    Generates a template from the examples and the pdf file and
+    stores it as JSON.
+    """
     elements = get_text_elements(p_path)
     template = generate_template(p_examples, elements)
     data = [p_examples, template]
@@ -12,6 +16,9 @@ def create_template(p_name, p_path, p_examples):
 
 
 def evaluate_pdf(p_template_name, p_path):
+    """
+    Analyzes a pdf according to the given template.
+    """
 
     with open(p_template_name + '.template') as f:
         data = json.load(f)
-- 
GitLab