From 31b0dbf6b37ad3ecb3eb2eb740f696bc7c6ba543 Mon Sep 17 00:00:00 2001
From: Leonhard Strohmidel <strohmidel@sitepark.com>
Date: Tue, 30 Apr 2019 15:20:44 +0200
Subject: [PATCH] +experimental evaluation

---
 evaluation.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 evaluation.py

diff --git a/evaluation.py b/evaluation.py
new file mode 100644
index 0000000..bd4294d
--- /dev/null
+++ b/evaluation.py
@@ -0,0 +1,83 @@
+from model import *
+from peewee import fn
+import json
+
+# Characters that remove_punctuation() replaces with a single space.
+# The ellipsis (U+2026) and em dash (U+2014) are written as escapes so they
+# survive re-encoding of this file; the previous literals were mojibake
+# (UTF-8 bytes decoded as cp1252) and could only match mis-decoded text.
+punctuation = ['!', '?', '.', ',', ';', ':', '-', '_', '"', "'", '\u2026', '\u2014', '#', '@']
+# Lower-case substrings (forms of address, legal-form suffixes such as
+# 'gmbh') that remove_semantics() replaces with a single space.
+semantic = ['e.v.', 'ev', 'ev.', 'e.v', 'herr', 'hr', 'frau', 'fr', 'ag', 'gmbh', '& co kg']
+
+def remove_punctuation(text):
+    """Return ``text`` with every entry of ``punctuation`` replaced by a space.
+
+    Each occurrence is swapped for exactly one blank, so the result may
+    contain runs of spaces; nothing is collapsed or trimmed here.
+    """
+    cleaned = text
+    for mark in punctuation:
+        cleaned = cleaned.replace(mark, " ")
+    return cleaned
+
+def remove_semantics(text):
+    """Return ``text`` with every entry of ``semantic`` replaced by a space.
+
+    This is a plain, case-sensitive substring replacement applied in list
+    order, so an entry is also replaced where it occurs inside a longer
+    word.
+    """
+    stripped = text
+    for token in semantic:
+        stripped = stripped.replace(token, " ")
+    return stripped
+
+def replace_umlautes(text):
+    """Rewrite German digraphs and sharp s into one canonical spelling.
+
+    The lower-case digraphs "oe", "ae" and "ue" become the umlaut letters
+    U+00F6, U+00E4 and U+00FC; the sharp s (U+00DF) becomes "ss".
+
+    Matching is a case-sensitive substring replacement, so upper-case
+    digraphs are left alone and a digraph inside an unrelated word (for
+    example the "ue" in "neue") is converted as well.
+
+    :param text: string to convert
+    :return: the converted string
+    """
+    # Unicode escapes instead of literal characters: the previous literals
+    # were mojibake (UTF-8 decoded as cp1252), which inserted garbage into
+    # the text and never matched a real sharp s.
+    replacements = (
+        ("oe", "\u00f6"),
+        ("\u00df", "ss"),
+        ("ae", "\u00e4"),
+        ("ue", "\u00fc"),
+    )
+    for old, new in replacements:
+        text = text.replace(old, new)
+    return text
+
+
+def normalize_text(text):
+    """Return a canonical form of ``text`` so equal answers compare equal.
+
+    Steps, in order: lower-case the text, apply replace_umlautes(), apply
+    remove_punctuation(), then collapse every run of whitespace into a
+    single space and trim both ends.
+
+    :param text: raw answer text
+    :return: the normalised string (may be empty)
+    """
+    # Lower-case first: replace_umlautes() only matches lower-case digraphs,
+    # so lower-casing afterwards made "KOELN" and "koeln" normalise to
+    # different strings and made a second pass change the result again.
+    text = text.lower()
+    text = replace_umlautes(text)
+    text = remove_punctuation(text)
+    # split() without arguments drops empty pieces and also treats tabs and
+    # newlines as separators, which the manual split(" ") loop did not.
+    return " ".join(text.split())
+
+
+def normalize_answers():
+    """Normalise the text of every Answer row in place.
+
+    Each row returned by ``Answer.select()`` gets its ``text`` replaced by
+    ``normalize_text(text)`` and is then saved.
+    """
+    for record in Answer.select():
+        record.text = normalize_text(record.text)
+        record.save()
+
+def get_answer_groups_for_question(question):
+    """Build the query for the answers to ``question``, grouped by text.
+
+    :param question: value compared against ``Answer.question``
+    :return: the (not yet executed) Answer query
+    """
+    grouped = Answer.select().group_by(Answer.text)
+    belongs_to_question = Answer.question == question
+    return grouped.where(belongs_to_question)
+
+def update_answer_text_batch(updates=(("", ""),)):
+    """Apply several answer-text replacements via update_answer_text().
+
+    :param updates: iterable of entries whose item 0 is the old text and
+        item 1 the new text. The default is a single pair of empty
+        strings, as before, but is now an immutable tuple rather than a
+        mutable list shared between calls.
+    """
+    for entry in updates:
+        update_answer_text(entry[0], entry[1])
+
+def update_answer_text(old, new):
+    """Set the text of every Answer whose text equals ``old`` to ``new``.
+
+    Matching rows are selected with one query and then saved one by one.
+
+    :param old: text to look for (exact match)
+    :param new: replacement text
+    """
+    matching = Answer.select().where(Answer.text == old)
+    for row in matching:
+        row.text = new
+        row.save()
+
+def update_answer_from_file():
+    """Load ``answer_update.json`` and apply it with update_answer_text_batch().
+
+    The path is relative, so the file is looked up in the current working
+    directory. Its parsed content is handed to update_answer_text_batch()
+    unchanged.
+    """
+    # JSON is exchanged as UTF-8; without an explicit encoding open() falls
+    # back to the platform default, which can mis-decode non-ASCII text.
+    with open("answer_update.json", "r", encoding="utf-8") as source:
+        updates = json.load(source)
+    # The file is closed before the database work starts.
+    update_answer_text_batch(updates)
+
+def generate_answer_classes():
+    """Placeholder: not implemented yet, currently does nothing."""
+    # TODO: implement. The explicit body keeps the module importable; a
+    # ``def`` without any body is a SyntaxError for the whole file.
+    pass
+
+
+def aggregate_answer_file():
+    """Write one ``q<id>.json`` file per question with its answer counts.
+
+    For every Question, the distinct answer texts returned by
+    get_answer_groups_for_question() are counted. The result is written as
+    ``{"question": <question text>, "answers": [[count, text], ...]}``,
+    sorted by ascending count, to ``q<id>.json`` (relative path, so in the
+    current working directory). An existing file is overwritten.
+    """
+    for q in Question.select():
+        answer_list = []
+        for a in get_answer_groups_for_question(q):
+            # Count only this question's answers. Filtering on the text
+            # alone also counted identical texts given to other questions.
+            count = (
+                Answer.select(fn.count(Answer.id))
+                .where((Answer.text == a.text) & (Answer.question == q))
+                .scalar()
+            )
+            answer_list.append((int(count), a.text))
+        # Stable sort by count only, so ties keep their query order.
+        answer_list.sort(key=lambda answer: answer[0])
+        answer_dict = {"question": q.text, "answers": answer_list}
+        with open("q%d.json" % q.id, "w") as out:
+            json.dump(answer_dict, out, indent=4, sort_keys=True)
+
+        
+
-- 
GitLab