Skip to content
Snippets Groups Projects
Commit 31b0dbf6 authored by Leonhard Strohmidel's avatar Leonhard Strohmidel
Browse files

+experimental evaluation

parent ce87a664
No related branches found
No related tags found
No related merge requests found
from model import *
from peewee import fn
import json
# Single characters that remove_punctuation() replaces with a space.
# The list must not contain the empty string: str.replace("", " ") inserts a
# space between every character of the text.
# NOTE(review): two empty entries stood between "'" and '#'; they may have been
# characters lost to an encoding problem - re-add the intended ones if known.
punctuation = ['!', '?', '.', ',', ';', ':', '-', '_', '"', "'", '#', '@']
# Lower-case title and legal-form tokens that remove_semantics() replaces
# with a space.
semantic = ['e.v.', 'ev', 'ev.', 'e.v', 'herr', 'hr', 'frau', 'fr', 'ag', 'gmbh', '& co kg']
def remove_punctuation(text):
    """Return *text* with every entry of ``punctuation`` replaced by a space.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string. Each replaced character leaves one space behind,
        so the result can contain runs of spaces.
    """
    for char in punctuation:
        # Skip empty entries: str.replace("", " ") would insert a space
        # between every character of the text.
        if char:
            text = text.replace(char, " ")
    return text
def remove_semantics(text):
    """Return *text* with every stand-alone entry of ``semantic`` replaced by a space.

    An entry is only replaced where it is not directly preceded or followed
    by a letter, digit or underscore. Plain substring replacement would also
    hit the inside of ordinary words ("fr" in "frage", "ag" in "tag").
    Matching is case-sensitive.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; each replaced token leaves one space behind.
    """
    import re  # local import so the block does not depend on file-level imports

    # Longest entries first so that "e.v." is preferred over its prefix "e.v".
    tokens = sorted((token for token in semantic if token), key=len, reverse=True)
    if not tokens:
        return text
    alternatives = "|".join(re.escape(token) for token in tokens)
    pattern = r"(?<!\w)(?:" + alternatives + r")(?!\w)"
    return re.sub(pattern, " ", text)
def replace_umlautes(text):
    """Unify German umlaut spellings in *text*.

    The digraphs "oe", "ae" and "ue" become "ö", "ä" and "ü", and "ß" becomes
    "ss". Capitalized digraphs ("Oe", "OE", ...) become the upper-case
    umlaut, so "Oel" and "oel" are equal once the result is lower-cased.

    NOTE: the replacement is purely textual, so a digraph that is not an
    umlaut spelling is converted as well ("aktuell" -> "aktüll").

    Args:
        text: The string to convert.

    Returns:
        The converted string.
    """
    replacements = (
        ("oe", "ö"), ("Oe", "Ö"), ("OE", "Ö"),
        ("ß", "ss"),
        ("ae", "ä"), ("Ae", "Ä"), ("AE", "Ä"),
        ("ue", "ü"), ("Ue", "Ü"), ("UE", "Ü"),
    )
    for old, new in replacements:
        text = text.replace(old, new)
    return text
def normalize_text(text):
    """Return the canonical form of *text* used to compare answers.

    Steps, in order: unify umlaut spellings (``replace_umlautes``), replace
    punctuation with spaces (``remove_punctuation``), lower-case, then
    collapse every run of whitespace to a single space and trim both ends.

    Args:
        text: The raw string.

    Returns:
        The normalized string; empty if nothing but whitespace and
        punctuation was given.
    """
    text = replace_umlautes(text)
    text = remove_punctuation(text)
    text = text.lower()
    # split() without arguments splits on any whitespace run and drops empty
    # pieces, so tabs and newlines are collapsed as well as spaces.
    return " ".join(text.split())
def normalize_answers():
    """Store the normalized text for every ``Answer`` row.

    Each selected row has its ``text`` passed through ``normalize_text`` and
    is then saved again.
    """
    for record in Answer.select():
        record.text = normalize_text(record.text)
        record.save()
def get_answer_groups_for_question(question):
    """Return a query for the answers to *question*, grouped by ``Answer.text``."""
    belongs_to_question = Answer.question == question
    query = Answer.select().where(belongs_to_question)
    return query.group_by(Answer.text)
def update_answer_text_batch(updates=(("", ""),)):
    """Apply a series of answer-text replacements.

    Args:
        updates: Iterable of ``(old, new)`` pairs (tuples or lists). Each
            pair is forwarded to ``update_answer_text``. The default is an
            immutable tuple holding the single pair ``("", "")``.
    """
    for pair in updates:
        update_answer_text(pair[0], pair[1])
def update_answer_text(old, new):
    """Change the text of every ``Answer`` whose text equals *old* to *new*.

    Args:
        old: Text to look for (exact match on ``Answer.text``).
        new: Text to store instead.
    """
    if old == new:
        # Nothing would change; skip the query and the row-by-row saves.
        return
    # Fetch all matches first so that no row is saved while the results of
    # the query that selected it are still being read.
    matches = list(Answer.select().where(Answer.text == old))
    for match in matches:
        match.text = new
        match.save()
def update_answer_from_file(path="answer_update.json"):
    """Load replacement pairs from a JSON file and apply them.

    Args:
        path: JSON file to read; defaults to ``answer_update.json`` relative
            to the current working directory. The parsed content is passed
            unchanged to ``update_answer_text_batch``, so it is expected to
            be a list of ``[old, new]`` pairs.
    """
    # Read as UTF-8 explicitly; otherwise the platform default encoding is
    # used, which can garble non-ASCII characters such as umlauts.
    with open(path, "r", encoding="utf-8") as file:
        pairs = json.load(file)
    update_answer_text_batch(pairs)
def generate_answer_classes():
    """Placeholder without behavior; calling it does nothing.

    NOTE(review): the header of this function was followed directly by the
    definition of ``aggregate_answer_file`` and nothing else.
    ``aggregate_answer_file`` is defined at module level rather than inside
    this function so that it can actually be called.
    """


def aggregate_answer_file():
    """Write one ``q<id>.json`` file per question with its answer counts.

    For every ``Question`` the distinct answer texts are taken from
    ``get_answer_groups_for_question`` and counted. The file holds a JSON
    object with the keys ``"question"`` (the question text) and ``"answers"``
    (a list of ``[count, text]`` pairs sorted by ascending count). Files are
    written to the current working directory.
    """
    for q in Question.select():
        answer_list = []
        for group in get_answer_groups_for_question(q):
            # Count only answers to this question; without the question
            # filter, identical texts given to other questions would be
            # included in the count.
            count = (
                Answer.select(fn.count(Answer.id))
                .where(Answer.question == q, Answer.text == group.text)
                .scalar()
            )
            answer_list.append((int(count), group.text))
        answer_list.sort(key=lambda entry: entry[0])
        answer_dict = {"question": q.text, "answers": answer_list}
        with open("q%d.json" % q.id, "w") as file:
            json.dump(answer_dict, file, indent=4, sort_keys=True)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment