"""Normalise, rewrite and aggregate free-text answers stored in the database.

``Answer`` and ``Question`` are not defined in this module; they are expected
to be provided by the star import of ``model`` (presumably peewee models --
confirm against ``model.py``).
"""
# The star import stays first so that the explicit imports below win over any
# same-named export of ``model``.
from model import *
from peewee import fn
import json
import re

# Characters that remove_punctuation() replaces with a single space.
punctuation = ['!', '?', '.', ',', ';', ':', '-', '_', '"', "'", '…', '—', '#', '@']
# Tokens that remove_semantics() strips (they look like German salutations and
# legal-form suffixes -- confirm the intended list with the data owner).
semantic = ['e.v.', 'ev', 'ev.', 'e.v', 'herr', 'hr', 'frau', 'fr', 'ag', 'gmbh', '& co kg']


def remove_punctuation(text):
    """Return *text* with every character in ``punctuation`` replaced by a space."""
    for char in punctuation:
        text = text.replace(char, " ")
    return text


def remove_semantics(text):
    """Return *text* with every stand-alone ``semantic`` token replaced by a space.

    Matching is case-sensitive (the tokens are lower-case) and only hits whole
    tokens: an entry is replaced only when it is not directly preceded or
    followed by a word character, so 'ag' no longer eats part of 'tag' and
    'fr' no longer eats part of 'frage'.
    """
    # Longest entries first, otherwise 'ev' would win over 'ev.' and leave a
    # stray '.' behind.
    alternatives = "|".join(
        re.escape(token) for token in sorted(semantic, key=len, reverse=True)
    )
    if not alternatives:
        # An empty alternation would match the empty string everywhere.
        return text
    pattern = r"(?<!\w)(?:" + alternatives + r")(?!\w)"
    return re.sub(pattern, " ", text)


def replace_umlautes(text):
    """Rewrite ASCII digraphs as umlauts and 'ß' as 'ss'.

    'oe' -> 'ö', 'ae' -> 'ä', 'ue' -> 'ü', 'ß' -> 'ss'. The replacement is
    case-sensitive and applies to every occurrence of the sequence.
    NOTE(review): this also rewrites words where the digraph is not an
    umlaut spelling (e.g. 'neue' -> 'neü'); that is harmless only as long as
    the result is used purely as a grouping key -- confirm.
    """
    text = text.replace("oe", "ö")
    text = text.replace("ß", "ss")
    text = text.replace("ae", "ä")
    text = text.replace("ue", "ü")
    return text


def normalize_text(text):
    """Return the canonical form of *text* used to compare answers.

    Steps: lower-case, unify umlaut spellings, turn punctuation into spaces,
    then collapse every run of whitespace into one space and trim the ends.
    """
    # Lower-case first: replace_umlautes() is case-sensitive, so 'Oel' or
    # 'UE' would otherwise slip through un-normalised.
    text = text.lower()
    text = replace_umlautes(text)
    text = remove_punctuation(text)
    # split() without arguments drops empty pieces and treats tabs/newlines
    # like spaces, so no manual filtering or strip() is needed.
    return " ".join(text.split())


def normalize_answers():
    """Replace the text of every stored answer with its normalised form."""
    for answer in Answer.select():
        answer.text = normalize_text(answer.text)
        answer.save()


def get_answer_groups_for_question(question):
    """Return a query over the answers to *question*, grouped by answer text."""
    return Answer.select().group_by(Answer.text).where(Answer.question == question)


def update_answer_text_batch(updates=(("", ""),)):
    """Apply several text rewrites.

    *updates* is an iterable of ``(old, new)`` pairs; each pair is handed to
    update_answer_text(). The default is the same single ``("", "")`` pair
    as before, now held in an immutable tuple instead of a shared list.
    """
    for pair in updates:
        update_answer_text(pair[0], pair[1])


def update_answer_text(old, new):
    """Set the text of every answer whose text equals *old* to *new*."""
    for answer in Answer.select().where(Answer.text == old):
        answer.text = new
        answer.save()


def update_answer_from_file():
    """Load rewrite pairs from ``answer_update.json`` and apply them.

    The file is read as UTF-8 (the JSON default) instead of the platform
    encoding, so umlauts in the pairs are decoded the same way everywhere.
    """
    with open("answer_update.json", "r", encoding="utf-8") as file:
        updates = json.load(file)
    update_answer_text_batch(updates)


def generate_answer_classes():
    """Placeholder: this function had no body, which made the module unimportable.

    Raises:
        NotImplementedError: always, until the function is implemented.
    """
    # TODO: implement; the intended behaviour is not visible in this file.
    raise NotImplementedError("generate_answer_classes is not implemented yet")


def aggregate_answer_file():
    """Write one ``q<id>.json`` file per question with its answer counts.

    Each file holds the question text and a list of ``[count, text]`` pairs
    sorted by ascending count.
    """
    for question in Question.select():
        answer_list = []
        for group in get_answer_groups_for_question(question):
            # Count only answers to *this* question; filtering on the text
            # alone also counted identical answers to other questions.
            count = (
                Answer.select(fn.count(Answer.id))
                .where(Answer.question == question, Answer.text == group.text)
                .scalar()
            )
            answer_list.append((int(count), group.text))
        answer_list.sort(key=lambda entry: entry[0])
        answer_dict = {"question": question.text, "answers": answer_list}
        with open("q%d.json" % question.id, "w", encoding="utf-8") as file:
            json.dump(answer_dict, file, indent=4, sort_keys=True)