# Source code for textbox.evaluator.chrfplusplus_evaluator

# @Time   : 2021/4/19
# @Author : Lai Xu
# @Email  : tsui_lai@163.com

"""
textbox.evaluator.chrfplusplus_evaluator
########################################
"""

import re
import numpy as np
from nltk.util import ngrams
from collections import defaultdict, Counter
from textbox.evaluator.abstract_evaluator import AbstractEvaluator


class ChrfPlusPlusEvaluator(AbstractEvaluator):
    r"""Evaluator computing chrF++-style scores from character and word n-grams.

    For every hypothesis/reference pair, clipped n-gram matches are counted
    for each configured character order and word order. Per-order precision,
    recall and F-beta are computed and averaged over all orders.

    Attributes are set in ``__init__``:
        char_n_grams: character n-gram orders that are scored.
        word_n_grams: word n-gram orders that are scored.
        beta: weight of recall relative to precision in the F-score.
    """

    def __init__(self):
        # NOTE(review): the base-class initialiser is not invoked here, as in
        # the original code -- confirm AbstractEvaluator needs no set-up.
        self.char_n_grams = [1, 2, 3, 4, 5, 6]
        self.word_n_grams = [1, 2]
        self.beta = 3.0

    def _preprocess(self, input_sentence, ignore_whitespace=True):
        r"""Turn a sentence into the string used for character n-grams.

        Args:
            input_sentence (str or List[str]): a raw string, or a list of
                tokens which is joined with single spaces.
            ignore_whitespace (bool): if True, remove every whitespace run.

        Returns:
            str: the processed sentence.

        Raises:
            TypeError: if ``input_sentence`` is neither a string nor a list.
        """
        if isinstance(input_sentence, list):
            input_sentence = " ".join(input_sentence)
        elif not isinstance(input_sentence, str):
            raise TypeError('Input must be a string or list')

        if ignore_whitespace:
            # Raw string: "\s" in a plain literal is an invalid escape sequence.
            input_sentence = re.sub(r"\s+", "", input_sentence)
        return input_sentence

    @staticmethod
    def _word_tokens(sentence):
        r"""Return the token sequence used for word n-grams.

        Token lists are returned unchanged. A plain string is split on
        whitespace so that word n-grams are not built over its characters.
        """
        if isinstance(sentence, str):
            return sentence.split()
        return sentence

    def _generate_ngrams(self, input_sentence, task_type):
        r"""Count the n-grams of a sequence for every configured order.

        Args:
            input_sentence (str or List[str]): the sequence to slice; a
                string yields character n-grams, a token list word n-grams.
            task_type (str): ``'char'`` or ``'word'`` (case-insensitive);
                selects ``self.char_n_grams`` or ``self.word_n_grams``.

        Returns:
            defaultdict: maps each order ``n`` to a ``Counter`` of n-gram tuples.

        Raises:
            KeyError: if ``task_type`` is not ``'char'`` or ``'word'``.
        """
        task = task_type.lower()
        if task == "char":
            orders = self.char_n_grams
        elif task == "word":
            orders = self.word_n_grams
        else:
            raise KeyError("Task type should be in ['char', 'word']")

        result = defaultdict(Counter)
        for order in orders:
            result[order] = Counter(ngrams(input_sentence, order))
        return result

    def _ngrams_match(self, gen_ngrams, ref_ngrams):
        r"""Count clipped n-gram matches between a hypothesis and a reference.

        Args:
            gen_ngrams (dict): order -> ``Counter`` of hypothesis n-grams.
            ref_ngrams (dict): order -> ``Counter`` of reference n-grams.

        Returns:
            tuple: ``(match_count, gen_total, ref_total)``, three
            ``defaultdict(float)`` keyed by every order in ``ref_ngrams``.
        """
        match_count = defaultdict(float)
        gen_total = defaultdict(float)
        ref_total = defaultdict(float)

        for order in ref_ngrams:
            gen_counter = gen_ngrams[order]
            ref_counter = ref_ngrams[order]
            gen_total[order] = float(sum(gen_counter.values()))
            ref_total[order] = float(sum(ref_counter.values()))
            # Counter intersection keeps min(count) per shared n-gram,
            # i.e. the clipped match count.
            match_count[order] = float(sum((gen_counter & ref_counter).values()))

        return match_count, gen_total, ref_total

    def _calc_F(self, matchCount, genCount, refCount, beta=3.0):
        r"""Compute per-order F-beta, recall and precision from raw counts.

        Args:
            matchCount (dict): order -> number of matched n-grams.
            genCount (dict): order -> number of hypothesis n-grams.
            refCount (dict): order -> number of reference n-grams.
            beta (float): weight of recall relative to precision.

        Returns:
            tuple: ``(ngramF, ngramRecall, ngramPrec)``, three
            ``defaultdict(float)`` keyed by the orders in ``matchCount``.
            Any ratio with a zero denominator is reported as 0.
        """
        ngram_f = defaultdict(float)
        ngram_recall = defaultdict(float)
        ngram_prec = defaultdict(float)
        beta_sq = beta ** 2

        for order in matchCount:
            matched = matchCount[order]
            precision = matched / genCount[order] if genCount[order] > 0 else 0.0
            recall = matched / refCount[order] if refCount[order] > 0 else 0.0
            ngram_prec[order] = precision
            ngram_recall[order] = recall

            denominator = beta_sq * precision + recall
            if denominator > 0:
                ngram_f[order] = (1 + beta_sq) * precision * recall / denominator
            else:
                ngram_f[order] = 0.0

        return ngram_f, ngram_recall, ngram_prec

    def _calc_metrics_info(self, generate_corpus, reference_corpus):
        r"""Compute document- and sentence-level chrF++ statistics.

        Args:
            generate_corpus (List[List[str]]): the generated corpus
            reference_corpus (List[List[str]]): the referenced corpus, one
                reference per generated sentence

        Returns:
            dict: with keys ``'precision'``, ``'recall'`` and ``'document-F'``
            (computed from n-gram counts accumulated over the whole corpus),
            ``'avg-sentence-F'`` (mean of the per-sentence F-scores, 0.0 for
            an empty corpus) and ``'beta'``.
        """
        # Every hypothesis has a single reference; wrap it so the
        # best-of-several-references loop below applies uniformly.
        reference_corpus = [[reference_sentence] for reference_sentence in reference_corpus]

        # Number of n-gram orders averaged over. Equals
        # max(char_n_grams) + max(word_n_grams) for the default 1..N orders.
        n_orders = len(self.char_n_grams) + len(self.word_n_grams)

        total_match_word = defaultdict(float)
        total_ref_word = defaultdict(float)
        total_gen_word = defaultdict(float)
        total_match_char = defaultdict(float)
        total_ref_char = defaultdict(float)
        total_gen_char = defaultdict(float)
        avg_sentence_f = 0.0

        for i, hypothesis in enumerate(generate_corpus):
            gen_char_ngrams = self._generate_ngrams(
                input_sentence=self._preprocess(hypothesis), task_type='char'
            )
            gen_word_ngrams = self._generate_ngrams(
                input_sentence=self._word_tokens(hypothesis), task_type='word'
            )

            best_f = 0.0
            best_counts = None
            for reference in reference_corpus[i]:
                ref_char_ngrams = self._generate_ngrams(
                    input_sentence=self._preprocess(reference), task_type='char'
                )
                ref_word_ngrams = self._generate_ngrams(
                    input_sentence=self._word_tokens(reference), task_type='word'
                )

                word_counts = self._ngrams_match(gen_ngrams=gen_word_ngrams, ref_ngrams=ref_word_ngrams)
                char_counts = self._ngrams_match(gen_ngrams=gen_char_ngrams, ref_ngrams=ref_char_ngrams)

                word_f, _, _ = self._calc_F(*word_counts, beta=self.beta)
                char_f, _, _ = self._calc_F(*char_counts, beta=self.beta)
                cur_f = (sum(char_f.values()) + sum(word_f.values())) / n_orders

                # Always keep the first reference, so a hypothesis that scores
                # 0 still adds its n-gram totals to the document-level counts.
                if best_counts is None or cur_f > best_f:
                    best_f = cur_f
                    best_counts = (word_counts, char_counts)

            if best_counts is not None:
                (match_word, gen_word, ref_word), (match_char, gen_char, ref_char) = best_counts
                for order in self.char_n_grams:
                    total_match_char[order] += match_char[order]
                    total_ref_char[order] += ref_char[order]
                    total_gen_char[order] += gen_char[order]
                for order in self.word_n_grams:
                    total_match_word[order] += match_word[order]
                    total_ref_word[order] += ref_word[order]
                    total_gen_word[order] += gen_word[order]

            avg_sentence_f += best_f

        total_word_f, total_word_recall, total_word_prec = self._calc_F(
            total_match_word, total_gen_word, total_ref_word, beta=self.beta
        )
        total_char_f, total_char_recall, total_char_prec = self._calc_F(
            total_match_char, total_gen_char, total_ref_char, beta=self.beta
        )

        total_f = (sum(total_char_f.values()) + sum(total_word_f.values())) / n_orders
        total_recall = (sum(total_char_recall.values()) + sum(total_word_recall.values())) / n_orders
        total_prec = (sum(total_char_prec.values()) + sum(total_word_prec.values())) / n_orders

        # Guard against an empty corpus instead of dividing by zero.
        n_sentences = len(generate_corpus)
        avg_sentence_f = avg_sentence_f / n_sentences if n_sentences else 0.0

        result = {}
        result['precision'] = total_prec
        result['recall'] = total_recall
        result['document-F'] = total_f
        result['avg-sentence-F'] = avg_sentence_f
        result['beta'] = self.beta
        return result