Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions graphgen/utils/calculate_confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ def _normalize_yes_no(tokens: List[Token]) -> Dict[str, float]:
{"yes": 0.8, "no": 0.2}
Among them, "yes" and "yeah" are synonyms for "yes",
while "no" and "nope" are synonyms for "no".
If neither "yes" nor "no" synonyms are present, it returns:
{"yes": 0.5, "no": 0.5}
Tokens that match neither a "yes" nor a "no" synonym are counted as "uncertain".
An uncertain result is also treated as the opposite of the ground truth.
"""
yes_syno = {
# English yes synonyms
Expand Down Expand Up @@ -126,17 +126,23 @@ def _normalize_yes_no(tokens: List[Token]) -> Dict[str, float]:

yes_prob = 0.0
no_prob = 0.0
uncertain_prob = 0.0
for tok in tokens:
t = tok.text.lower().strip()
if t in yes_syno:
yes_prob += tok.prob
elif t in no_syno:
no_prob += tok.prob
else:
uncertain_prob += tok.prob

total = yes_prob + no_prob + uncertain_prob

total = yes_prob + no_prob
if total == 0:
return {"yes": 0.5, "no": 0.5}
return {"yes": yes_prob / total, "no": no_prob / total}
return {
"yes": yes_prob / total,
"no": no_prob / total,
"uncertain": uncertain_prob / total,
}


def yes_no_loss_entropy(
Expand Down