Skip to content

Commit

Permalink
Merge pull request #3271 from flairNLP/3265-question-sentenceto_dicttag_type=ner-no-longer-have-the-entities-key
Browse files Browse the repository at this point in the history

recreate `to_dict` and add relations
  • Loading branch information
alanakbik authored Aug 8, 2023
2 parents c96660c + 14d5a07 commit 856e072
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 15 deletions.
50 changes: 36 additions & 14 deletions flair/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,14 @@ def set_label(self, typename: str, value: str, score: float = 1.0):
else:
DataPoint.set_label(self, typename=typename, value=value, score=score)

def to_dict(self, tag_type: Optional[str] = None) -> dict:
    """Serialize this data point into a plain dictionary.

    :param tag_type: Label type forwarded unchanged to ``self.get_labels``; defaults to ``None``.
    :return: A dict with the keys ``text``, ``start_pos``, ``end_pos`` and ``labels``. ``labels`` holds
        ``label.to_dict()`` for every label returned by ``self.get_labels(tag_type)``.
    """
    return {
        "text": self.text,
        "start_pos": self.start_position,
        "end_pos": self.end_position,
        "labels": [label.to_dict() for label in self.get_labels(tag_type)],
    }


class Span(_PartOfSentence):
"""This class represents one textual span consisting of Tokens."""
Expand Down Expand Up @@ -604,6 +612,14 @@ def __len__(self) -> int:
def embedding(self):
    """Return the result of ``self.get_embedding()``."""
    return self.get_embedding()

def to_dict(self, tag_type: Optional[str] = None) -> dict:
    """Serialize this span into a plain dictionary.

    :param tag_type: Label type forwarded unchanged to ``self.get_labels``; defaults to ``None``.
    :return: A dict with the keys ``text``, ``start_pos``, ``end_pos`` and ``labels``. ``labels`` holds
        ``label.to_dict()`` for every label returned by ``self.get_labels(tag_type)``.
    """
    return {
        "text": self.text,
        "start_pos": self.start_position,
        "end_pos": self.end_position,
        "labels": [label.to_dict() for label in self.get_labels(tag_type)],
    }


class Relation(_PartOfSentence):
def __new__(self, first: Span, second: Span):
Expand Down Expand Up @@ -664,6 +680,15 @@ def end_position(self) -> int:
def embedding(self):
    """Placeholder with no implementation; calling it returns ``None``."""
    pass

def to_dict(self, tag_type: Optional[str] = None) -> dict:
    """Serialize this relation into a plain dictionary.

    :param tag_type: Label type forwarded unchanged to ``self.get_labels``; defaults to ``None``.
    :return: A dict with the keys ``from_text``, ``to_text``, ``from_idx``, ``to_idx`` and ``labels``.
        ``from_*`` values come from ``self.first`` and ``to_*`` values from ``self.second``.
    """
    return {
        "from_text": self.first.text,
        "to_text": self.second.text,
        # Index of each span's first token, shifted down by one.
        # NOTE(review): presumably converts a 1-based token ``idx`` to a 0-based position — confirm.
        "from_idx": self.first.tokens[0].idx - 1,
        "to_idx": self.second.tokens[0].idx - 1,
        "labels": [label.to_dict() for label in self.get_labels(tag_type)],
    }


class Sentence(DataPoint):
"""A Sentence is a list of tokens and is used to represent a sentence or text fragment."""
Expand Down Expand Up @@ -760,17 +785,17 @@ def __init__(
def unlabeled_identifier(self):
    """Return a label-free description built from ``len(self)`` and ``self.text``."""
    return f'Sentence[{len(self)}]: "{self.text}"'

# NOTE(review): the two `def` lines and the two `for` lines below look like removed/added pairs from a
# diff (parameter `type: str` vs. `label_type: Optional[str] = None`) — confirm against the repository
# before treating either version as current.
def get_relations(self, type: str) -> List[Relation]:
def get_relations(self, label_type: Optional[str] = None) -> List[Relation]:
relations: List[Relation] = []
for label in self.get_labels(type):
for label in self.get_labels(label_type):
# Only labels whose data point is a Relation contribute to the result.
if isinstance(label.data_point, Relation):
relations.append(label.data_point)
return relations

# NOTE(review): the two `def` lines and the two `if` lines below look like removed/added pairs from a
# diff (parameter `type: str` vs. `label_type: Optional[str] = None`) — confirm against the repository
# before treating either version as current.
def get_spans(self, type: str) -> List[Span]:
def get_spans(self, label_type: Optional[str] = None) -> List[Span]:
spans: List[Span] = []
for potential_span in self._known_spans.values():
if isinstance(potential_span, Span) and potential_span.has_label(type):
# With `label_type`, a Span is kept when label_type is None or when it has a label of that type.
if isinstance(potential_span, Span) and (label_type is None or potential_span.has_label(label_type)):
spans.append(potential_span)
# The collected spans are returned in sorted order.
return sorted(spans)

Expand Down Expand Up @@ -937,16 +962,13 @@ def to_original_text(self) -> str:
).strip()

def to_dict(self, tag_type: Optional[str] = None):
# NOTE(review): the statements from `labels = []` through the `"all labels"` return look like the
# removed side of a diff, and the final multi-key `return` like the added side — confirm against the
# repository before treating either version as current.
labels = []

# Variant keyed by the tag type itself: {"text": ..., <tag_type>: [...]}.
if tag_type:
labels = [label.to_dict() for label in self.get_labels(tag_type)]
return {"text": self.to_original_text(), tag_type: labels}

if self.labels:
labels = [label.to_dict() for label in self.labels]

return {"text": self.to_original_text(), "all labels": labels}
# Variant with fixed keys: "labels" keeps only labels whose data point is this object itself, while
# "entities", "relations" and "tokens" delegate to the to_dict of each span, relation and token.
return {
"text": self.to_original_text(),
"labels": [label.to_dict() for label in self.get_labels(tag_type) if label.data_point is self],
"entities": [span.to_dict(tag_type) for span in self.get_spans(tag_type)],
"relations": [relation.to_dict(tag_type) for relation in self.get_relations(tag_type)],
"tokens": [token.to_dict(tag_type) for token in self.tokens],
}

def get_span(self, start: int, stop: int):
span_slice = slice(start, stop)
Expand Down
2 changes: 1 addition & 1 deletion flair/models/relation_classifier_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def _valid_entities(self, sentence: Sentence) -> Iterator[_Entity]:
:return: Valid entities as `_Entity`
"""
for label_type, valid_labels in self.entity_label_types.items():
for entity_span in sentence.get_spans(type=label_type):
for entity_span in sentence.get_spans(label_type=label_type):
entity_label: Label = entity_span.get_label(label_type=label_type)

# Only use entities labelled with the specified labels for each label type
Expand Down

0 comments on commit 856e072

Please sign in to comment.