Add biomedical entity normalization #1889
Annotations
10 errors
test:
flair/__init__.py#L1
mypy-status
mypy exited with status 1.
|
test:
flair/data.py#L1
flair/data.py
12: error: Unused "type: ignore" comment [unused-ignore]
329: note: "add_label" of "DataPoint" defined here
529: error: Argument 2 of "add_label" is incompatible with supertype "DataPoint"; supertype defines the argument type as "Union[str, Label]" [override]
529: note: This violates the Liskov substitution principle
529: note: See https://mypy.readthedocs.io/en/stable/common_issues.html#incompatible-overrides
626: error: Argument 2 of "add_label" is incompatible with supertype "DataPoint"; supertype defines the argument type as "Union[str, Label]" [override]
626: note: This violates the Liskov substitution principle
626: note: See https://mypy.readthedocs.io/en/stable/common_issues.html#incompatible-overrides
632: error: Unexpected keyword argument "value" for "add_label" of "DataPoint" [call-arg]
329: note: "add_label" of "DataPoint" defined here
329: note: "add_label" of "DataPoint" defined here
329: note: "add_label" of "DataPoint" defined here
329: note: "add_label" of "DataPoint" defined here
329: note: "add_label" of "DataPoint" defined here
|
test:
flair/data.py#L341
ruff
pytest_ruff.RuffError: flair/data.py:9:52: F401 [*] `typing.Tuple` imported but unused
|
7 | from operator import itemgetter
8 | from pathlib import Path
9 | from typing import Dict, Iterable, List, Optional, Tuple, Union, cast
| ^^^^^ F401
10 |
11 | import torch
|
= help: Remove unused import: `typing.Tuple`
flair/data.py:12:36: PGH003 Use specific rule codes when ignoring type issues
|
11 | import torch
12 | from deprecated import deprecated # type: ignore
| ^^^^^^^^^^^^^^ PGH003
13 | from torch.utils.data import Dataset, IterableDataset
14 | from torch.utils.data.dataset import ConcatDataset, Subset
|
flair/data.py:427:5: D415 [*] First line should end with a period, question mark, or exclamation point
|
426 | class EntityLinkingCandidate:
427 | """Represent a single candidate returned by a CandidateGenerator"""
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ D415
428 |
429 | def __init__(
|
= help: Add closing punctuation
flair/data.py:437:9: D205 1 blank line required between summary line and description
|
435 | additional_ids: Optional[Union[List[str], str]] = None,
436 | ):
437 | """
| _________^
438 | | :param concept_id: Identifier of the entity / concept from the knowledge base / ontology
439 | | :param concept_name: (Canonical) name of the entity / concept from the knowledge base / ontology
440 | | :param score: Matching score of the entity / concept according to the entity mention
441 | | :param additional_ids: List of additional identifiers for the concept / entity in the KB / ontology
442 | | :param database_name: Name of the knowlege base / ontology
443 | | """
| |___________^ D205
444 | self.concept_id = concept_id
445 | self.concept_name = concept_name
|
= help: Insert single blank line
flair/data.py:437:9: D212 [*] Multi-line docstring summary should start at the first line
|
435 | additional_ids: Optional[Union[List[str], str]] = None,
436 | ):
437 | """
| _________^
438 | | :param concept_id: Identifier of the entity / concept from the knowledge base / ontology
439 | | :param concept_name: (Canonical) name of the entity / concept from the knowledge base / ontology
440 | | :param score: Matching score of the entity / concept according to the entity mention
441 | | :param additional_ids: List of additional identifiers for the concept / entity in the KB / ontology
442 | | :param database_name: Name of the knowlege base / ontology
443 | | """
| |___________^ D212
444 | self.concept_id = concept_id
445 | self.concept_name = concept_name
|
= help: Remove whitespace after opening quotes
flair/data.py:461:5: D205 1 blank line required between summary line and description
|
460 | class EntityLinkingLabel(Label):
461 | """
| _____^
462 | | Label class models entity linking annotations. Each entity linking label has a data point it refers
463 | | to as well as the identifier and name of the concept / entity from a knowledge base or ontology.
464 | | Optionally, additional concepts identifier and the database name can be provided.
465 | | """
| |_______^ D205
466 |
467 | def __init__(self, data_point: DataPoint, candidates: List[EntityLinkingCandidate]):
|
= help: Insert single blank line
flair/data.py:461:5: D212 [*] Multi-line docstring summary should start at the first line
|
460 | class EntityLinkingLabel(Label):
461 | """
| _____^
462 | | Label class models entity linking annotations. Each entity linking label has a data point it refers
463 | | to as well as the identifier and name of the concept / entity from a knowledge base or ontology.
464 | | Optionally, additional concepts identifier and the database name can be provided.
4
|
test:
flair/datasets/biomedical.py#L1
flair/datasets/biomedical.py
514: error: Incompatible default for argument "base_path" (default has type "None", argument has type "Union[str, Path]") [assignment]
514: note: PEP 484 prohibits implicit Optional. Accordingly, mypy has changed its default to no_implicit_optional=True
514: note: Use https://github.com/hauntsaninja/no_implicit_optional to automatically upgrade your codebase
600: error: Incompatible default for argument "base_path" (default has type "None", argument has type "Union[str, Path]") [assignment]
600: note: PEP 484 prohibits implicit Optional. Accordingly, mypy has changed its default to no_implicit_optional=True
600: note: Use https://github.com/hauntsaninja/no_implicit_optional to automatically upgrade your codebase
672: error: Incompatible default for argument "base_path" (default has type "None", argument has type "Union[str, Path]") [assignment]
672: note: PEP 484 prohibits implicit Optional. Accordingly, mypy has changed its default to no_implicit_optional=True
672: note: Use https://github.com/hauntsaninja/no_implicit_optional to automatically upgrade your codebase
742: error: Incompatible default for argument "base_path" (default has type "None", argument has type "Union[str, Path]") [assignment]
742: note: PEP 484 prohibits implicit Optional. Accordingly, mypy has changed its default to no_implicit_optional=True
742: note: Use https://github.com/hauntsaninja/no_implicit_optional to automatically upgrade your codebase
833: error: Incompatible default for argument "base_path" (default has type "None", argument has type "Union[str, Path]") [assignment]
833: note: PEP 484 prohibits implicit Optional. Accordingly, mypy has changed its default to no_implicit_optional=True
833: note: Use https://github.com/hauntsaninja/no_implicit_optional to automatically upgrade your codebase
2052: error: "Type[CHEMDNER]" has no attribute "default_dir" [attr-defined]
2096: error: Incompatible default for argument "tokenizer" (default has type "None", argument has type "Tokenizer") [assignment]
2096: note: PEP 484 prohibits implicit Optional. Accordingly, mypy has changed its default to no_implicit_optional=True
2096: note: Use https://github.com/hauntsaninja/no_implicit_optional to automatically upgrade your codebase
|
test:
flair/datasets/biomedical.py#L1
Black format check
--- /home/runner/work/flair/flair/flair/datasets/biomedical.py 2023-07-12 16:11:43.298681 +0000
+++ /home/runner/work/flair/flair/flair/datasets/biomedical.py 2023-07-12 16:16:58.689536 +0000
@@ -2035,11 +2035,10 @@
conll_writer.write_to_conll(dev_data, dev_file)
conll_writer.write_to_conll(test_data, test_file)
super().__init__(data_folder, columns, in_memory=in_memory)
-
@staticmethod
def download_dataset(data_dir: Path):
data_url = "https://biocreative.bioinformatics.udel.edu/media/store/files/2014/chemdner_corpus.tar.gz"
data_path = cached_path(data_url, data_dir)
unpack_file(data_path, data_dir)
@@ -4220,11 +4219,10 @@
conll_writer.write_to_conll(dev_data, dev_file)
conll_writer.write_to_conll(test_data, test_file)
super().__init__(data_folder, columns, in_memory=in_memory)
-
@staticmethod
@AbstractMethod
def download_corpus(data_folder: Path) -> Tuple[Path, Path, Path]:
pass
@@ -4393,11 +4391,10 @@
conll_writer.write_to_conll(dev_data, dev_file)
conll_writer.write_to_conll(test_data, test_file)
super().__init__(data_folder, columns, in_memory=in_memory)
-
@staticmethod
@AbstractMethod
def download_corpus(data_folder: Path):
corpus_url = "http://nactem.ac.uk/anatomytagger/AnatEM-1.0.2.tar.gz"
corpus_archive = cached_path(corpus_url, data_folder)
@@ -4619,11 +4616,10 @@
BioBertHelper.convert_and_write(common_path / "JNLPBA", data_folder, tag_type=GENE_TAG)
super().__init__(data_folder, columns, in_memory=in_memory)
-
class BIOBERT_CHEMICAL_BC5CDR(ColumnCorpus):
"""BC5CDR corpus with chemical annotations as used in the evaluation of BioBERT.
For further details regarding BioBERT and it's evaluation, see Lee
et al.:
@@ -4651,11 +4647,10 @@
BioBertHelper.convert_and_write(common_path / "BC5CDR-chem", data_folder, tag_type=CHEMICAL_TAG)
super().__init__(data_folder, columns, in_memory=in_memory)
-
class BIOBERT_DISEASE_BC5CDR(ColumnCorpus):
"""BC5CDR corpus with disease annotations as used in the evaluation of BioBERT.
For further details regarding BioBERT and it's evaluation, see Lee
et al.:
@@ -4683,11 +4678,10 @@
BioBertHelper.convert_and_write(common_path / "BC5CDR-disease", data_folder, tag_type=DISEASE_TAG)
super().__init__(data_folder, columns, in_memory=in_memory)
-
class BIOBERT_DISEASE_NCBI(ColumnCorpus):
"""NCBI disease corpus as used in the evaluation of BioBERT.
For further details regarding BioBERT and it's evaluation, see Lee
et al.:
@@ -4715,11 +4709,10 @@
BioBertHelper.convert_and_write(common_path / "NCBI-disease", data_folder, tag_type=DISEASE_TAG)
super().__init__(data_folder, columns, in_memory=in_memory)
-
class BIOBERT_SPECIES_LINNAEUS(ColumnCorpus):
"""Linneaeus corpus with species annotations as used in the evaluation of BioBERT.
For further details regarding BioBERT and it's evaluation, see Lee
et al.:
@@ -4745,11 +4738,10 @@
if not (common_path / "linnaeus").exists():
BioBertHelper.download_corpora(common_path)
BioBertHelper.convert_and_write(common_path / "linnaeus", data_folder, tag_type=SPECIES_TAG)
super().__init__(data_folder, columns, in_memory=in_memory)
-
class BIOBERT_SPECIES_S800(ColumnCorpus):
"""S800 corpus with species annotations as used in the evaluation of BioBERT.
|
test:
flair/datasets/biomedical.py#L341
ruff
pytest_ruff.RuffError: flair/datasets/biomedical.py:514:20: RUF013 [*] PEP 484 prohibits implicit `Optional`
|
512 | def __init__(
513 | self,
514 | base_path: Union[str, Path] = None,
| ^^^^^^^^^^^^^^^^ RUF013
515 | ):
516 | """:param base_path: Path to the corpus on your machine"""
|
= help: Convert to `Optional[T]`
flair/datasets/biomedical.py:517:9: SIM108 [*] Use ternary operator `base_path = flair.cache_root / "datasets" if base_path is None else Path(base_path)` instead of `if`-`else`-block
|
515 | ):
516 | """:param base_path: Path to the corpus on your machine"""
517 | if base_path is None:
| _________^
518 | | base_path = flair.cache_root / "datasets"
519 | | else:
520 | | base_path = Path(base_path)
| |_______________________________________^ SIM108
521 |
522 | # this dataset name
|
= help: Replace `if`-`else`-block with `base_path = flair.cache_root / "datasets" if base_path is None else Path(base_path)`
flair/datasets/biomedical.py:539:9: D415 [*] First line should end with a period, question mark, or exclamation point
|
537 | @AbstractMethod
538 | def database_name(self) -> str:
539 | """Name of the database represented by the dictionary"""
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ D415
540 |
541 | @AbstractMethod
|
= help: Add closing punctuation
flair/datasets/biomedical.py:543:9: D415 [*] First line should end with a period, question mark, or exclamation point
|
541 | @AbstractMethod
542 | def download_dictionary(self, data_dir: Path) -> Path:
543 | """Download dictionary"""
| ^^^^^^^^^^^^^^^^^^^^^^^^^ D415
544 |
545 | @AbstractMethod
|
= help: Add closing punctuation
flair/datasets/biomedical.py:547:9: D415 [*] First line should end with a period, question mark, or exclamation point
|
545 | @AbstractMethod
546 | def parse_dictionary(self, original_file: Path):
547 | """Parse data into HunFlair format"""
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ D415
548 |
549 | def stream(self) -> Iterator[Tuple[str, str]]:
|
= help: Add closing punctuation
flair/datasets/biomedical.py:550:9: D202 [*] No blank lines allowed after function docstring (found 1)
|
549 | def stream(self) -> Iterator[Tuple[str, str]]:
550 | """Stream preprocessed dictionary"""
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ D202
551 |
552 | with open(self.dataset_file) as fp:
|
= help: Remove blank line(s) after function docstring
flair/datasets/biomedical.py:550:9: D415 [*] First line should end with a period, question mark, or exclamation point
|
549 | def stream(self) -> Iterator[Tuple[str, str]]:
550 | """Stream preprocessed dictionary"""
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ D415
551 |
552 | with open(self.dataset_file) as fp:
|
= help: Add closing punctuation
flair/datasets/biomedical.py:564:5: D205 1 blank line required between summary line and description
|
563 | class ParsedBiomedicalEntityLinkingDictionary(AbstractBiomedicalEntityLinkingDictionary):
564 | """
| _____^
565 | | Base dictionary with data already in preprocessed format, i.e. every line in the file must
566 | | be formatted as follows:
567 | |
568 | | concept_id||concept_name
569 | |
570 | | If multiple concept ids are associated to a given name they have to be separated by a `|`, e.g.
571 | |
572 | | 7157||TP53|tumor protein p53
573 | | """
| |_______^ D205
574 |
575 | def __init__(self, path: Path, database_name: str):
|
= help: Insert single blank line
flair/datasets/biomedical.py:564:5: D212 [*] Multi-line docstring summary should start at the first line
|
563 | class ParsedBiomedicalEntityLinkingDictionary(AbstractBiomedicalEntityLinkingDictionary):
564 | """
| _____^
565 | |
|
test:
flair/datasets/document_classification.py#L341
ruff
pytest_ruff.RuffError: flair/datasets/document_classification.py:1480:17: RUF012 Mutable class attributes should be annotated with `typing.ClassVar`
|
1479 | class GLUE_SST2(CSVClassificationCorpus):
1480 | label_map = {0: "negative", 1: "positive"}
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF012
1481 |
1482 | def __init__(
|
|
test:
flair/datasets/sequence_labeling.py#L341
ruff
pytest_ruff.RuffError: flair/datasets/sequence_labeling.py:423:13: RUF012 Mutable class attributes should be annotated with `typing.ClassVar`
|
421 | SPACE_AFTER_KEY = "space-after"
422 | # special key for feature columns
423 | FEATS = ["feats", "misc"]
| ^^^^^^^^^^^^^^^^^ RUF012
424 | # special key for dependency head id
425 | HEAD = ["head", "head_id"]
|
flair/datasets/sequence_labeling.py:425:12: RUF012 Mutable class attributes should be annotated with `typing.ClassVar`
|
423 | FEATS = ["feats", "misc"]
424 | # special key for dependency head id
425 | HEAD = ["head", "head_id"]
| ^^^^^^^^^^^^^^^^^^^ RUF012
426 |
427 | def __init__(
|
|
test:
flair/datasets/text_text.py#L1
flair/datasets/text_text.py
414: error: Unexpected keyword argument "value" for "add_label" of "DataPoint" [call-arg]
|
test:
flair/embeddings/document.py#L1
flair/embeddings/document.py
50: error: Argument "model" to "__init__" of "TransformerEmbeddings" has incompatible type "Union[str, Path]"; expected "str" [arg-type]
|