From 3f950491286a8e33c321382aa8a05c8feadb38d1 Mon Sep 17 00:00:00 2001 From: AbdiHaryadi Date: Wed, 14 Jun 2023 18:09:14 +0700 Subject: [PATCH 1/2] GH-3267: fix(flair.datasets): resolve false warning empty sentence in ColumnDataset --- flair/datasets/sequence_labeling.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/flair/datasets/sequence_labeling.py b/flair/datasets/sequence_labeling.py index 5525ae45a..6ed657d9c 100644 --- a/flair/datasets/sequence_labeling.py +++ b/flair/datasets/sequence_labeling.py @@ -560,9 +560,11 @@ def _identify_span_columns(self, column_name_map, skip_first_line): # check the first 5 sentences probe = [] for _i in range(5): - sentence = self._convert_lines_to_sentence( - self._read_next_sentence(file), word_level_tag_columns=column_name_map - ) + next_sentence = self._read_next_sentence(file) + if len(next_sentence) == 0: + break + + sentence = self._convert_lines_to_sentence(next_sentence, word_level_tag_columns=column_name_map) if sentence: probe.append(sentence) else: From 0c56c251fe4dd7f2069927eb6d4e11ee60b9d7b5 Mon Sep 17 00:00:00 2001 From: AbdiHaryadi Date: Wed, 14 Jun 2023 18:11:23 +0700 Subject: [PATCH 2/2] GH-3267: style: update codes changed by Black module and Ruff --- flair/data.py | 2 +- flair/inference_utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/flair/data.py b/flair/data.py index 04e10a8dd..839f75006 100644 --- a/flair/data.py +++ b/flair/data.py @@ -1675,7 +1675,7 @@ def __str__(self) -> str: f"{len(self.dev) if self.dev else 0} dev + " f"{len(self.test) if self.test else 0} test sentences\n - " ) - output += "\n - ".join([f"{type(corpus).__name__} {str(corpus)} - {corpus.name}" for corpus in self.corpora]) + output += "\n - ".join([f"{type(corpus).__name__} {corpus!s} - {corpus.name}" for corpus in self.corpora]) return output diff --git a/flair/inference_utils.py b/flair/inference_utils.py index e96e16da1..035025c00 100644 --- a/flair/inference_utils.py +++ b/flair/inference_utils.py @@ -177,7 +177,7 @@ def __init__(self, embedding, verbose) -> None: self.k = len(result[0]) - 1 return except sqlite3.Error as err: - logger.exception(f"Fail to open sqlite database {self.store_path!s}: {str(err)}") + logger.exception(f"Fail to open sqlite database {self.store_path!s}: {err!s}") # otherwise, push embedding to database if hasattr(embedding, "precomputed_word_embeddings"): self.db = sqlite3.connect(str(self.store_path)) @@ -239,7 +239,7 @@ def __init__(self, embedding, verbose) -> None: cursor.close() return except lmdb.Error as err: - logger.exception(f"Fail to open lmdb database {self.store_path!s}: {str(err)}") + logger.exception(f"Fail to open lmdb database {self.store_path!s}: {err!s}") # create and load the database in write mode if hasattr(embedding, "precomputed_word_embeddings"): pwe = embedding.precomputed_word_embeddings