fix: resolve pylint warnings

CHERRY-ui8 · CHERRY-ui8 · commit 29ab42fb83f5 · 2025-12-18T19:03:26.000+08:00
diff --git a/graphgen/models/generator/omics_qa_generator.py b/graphgen/models/generator/omics_qa_generator.py
@@ -68,7 +68,7 @@ def parse_response(response: str) -> Any:
         return qa_pairs
 
     @staticmethod
-    def _extract_caption(node_data: dict, molecule_type: str) -> Optional[dict]:
+    def _extract_caption(node_data: dict, molecule_type: str) -> Optional[dict]:  # pylint: disable=too-many-branches
         """
         Extract molecule-specific caption information from node data.
 
@@ -341,7 +341,7 @@ def format_generation_results(
                 }
                 for qa in qa_items
             ]
-        if output_data_format == "ChatML":
+        elif output_data_format == "ChatML":
             return [
                 {
                     "messages": [
diff --git a/graphgen/models/kg_builder/__init__.py b/graphgen/models/kg_builder/__init__.py
@@ -1,3 +1,3 @@
 from .light_rag_kg_builder import LightRAGKGBuilder
 from .mm_kg_builder import MMKGBuilder
-from .omics_kg_builder import OmicsKGBuilder
+from .omics_kg_builder import OmicsKGBuilder
diff --git a/graphgen/models/partitioner/anchor_bfs_partitioner.py b/graphgen/models/partitioner/anchor_bfs_partitioner.py
@@ -66,7 +66,7 @@ def partition(
             if comm_n or comm_e:
                 yield Community(id=seed_node, nodes=comm_n, edges=comm_e)
 
-    def _pick_anchor_ids(
+    def _pick_anchor_ids(  # pylint: disable=too-many-branches
         self,
         nodes: List[tuple[str, dict]],
     ) -> Set[str]:
diff --git a/graphgen/models/searcher/db/rnacentral_searcher.py b/graphgen/models/searcher/db/rnacentral_searcher.py
@@ -276,7 +276,7 @@ def _local_blast(self, seq: str, threshold: float) -> Optional[str]:
                 pass
             return None
 
-    def get_by_fasta(self, sequence: str, threshold: float = 0.01) -> Optional[dict]:
+    def get_by_fasta(self, sequence: str, threshold: float = 0.01) -> Optional[dict]:  # pylint: disable=too-many-return-statements
         """
         Search RNAcentral with an RNA sequence.
         Tries local BLAST first if enabled, falls back to RNAcentral API.
diff --git a/graphgen/models/searcher/db/uniprot_searcher.py b/graphgen/models/searcher/db/uniprot_searcher.py
@@ -111,7 +111,7 @@ def get_best_hit(self, keyword: str) -> Optional[Dict]:
             self.logger.error("Keyword %s not found: %s", keyword, e)
         return None
 
-    def get_by_fasta(self, fasta_sequence: str, threshold: float) -> Optional[Dict]:
+    def get_by_fasta(self, fasta_sequence: str, threshold: float) -> Optional[Dict]:  # pylint: disable=too-many-return-statements
         """
         Search UniProt with a FASTA sequence and return the best hit.
         :param fasta_sequence: The FASTA sequence.
diff --git a/graphgen/operators/partition/partition_service.py b/graphgen/operators/partition/partition_service.py
@@ -127,7 +127,7 @@ def _pre_tokenize(self) -> None:
         self.kg_instance.index_done_callback()
         logger.info("Pre-tokenization completed.")
 
-    def _attach_additional_data_to_node(self, batch: tuple) -> tuple:
+    def _attach_additional_data_to_node(self, batch: tuple) -> tuple:  # pylint: disable=too-many-branches,too-many-statements
         """
         Attach additional data from chunk_storage to nodes in the batch.
         :param batch: tuple of (nodes_data, edges_data)
diff --git a/graphgen/operators/search/search_service.py b/graphgen/operators/search/search_service.py
@@ -215,7 +215,18 @@ def _is_already_searched(self, doc: dict) -> bool:
 
         return False
 
-    def _normalize_searched_data(self, doc: dict) -> dict:
+    @staticmethod
+    def _clean_value(v):
+        """Recursively convert numpy arrays and tuples, including nested ones, to lists."""
+        if isinstance(v, np.ndarray):
+            return v.tolist()
+        if isinstance(v, (list, tuple)):  # tuples come back as lists
+            return [SearchService._clean_value(item) for item in v]
+        if isinstance(v, dict):  # keys are kept as-is; only values are cleaned
+            return {k: SearchService._clean_value(val) for k, val in v.items()}
+        return v  # any other value is passed through unchanged
+
+    def _normalize_searched_data(self, doc: dict) -> dict:  # pylint: disable=too-many-branches
         """
         Normalize a document that already contains search results to the expected format.
 
@@ -289,7 +300,7 @@ def _normalize_searched_data(self, doc: dict) -> dict:
 
         return normalized_doc
 
-    def process(self, batch: pd.DataFrame) -> pd.DataFrame:
+    def process(self, batch: pd.DataFrame) -> pd.DataFrame:  # pylint: disable=too-many-branches
         """
         Process a batch of documents and perform searches.
         This is the Ray Data operator interface.
@@ -397,18 +408,7 @@ def process(self, batch: pd.DataFrame) -> pd.DataFrame:
 
                 # Convert numpy arrays and complex types to Python-native types
                 # to avoid Ray Data tensor extension casting issues
-                def clean_value(v):
-                    """Recursively convert numpy arrays and other problematic types to Python-native types."""
-                    if isinstance(v, np.ndarray):
-                        return v.tolist()
-                    elif isinstance(v, (list, tuple)):
-                        return [clean_value(item) for item in v]
-                    elif isinstance(v, dict):
-                        return {k: clean_value(val) for k, val in v.items()}
-                    else:
-                        return v
-
-                cleaned_result = {k: clean_value(v) for k, v in result.items()}
+                cleaned_result = {k: self._clean_value(v) for k, v in result.items()}
 
                 # Create document row with all result fields plus required fields
                 row = {
diff --git a/graphgen/templates/kg/__init__.py b/graphgen/templates/kg/__init__.py
@@ -2,4 +2,4 @@
 from .kg_summarization import KG_SUMMARIZATION_PROMPT
 from .mm_kg_extraction import MMKG_EXTRACTION_PROMPT
 from .omics_kg_extraction import OMICS_KG_EXTRACTION_PROMPT
-from .protein_kg_extraction import PROTEIN_KG_EXTRACTION_PROMPT
+from .protein_kg_extraction import PROTEIN_KG_EXTRACTION_PROMPT

Original file line number	Diff line number	Diff line change
`@@ -68,7 +68,7 @@ def parse_response(response: str) -> Any:`
`68`	`68`	`return qa_pairs`
`69`	`69`
`70`	`70`	`@staticmethod`
`71`		`- def _extract_caption(node_data: dict, molecule_type: str) -> Optional[dict]:`
	`71`	`+ def _extract_caption(node_data: dict, molecule_type: str) -> Optional[dict]: # pylint: disable=too-many-branches`
`72`	`72`	`"""`
`73`	`73`	`Extract molecule-specific caption information from node data.`
`74`	`74`
`@@ -341,7 +341,7 @@ def format_generation_results(`
`341`	`341`	`}`
`342`	`342`	`for qa in qa_items`
`343`	`343`	`]`
`344`		`- if output_data_format == "ChatML":`
	`344`	`+ elif output_data_format == "ChatML":`
`345`	`345`	`return [`
`346`	`346`	`{`
`347`	`347`	`"messages": [`