fix: resolve pylint style issues (trailing whitespace, no-else-return)

CHERRY-ui8 · CHERRY-ui8 · commit 8678e33161f9 · 2025-11-27T20:34:33.000+08:00
diff --git a/graphgen/models/searcher/db/ncbi_searcher.py b/graphgen/models/searcher/db/ncbi_searcher.py
@@ -33,7 +33,7 @@ class NCBISearch(BaseSearcher):
     1) Get the gene/DNA by accession number or gene ID.
     2) Search with keywords or gene names (fuzzy search).
     3) Search with FASTA sequence (BLAST search for DNA sequences).
-    
+
     API Documentation: https://www.ncbi.nlm.nih.gov/home/develop/api/
     Note: NCBI has rate limits (max 3 requests per second), delays are required between requests.
     """
@@ -49,12 +49,11 @@ def _safe_get(obj, key, default=None):
         """Safely get value from dict or StringElement-like object."""
         if isinstance(obj, dict):
             return obj.get(key, default)
-        elif hasattr(obj, "get"):
+        if hasattr(obj, "get"):
             return obj.get(key, default)
-        elif hasattr(obj, key):
+        if hasattr(obj, key):
             return getattr(obj, key, default)
-        else:
-            return default
+        return default
 
     @staticmethod
     def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
@@ -68,7 +67,7 @@ def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
             raise ValueError("Empty gene record")
 
         gene_data = gene_record[0]
-        
+
         # Safely extract gene_ref, handling both dict and StringElement types
         gene_ref = {}
         entrezgene_gene = gene_data.get("Entrezgene_gene")
@@ -146,7 +145,7 @@ def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
         # Note: Entrezgene_location doesn't exist, but Entrezgene_locus contains location info
         chromosome = None
         genomic_location = None
-        
+
         try:
             locus_data = gene_data.get("Entrezgene_locus")
             if locus_data and isinstance(locus_data, list) and locus_data:
@@ -159,7 +158,7 @@ def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
                         match = re.search(r'Chromosome\s+(\S+)', str(label))
                         if match:
                             chromosome = match.group(1)
-                    
+
                     # Extract genomic location from Gene-commentary_seqs
                     seqs = NCBISearch._safe_get(first_locus, "Gene-commentary_seqs", [])
                     if seqs and isinstance(seqs, list) and seqs:
@@ -255,7 +254,7 @@ def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
         reraise=True,
         before_sleep=before_sleep_log(logger, logging.WARNING),
     )
-    
+
     def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None) -> Optional[dict]:
         """
         Get gene information by Gene ID.
@@ -272,7 +271,7 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
                 if not gene_record:
                     return None
                 result = self._gene_record_to_dict(gene_record, gene_id)
-                
+
                 # Try to get sequence from accession
                 # Priority: 1) preferred_accession (if provided), 2) representative mRNA accession
                 accession_to_use = preferred_accession or result.get("_representative_accession")
@@ -292,7 +291,7 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
                                 seq_lines = sequence_data.strip().split("\n")
                                 header = seq_lines[0] if seq_lines else ""
                                 sequence = "".join(seq_lines[1:])
-                                
+
                                 # Get summary for additional info
                                 time.sleep(0.35)
                                 summary_handle = Entrez.esummary(db="nuccore", id=accession_to_use)
@@ -301,7 +300,7 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
                                     if summary:
                                         summary_data = summary[0]
                                         title = summary_data.get("Title", header)
-                                        
+
                                         # Determine molecule type detail
                                         molecule_type_detail = "N/A"
                                         if accession_to_use.startswith("NM_") or accession_to_use.startswith("XM_"):
@@ -312,13 +311,13 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
                                             molecule_type_detail = "RNA"
                                         elif accession_to_use.startswith("NG_"):
                                             molecule_type_detail = "genomic region"
-                                        
+
                                         # Merge sequence information into result
                                         result["sequence"] = sequence
                                         result["sequence_length"] = len(sequence)
                                         result["title"] = title
                                         result["molecule_type_detail"] = molecule_type_detail
-                                        
+
                                         # Update chromosome and genomic_location if not already set
                                         if not result.get("chromosome"):
                                             chromosome = summary_data.get("ChrLoc") or summary_data.get("ChrAccVer")
@@ -337,9 +336,9 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
                         # Re-raise to allow retry mechanism
                         raise
                     except Exception as e:
-                        logger.debug("Failed to get sequence for accession %s: %s", 
+                        logger.debug("Failed to get sequence for accession %s: %s",
                                    accession_to_use, e)
-                
+
                 # Remove internal field
                 result.pop("_representative_accession", None)
                 return result
@@ -368,7 +367,7 @@ def get_by_accession(self, accession: str) -> Optional[dict]:
             gene_id = None
             try:
                 links = Entrez.read(link_handle)
-                
+
                 # Extract GeneID from elink results
                 # Structure: links[0]["LinkSetDb"][0]["Link"][0]["Id"]
                 if links and len(links) > 0:
@@ -401,19 +400,19 @@ def get_by_accession(self, accession: str) -> Optional[dict]:
                 # Continue to check if we got gene_id before the error
             finally:
                 link_handle.close()
-            
+
             # Step 2: If we have a GeneID, get complete information from Gene database
             # Pass the accession as preferred_accession so get_by_gene_id can use it for sequence
             if gene_id:
                 result = self.get_by_gene_id(gene_id, preferred_accession=accession)
-                
+
                 # Update id to accession for consistency (user searched by accession)
                 if result:
                     result["id"] = accession
                     result["url"] = f"https://www.ncbi.nlm.nih.gov/nuccore/{accession}"
-                
+
                 return result
-            
+
             # Step 3: If no GeneID, this is a rare case (accession without associated gene)
             # Return None - we can't provide complete information without Gene ID
             logger.warning("Accession %s has no associated GeneID, cannot provide complete information", accession)
diff --git a/graphgen/models/searcher/db/rnacentral_searcher.py b/graphgen/models/searcher/db/rnacentral_searcher.py
@@ -19,7 +19,7 @@ class RNACentralSearch(BaseSearcher):
     1) Get RNA by RNAcentral ID.
     2) Search with keywords or RNA names (fuzzy search).
     3) Search with RNA sequence.
-    
+
     API Documentation: https://rnacentral.org/api/v1
     """
 
@@ -37,7 +37,7 @@ async def _fetch_all_xrefs(self, xrefs_url: str, session: aiohttp.ClientSession)
         """
         all_xrefs = []
         current_url = xrefs_url
-        
+
         while current_url:
             try:
                 async with session.get(
@@ -47,12 +47,12 @@ async def _fetch_all_xrefs(self, xrefs_url: str, session: aiohttp.ClientSession)
                         data = await resp.json()
                         results = data.get("results", [])
                         all_xrefs.extend(results)
-                        
+
                         # Check if there's a next page
                         current_url = data.get("next")
                         if not current_url:
                             break
-                        
+
                         # Small delay to avoid rate limiting
                         await asyncio.sleep(0.2)
                     else:
@@ -61,7 +61,7 @@ async def _fetch_all_xrefs(self, xrefs_url: str, session: aiohttp.ClientSession)
             except Exception as e:
                 logger.warning("Error fetching xrefs from %s: %s", current_url, e)
                 break
-        
+
         return all_xrefs
 
     @staticmethod
@@ -78,31 +78,31 @@ def _extract_info_from_xrefs(xrefs: List[Dict]) -> Dict[str, Any]:
             "so_terms": set(),
             "xrefs_list": [],
         }
-        
+
         for xref in xrefs:
             # Extract accession information
             accession = xref.get("accession", {})
-            
+
             # Extract species information
             species = accession.get("species")
             if species:
                 extracted["organisms"].add(species)
-            
+
             # Extract gene name
             gene = accession.get("gene")
             if gene and gene.strip():  # Only add non-empty genes
                 extracted["gene_names"].add(gene.strip())
-            
+
             # Extract modifications
             modifications = xref.get("modifications", [])
             if modifications:
                 extracted["modifications"].extend(modifications)
-            
+
             # Extract SO term (biotype)
             biotype = accession.get("biotype")
             if biotype:
                 extracted["so_terms"].add(biotype)
-            
+
             # Build xrefs list
             xref_info = {
                 "database": xref.get("database"),
@@ -113,24 +113,24 @@ def _extract_info_from_xrefs(xrefs: List[Dict]) -> Dict[str, Any]:
                 "gene": gene,
             }
             extracted["xrefs_list"].append(xref_info)
-        
+
         # Convert sets to appropriate formats
         return {
             "organism": (
-                list(extracted["organisms"])[0] 
-                if len(extracted["organisms"]) == 1 
+                list(extracted["organisms"])[0]
+                if len(extracted["organisms"]) == 1
                 else (", ".join(extracted["organisms"]) if extracted["organisms"] else None)
             ),
             "gene_name": (
-                list(extracted["gene_names"])[0] 
-                if len(extracted["gene_names"]) == 1 
+                list(extracted["gene_names"])[0]
+                if len(extracted["gene_names"]) == 1
                 else (", ".join(extracted["gene_names"]) if extracted["gene_names"] else None)
             ),
             "related_genes": list(extracted["gene_names"]) if extracted["gene_names"] else None,
             "modifications": extracted["modifications"] if extracted["modifications"] else None,
             "so_term": (
-                list(extracted["so_terms"])[0] 
-                if len(extracted["so_terms"]) == 1 
+                list(extracted["so_terms"])[0]
+                if len(extracted["so_terms"]) == 1
                 else (", ".join(extracted["so_terms"]) if extracted["so_terms"] else None)
             ),
             "xrefs": extracted["xrefs_list"] if extracted["xrefs_list"] else None,
@@ -146,12 +146,12 @@ def _rna_data_to_dict(rna_id: str, rna_data: dict, xrefs_data: Optional[List[Dic
         :return: A dictionary containing RNA information.
         """
         sequence = rna_data.get("sequence", "")
-        
+
         # Initialize extracted info from xrefs if available
         extracted_info = {}
         if xrefs_data:
             extracted_info = RNACentralSearch._extract_info_from_xrefs(xrefs_data)
-        
+
         # Extract organism information (prefer from xrefs, fallback to main data)
         organism = extracted_info.get("organism")
         if not organism:
@@ -220,7 +220,7 @@ async def get_by_rna_id(self, rna_id: str) -> Optional[dict]:
                 ) as resp:
                     if resp.status == 200:
                         rna_data = await resp.json()
-                        
+
                         # Check if xrefs is a URL and fetch the actual xrefs data
                         xrefs_data = None
                         xrefs_url = rna_data.get("xrefs")
@@ -231,7 +231,7 @@ async def get_by_rna_id(self, rna_id: str) -> Optional[dict]:
                             except Exception as e:
                                 logger.warning("Failed to fetch xrefs for RNA ID %s: %s", rna_id, e)
                                 # Continue without xrefs data
-                        
+
                         return self._rna_data_to_dict(rna_id, rna_data, xrefs_data)
                     if resp.status == 404:
                         logger.error("RNA ID %s not found", rna_id)
@@ -271,16 +271,16 @@ async def get_best_hit(self, keyword: str) -> Optional[dict]:
                             # Step 1: Get RNA ID from search results
                             first_result = results[0]
                             rna_id = first_result.get("rnacentral_id")
-                            
+
                             if rna_id:
                                 # Step 2: Unified call to get_by_rna_id() for complete information
                                 result = await self.get_by_rna_id(rna_id)
-                                
+
                                 # Step 3: If get_by_rna_id() failed, use search result data as fallback
                                 if not result:
                                     logger.debug("get_by_rna_id() failed for %s, using search result data", rna_id)
                                     result = self._rna_data_to_dict(rna_id, first_result)
-                                
+
                                 return result
                         logger.info("No results found for keyword: %s", keyword)
                         return None
@@ -339,20 +339,20 @@ async def search_by_sequence(self, sequence: str) -> Optional[dict]:
                                 if result_seq == seq:
                                     exact_match = result
                                     break
-                            
+
                             # Use exact match if found, otherwise use first result
                             target_result = exact_match if exact_match else results[0]
                             rna_id = target_result.get("rnacentral_id")
-                            
+
                             if rna_id:
                                 # Step 2: Unified call to get_by_rna_id() for complete information
                                 result = await self.get_by_rna_id(rna_id)
-                                
+
                                 # Step 3: If get_by_rna_id() failed, use search result data as fallback
                                 if not result:
                                     logger.debug("get_by_rna_id() failed for %s, using search result data", rna_id)
                                     result = self._rna_data_to_dict(rna_id, target_result)
-                                
+
                                 return result
                         logger.info("No results found for sequence.")
                         return None