@@ -33,7 +33,7 @@ class NCBISearch(BaseSearcher):
3333 1) Get the gene/DNA by accession number or gene ID.
3434 2) Search with keywords or gene names (fuzzy search).
3535 3) Search with FASTA sequence (BLAST search for DNA sequences).
36-
36+
3737 API Documentation: https://www.ncbi.nlm.nih.gov/home/develop/api/
3838 Note: NCBI has rate limits (max 3 requests per second), delays are required between requests.
3939 """
@@ -49,12 +49,11 @@ def _safe_get(obj, key, default=None):
4949 """Safely get value from dict or StringElement-like object."""
5050 if isinstance (obj , dict ):
5151 return obj .get (key , default )
52- elif hasattr (obj , "get" ):
52+ if hasattr (obj , "get" ):
5353 return obj .get (key , default )
54- elif hasattr (obj , key ):
54+ if hasattr (obj , key ):
5555 return getattr (obj , key , default )
56- else :
57- return default
56+ return default
5857
5958 @staticmethod
6059 def _gene_record_to_dict (gene_record , gene_id : str ) -> dict :
@@ -68,7 +67,7 @@ def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
6867 raise ValueError ("Empty gene record" )
6968
7069 gene_data = gene_record [0 ]
71-
70+
7271 # Safely extract gene_ref, handling both dict and StringElement types
7372 gene_ref = {}
7473 entrezgene_gene = gene_data .get ("Entrezgene_gene" )
@@ -146,7 +145,7 @@ def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
146145 # Note: Entrezgene_location doesn't exist, but Entrezgene_locus contains location info
147146 chromosome = None
148147 genomic_location = None
149-
148+
150149 try :
151150 locus_data = gene_data .get ("Entrezgene_locus" )
152151 if locus_data and isinstance (locus_data , list ) and locus_data :
@@ -159,7 +158,7 @@ def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
159158 match = re .search (r'Chromosome\s+(\S+)' , str (label ))
160159 if match :
161160 chromosome = match .group (1 )
162-
161+
163162 # Extract genomic location from Gene-commentary_seqs
164163 seqs = NCBISearch ._safe_get (first_locus , "Gene-commentary_seqs" , [])
165164 if seqs and isinstance (seqs , list ) and seqs :
@@ -255,7 +254,7 @@ def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
255254 reraise = True ,
256255 before_sleep = before_sleep_log (logger , logging .WARNING ),
257256 )
258-
257+
259258 def get_by_gene_id (self , gene_id : str , preferred_accession : Optional [str ] = None ) -> Optional [dict ]:
260259 """
261260 Get gene information by Gene ID.
@@ -272,7 +271,7 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
272271 if not gene_record :
273272 return None
274273 result = self ._gene_record_to_dict (gene_record , gene_id )
275-
274+
276275 # Try to get sequence from accession
277276 # Priority: 1) preferred_accession (if provided), 2) representative mRNA accession
278277 accession_to_use = preferred_accession or result .get ("_representative_accession" )
@@ -292,7 +291,7 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
292291 seq_lines = sequence_data .strip ().split ("\n " )
293292 header = seq_lines [0 ] if seq_lines else ""
294293 sequence = "" .join (seq_lines [1 :])
295-
294+
296295 # Get summary for additional info
297296 time .sleep (0.35 )
298297 summary_handle = Entrez .esummary (db = "nuccore" , id = accession_to_use )
@@ -301,7 +300,7 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
301300 if summary :
302301 summary_data = summary [0 ]
303302 title = summary_data .get ("Title" , header )
304-
303+
305304 # Determine molecule type detail
306305 molecule_type_detail = "N/A"
307306 if accession_to_use .startswith ("NM_" ) or accession_to_use .startswith ("XM_" ):
@@ -312,13 +311,13 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
312311 molecule_type_detail = "RNA"
313312 elif accession_to_use .startswith ("NG_" ):
314313 molecule_type_detail = "genomic region"
315-
314+
316315 # Merge sequence information into result
317316 result ["sequence" ] = sequence
318317 result ["sequence_length" ] = len (sequence )
319318 result ["title" ] = title
320319 result ["molecule_type_detail" ] = molecule_type_detail
321-
320+
322321 # Update chromosome and genomic_location if not already set
323322 if not result .get ("chromosome" ):
324323 chromosome = summary_data .get ("ChrLoc" ) or summary_data .get ("ChrAccVer" )
@@ -337,9 +336,9 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
337336 # Re-raise to allow retry mechanism
338337 raise
339338 except Exception as e :
340- logger .debug ("Failed to get sequence for accession %s: %s" ,
339+ logger .debug ("Failed to get sequence for accession %s: %s" ,
341340 accession_to_use , e )
342-
341+
343342 # Remove internal field
344343 result .pop ("_representative_accession" , None )
345344 return result
@@ -368,7 +367,7 @@ def get_by_accession(self, accession: str) -> Optional[dict]:
368367 gene_id = None
369368 try :
370369 links = Entrez .read (link_handle )
371-
370+
372371 # Extract GeneID from elink results
373372 # Structure: links[0]["LinkSetDb"][0]["Link"][0]["Id"]
374373 if links and len (links ) > 0 :
@@ -401,19 +400,19 @@ def get_by_accession(self, accession: str) -> Optional[dict]:
401400 # Continue to check if we got gene_id before the error
402401 finally :
403402 link_handle .close ()
404-
403+
405404 # Step 2: If we have a GeneID, get complete information from Gene database
406405 # Pass the accession as preferred_accession so get_by_gene_id can use it for sequence
407406 if gene_id :
408407 result = self .get_by_gene_id (gene_id , preferred_accession = accession )
409-
408+
410409 # Update id to accession for consistency (user searched by accession)
411410 if result :
412411 result ["id" ] = accession
413412 result ["url" ] = f"https://www.ncbi.nlm.nih.gov/nuccore/{ accession } "
414-
413+
415414 return result
416-
415+
417416 # Step 3: If no GeneID, this is a rare case (accession without associated gene)
418417 # Return None - we can't provide complete information without Gene ID
419418 logger .warning ("Accession %s has no associated GeneID, cannot provide complete information" , accession )
0 commit comments