11import asyncio
2+ import os
23import re
4+ import subprocess
5+ import tempfile
36from typing import Dict , Optional , List , Any
47
58import aiohttp
@@ -23,10 +26,15 @@ class RNACentralSearch(BaseSearcher):
2326 API Documentation: https://rnacentral.org/api/v1
2427 """
2528
26- def __init__ (self ):
def __init__(self, use_local_blast: bool = False, local_blast_db: str = "rna_db"):
    """
    Set up the RNAcentral searcher.

    :param use_local_blast: If True, sequence searches try a local ``blastn``
        run before falling back to the RNAcentral API.
    :param local_blast_db: Path prefix of the local nucleotide BLAST database
        (the name passed to ``blastn -db``, without file extension).

    If local BLAST is requested but no database index is found at the given
    prefix, an error is logged and local BLAST is disabled for this instance.
    """
    super().__init__()
    self.base_url = "https://rnacentral.org/api/v1"
    self.headers = {"Accept": "application/json"}
    self.use_local_blast = use_local_blast
    self.local_blast_db = local_blast_db
    # A single-volume nucleotide database has "<db>.nhr"; a multi-volume one
    # has an alias file "<db>.nal" (its volumes are "<db>.00.nhr", ...), so
    # checking only ".nhr" would wrongly reject multi-volume databases.
    # NOTE(review): this is a plain filesystem check; a database that blastn
    # would resolve through the BLASTDB environment variable is not detected.
    if self.use_local_blast and not any(
        os.path.isfile(f"{self.local_blast_db}{ext}") for ext in (".nhr", ".nal")
    ):
        logger.error("Local BLAST database files not found. Please check the path.")
        self.use_local_blast = False
3038
3139 async def _fetch_all_xrefs (self , xrefs_url : str , session : aiohttp .ClientSession ) -> List [Dict ]:
3240 """
@@ -294,11 +302,50 @@ async def get_best_hit(self, keyword: str) -> Optional[dict]:
294302 logger .error ("Keyword %s not found: %s" , keyword , e )
295303 return None
296304
297- async def search_by_sequence (self , sequence : str ) -> Optional [dict ]:
def _local_blast(self, seq: str, threshold: float) -> Optional[str]:
    """
    Perform local BLAST search using local BLAST database.

    The query is written to a temporary FASTA file, ``blastn`` is run against
    ``self.local_blast_db``, and the temporary file is always removed
    afterwards, whether or not the search succeeded.

    NOTE: this runs ``blastn`` synchronously via ``subprocess``; a coroutine
    that calls it directly is blocked until the process exits.

    :param seq: The RNA sequence.
    :param threshold: E-value threshold for BLAST search.
    :return: The accession/ID of the best hit or None if not found
        (also None if blastn fails for any reason; the error is logged).
    """
    tmp_name = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w+", suffix=".fa", delete=False
        ) as tmp:
            # Record the name before writing so that a failed write still
            # gets cleaned up in the ``finally`` clause below.
            tmp_name = tmp.name
            tmp.write(f">query\n{seq}\n")

        cmd = [
            "blastn",
            "-db",
            self.local_blast_db,
            "-query",
            tmp_name,
            "-evalue",
            str(threshold),
            "-max_target_seqs",
            "1",
            "-outfmt",
            "6 sacc",  # only return accession
        ]
        logger.debug("Running local blastn for RNA: %s", " ".join(cmd))
        out = subprocess.check_output(cmd, text=True).strip()
        if out:
            # One line is emitted per alignment; the first line is the best hit.
            return out.splitlines()[0]
        return None
    except Exception as exc:  # pylint: disable=broad-except
        logger.error("Local blastn failed: %s", exc)
        return None
    finally:
        # delete=False means the file outlives the ``with`` block, so it must
        # be removed here on both the success and the failure path.
        if tmp_name is not None:
            try:
                os.remove(tmp_name)
            except OSError as exc:
                logger.debug("Could not remove temp file %s: %s", tmp_name, exc)
341+
342+ async def search_by_sequence (self , sequence : str , threshold : float = 0.01 ) -> Optional [dict ]:
298343 """
299344 Search RNAcentral with an RNA sequence.
345+ Tries local BLAST first if enabled, falls back to RNAcentral API.
300346 Unified approach: Find RNA ID from sequence search, then call get_by_rna_id() for complete information.
301347 :param sequence: RNA sequence (FASTA format or raw sequence).
348+ :param threshold: E-value threshold for BLAST search.
302349 :return: A dictionary containing complete RNA information or None if not found.
303350 """
304351 try :
@@ -318,7 +365,23 @@ async def search_by_sequence(self, sequence: str) -> Optional[dict]:
318365 logger .error ("Empty RNA sequence provided." )
319366 return None
320367
321- # RNAcentral API supports sequence search
368+ # Try local BLAST first if enabled
369+ if self .use_local_blast :
370+ accession = self ._local_blast (seq , threshold )
371+ if accession :
372+ logger .debug ("Local BLAST found accession: %s" , accession )
373+ # Try to get RNA ID from accession (may need conversion)
374+ # For now, try using accession as RNA ID or search by it
375+ result = await self .get_by_rna_id (accession )
376+ if result :
377+ return result
378+ # If not found by ID, try keyword search
379+ result = await self .get_best_hit (accession )
380+ if result :
381+ return result
382+
383+ # Fall back to RNAcentral API
384+ logger .debug ("Falling back to RNAcentral API." )
322385 async with aiohttp .ClientSession () as session :
323386 search_url = f"{ self .base_url } /rna"
324387 params = {"sequence" : seq , "format" : "json" }
@@ -373,7 +436,7 @@ async def search_by_sequence(self, sequence: str) -> Optional[dict]:
373436 reraise = True ,
374437 )
375438 async def search (
376- self , query : str , threshold : float = 0.7 , ** kwargs
439+ self , query : str , threshold : float = 0.1 , ** kwargs
377440 ) -> Optional [Dict ]:
378441 """
379442 Search RNAcentral with either an RNAcentral ID, keyword, or RNA sequence.
@@ -395,7 +458,7 @@ async def search(
395458 if query .startswith (">" ) or (
396459 re .fullmatch (r"[AUCGN\s]+" , query , re .I ) and "U" in query .upper ()
397460 ):
398- result = await self .search_by_sequence (query )
461+ result = await self .search_by_sequence (query , threshold )
399462 # check if RNAcentral ID (typically starts with URS)
400463 elif re .fullmatch (r"URS\d+" , query , re .I ):
401464 result = await self .get_by_rna_id (query )
0 commit comments