Skip to content

Commit af71e07

Browse files
committed
fix: accept both U and T as RNA seq
1 parent af49ba2 commit af71e07

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

graphgen/models/searcher/db/rnacentral_searcher.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -327,7 +327,8 @@ def _extract_sequence(sequence: str) -> Optional[str]:
327327
seq = "".join(seq_lines[1:])
328328
else:
329329
seq = sequence.strip().replace(" ", "").replace("\n", "")
330-
return seq if seq and re.fullmatch(r"[AUCGN\s]+", seq, re.I) else None
330+
# Accept both U (original RNA) and T (converted for local BLAST compatibility)
331+
return seq if seq and re.fullmatch(r"[AUCGTN\s]+", seq, re.I) else None
331332

332333
try:
333334
seq = _extract_sequence(sequence)
@@ -404,10 +405,13 @@ async def search(self, query: str, threshold: float = 0.1, **kwargs) -> Optional
404405

405406
loop = asyncio.get_running_loop()
406407

407-
# check if RNA sequence (AUCG characters, contains U)
408-
if query.startswith(">") or (
409-
re.fullmatch(r"[AUCGN\s]+", query, re.I) and "U" in query.upper()
410-
):
408+
# check if RNA sequence (AUCG or ATCG characters, contains U or T)
409+
# Note: Sequences with T are also RNA sequences
410+
is_rna_sequence = query.startswith(">") or (
411+
re.fullmatch(r"[AUCGTN\s]+", query, re.I) and
412+
("U" in query.upper() or "T" in query.upper())
413+
)
414+
if is_rna_sequence:
411415
result = await loop.run_in_executor(_get_pool(), self.get_by_fasta, query, threshold)
412416
# check if RNAcentral ID (typically starts with URS)
413417
elif re.fullmatch(r"URS\d+", query, re.I):

0 commit comments

Comments
 (0)