Skip to content

Commit 8678e33

Browse files
committed
fix: fix pylint style issues
1 parent bdba4f9 commit 8678e33

File tree

2 files changed

+49
-50
lines changed

2 files changed

+49
-50
lines changed

graphgen/models/searcher/db/ncbi_searcher.py

Lines changed: 20 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ class NCBISearch(BaseSearcher):
3333
1) Get the gene/DNA by accession number or gene ID.
3434
2) Search with keywords or gene names (fuzzy search).
3535
3) Search with FASTA sequence (BLAST search for DNA sequences).
36-
36+
3737
API Documentation: https://www.ncbi.nlm.nih.gov/home/develop/api/
3838
Note: NCBI has rate limits (max 3 requests per second), delays are required between requests.
3939
"""
@@ -49,12 +49,11 @@ def _safe_get(obj, key, default=None):
4949
"""Safely get value from dict or StringElement-like object."""
5050
if isinstance(obj, dict):
5151
return obj.get(key, default)
52-
elif hasattr(obj, "get"):
52+
if hasattr(obj, "get"):
5353
return obj.get(key, default)
54-
elif hasattr(obj, key):
54+
if hasattr(obj, key):
5555
return getattr(obj, key, default)
56-
else:
57-
return default
56+
return default
5857

5958
@staticmethod
6059
def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
@@ -68,7 +67,7 @@ def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
6867
raise ValueError("Empty gene record")
6968

7069
gene_data = gene_record[0]
71-
70+
7271
# Safely extract gene_ref, handling both dict and StringElement types
7372
gene_ref = {}
7473
entrezgene_gene = gene_data.get("Entrezgene_gene")
@@ -146,7 +145,7 @@ def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
146145
# Note: Entrezgene_location doesn't exist, but Entrezgene_locus contains location info
147146
chromosome = None
148147
genomic_location = None
149-
148+
150149
try:
151150
locus_data = gene_data.get("Entrezgene_locus")
152151
if locus_data and isinstance(locus_data, list) and locus_data:
@@ -159,7 +158,7 @@ def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
159158
match = re.search(r'Chromosome\s+(\S+)', str(label))
160159
if match:
161160
chromosome = match.group(1)
162-
161+
163162
# Extract genomic location from Gene-commentary_seqs
164163
seqs = NCBISearch._safe_get(first_locus, "Gene-commentary_seqs", [])
165164
if seqs and isinstance(seqs, list) and seqs:
@@ -255,7 +254,7 @@ def _gene_record_to_dict(gene_record, gene_id: str) -> dict:
255254
reraise=True,
256255
before_sleep=before_sleep_log(logger, logging.WARNING),
257256
)
258-
257+
259258
def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None) -> Optional[dict]:
260259
"""
261260
Get gene information by Gene ID.
@@ -272,7 +271,7 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
272271
if not gene_record:
273272
return None
274273
result = self._gene_record_to_dict(gene_record, gene_id)
275-
274+
276275
# Try to get sequence from accession
277276
# Priority: 1) preferred_accession (if provided), 2) representative mRNA accession
278277
accession_to_use = preferred_accession or result.get("_representative_accession")
@@ -292,7 +291,7 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
292291
seq_lines = sequence_data.strip().split("\n")
293292
header = seq_lines[0] if seq_lines else ""
294293
sequence = "".join(seq_lines[1:])
295-
294+
296295
# Get summary for additional info
297296
time.sleep(0.35)
298297
summary_handle = Entrez.esummary(db="nuccore", id=accession_to_use)
@@ -301,7 +300,7 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
301300
if summary:
302301
summary_data = summary[0]
303302
title = summary_data.get("Title", header)
304-
303+
305304
# Determine molecule type detail
306305
molecule_type_detail = "N/A"
307306
if accession_to_use.startswith("NM_") or accession_to_use.startswith("XM_"):
@@ -312,13 +311,13 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
312311
molecule_type_detail = "RNA"
313312
elif accession_to_use.startswith("NG_"):
314313
molecule_type_detail = "genomic region"
315-
314+
316315
# Merge sequence information into result
317316
result["sequence"] = sequence
318317
result["sequence_length"] = len(sequence)
319318
result["title"] = title
320319
result["molecule_type_detail"] = molecule_type_detail
321-
320+
322321
# Update chromosome and genomic_location if not already set
323322
if not result.get("chromosome"):
324323
chromosome = summary_data.get("ChrLoc") or summary_data.get("ChrAccVer")
@@ -337,9 +336,9 @@ def get_by_gene_id(self, gene_id: str, preferred_accession: Optional[str] = None
337336
# Re-raise to allow retry mechanism
338337
raise
339338
except Exception as e:
340-
logger.debug("Failed to get sequence for accession %s: %s",
339+
logger.debug("Failed to get sequence for accession %s: %s",
341340
accession_to_use, e)
342-
341+
343342
# Remove internal field
344343
result.pop("_representative_accession", None)
345344
return result
@@ -368,7 +367,7 @@ def get_by_accession(self, accession: str) -> Optional[dict]:
368367
gene_id = None
369368
try:
370369
links = Entrez.read(link_handle)
371-
370+
372371
# Extract GeneID from elink results
373372
# Structure: links[0]["LinkSetDb"][0]["Link"][0]["Id"]
374373
if links and len(links) > 0:
@@ -401,19 +400,19 @@ def get_by_accession(self, accession: str) -> Optional[dict]:
401400
# Continue to check if we got gene_id before the error
402401
finally:
403402
link_handle.close()
404-
403+
405404
# Step 2: If we have a GeneID, get complete information from Gene database
406405
# Pass the accession as preferred_accession so get_by_gene_id can use it for sequence
407406
if gene_id:
408407
result = self.get_by_gene_id(gene_id, preferred_accession=accession)
409-
408+
410409
# Update id to accession for consistency (user searched by accession)
411410
if result:
412411
result["id"] = accession
413412
result["url"] = f"https://www.ncbi.nlm.nih.gov/nuccore/{accession}"
414-
413+
415414
return result
416-
415+
417416
# Step 3: If no GeneID, this is a rare case (accession without associated gene)
418417
# Return None - we can't provide complete information without Gene ID
419418
logger.warning("Accession %s has no associated GeneID, cannot provide complete information", accession)

graphgen/models/searcher/db/rnacentral_searcher.py

Lines changed: 29 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ class RNACentralSearch(BaseSearcher):
1919
1) Get RNA by RNAcentral ID.
2020
2) Search with keywords or RNA names (fuzzy search).
2121
3) Search with RNA sequence.
22-
22+
2323
API Documentation: https://rnacentral.org/api/v1
2424
"""
2525

@@ -37,7 +37,7 @@ async def _fetch_all_xrefs(self, xrefs_url: str, session: aiohttp.ClientSession)
3737
"""
3838
all_xrefs = []
3939
current_url = xrefs_url
40-
40+
4141
while current_url:
4242
try:
4343
async with session.get(
@@ -47,12 +47,12 @@ async def _fetch_all_xrefs(self, xrefs_url: str, session: aiohttp.ClientSession)
4747
data = await resp.json()
4848
results = data.get("results", [])
4949
all_xrefs.extend(results)
50-
50+
5151
# Check if there's a next page
5252
current_url = data.get("next")
5353
if not current_url:
5454
break
55-
55+
5656
# Small delay to avoid rate limiting
5757
await asyncio.sleep(0.2)
5858
else:
@@ -61,7 +61,7 @@ async def _fetch_all_xrefs(self, xrefs_url: str, session: aiohttp.ClientSession)
6161
except Exception as e:
6262
logger.warning("Error fetching xrefs from %s: %s", current_url, e)
6363
break
64-
64+
6565
return all_xrefs
6666

6767
@staticmethod
@@ -78,31 +78,31 @@ def _extract_info_from_xrefs(xrefs: List[Dict]) -> Dict[str, Any]:
7878
"so_terms": set(),
7979
"xrefs_list": [],
8080
}
81-
81+
8282
for xref in xrefs:
8383
# Extract accession information
8484
accession = xref.get("accession", {})
85-
85+
8686
# Extract species information
8787
species = accession.get("species")
8888
if species:
8989
extracted["organisms"].add(species)
90-
90+
9191
# Extract gene name
9292
gene = accession.get("gene")
9393
if gene and gene.strip(): # Only add non-empty genes
9494
extracted["gene_names"].add(gene.strip())
95-
95+
9696
# Extract modifications
9797
modifications = xref.get("modifications", [])
9898
if modifications:
9999
extracted["modifications"].extend(modifications)
100-
100+
101101
# Extract SO term (biotype)
102102
biotype = accession.get("biotype")
103103
if biotype:
104104
extracted["so_terms"].add(biotype)
105-
105+
106106
# Build xrefs list
107107
xref_info = {
108108
"database": xref.get("database"),
@@ -113,24 +113,24 @@ def _extract_info_from_xrefs(xrefs: List[Dict]) -> Dict[str, Any]:
113113
"gene": gene,
114114
}
115115
extracted["xrefs_list"].append(xref_info)
116-
116+
117117
# Convert sets to appropriate formats
118118
return {
119119
"organism": (
120-
list(extracted["organisms"])[0]
121-
if len(extracted["organisms"]) == 1
120+
list(extracted["organisms"])[0]
121+
if len(extracted["organisms"]) == 1
122122
else (", ".join(extracted["organisms"]) if extracted["organisms"] else None)
123123
),
124124
"gene_name": (
125-
list(extracted["gene_names"])[0]
126-
if len(extracted["gene_names"]) == 1
125+
list(extracted["gene_names"])[0]
126+
if len(extracted["gene_names"]) == 1
127127
else (", ".join(extracted["gene_names"]) if extracted["gene_names"] else None)
128128
),
129129
"related_genes": list(extracted["gene_names"]) if extracted["gene_names"] else None,
130130
"modifications": extracted["modifications"] if extracted["modifications"] else None,
131131
"so_term": (
132-
list(extracted["so_terms"])[0]
133-
if len(extracted["so_terms"]) == 1
132+
list(extracted["so_terms"])[0]
133+
if len(extracted["so_terms"]) == 1
134134
else (", ".join(extracted["so_terms"]) if extracted["so_terms"] else None)
135135
),
136136
"xrefs": extracted["xrefs_list"] if extracted["xrefs_list"] else None,
@@ -146,12 +146,12 @@ def _rna_data_to_dict(rna_id: str, rna_data: dict, xrefs_data: Optional[List[Dic
146146
:return: A dictionary containing RNA information.
147147
"""
148148
sequence = rna_data.get("sequence", "")
149-
149+
150150
# Initialize extracted info from xrefs if available
151151
extracted_info = {}
152152
if xrefs_data:
153153
extracted_info = RNACentralSearch._extract_info_from_xrefs(xrefs_data)
154-
154+
155155
# Extract organism information (prefer from xrefs, fallback to main data)
156156
organism = extracted_info.get("organism")
157157
if not organism:
@@ -220,7 +220,7 @@ async def get_by_rna_id(self, rna_id: str) -> Optional[dict]:
220220
) as resp:
221221
if resp.status == 200:
222222
rna_data = await resp.json()
223-
223+
224224
# Check if xrefs is a URL and fetch the actual xrefs data
225225
xrefs_data = None
226226
xrefs_url = rna_data.get("xrefs")
@@ -231,7 +231,7 @@ async def get_by_rna_id(self, rna_id: str) -> Optional[dict]:
231231
except Exception as e:
232232
logger.warning("Failed to fetch xrefs for RNA ID %s: %s", rna_id, e)
233233
# Continue without xrefs data
234-
234+
235235
return self._rna_data_to_dict(rna_id, rna_data, xrefs_data)
236236
if resp.status == 404:
237237
logger.error("RNA ID %s not found", rna_id)
@@ -271,16 +271,16 @@ async def get_best_hit(self, keyword: str) -> Optional[dict]:
271271
# Step 1: Get RNA ID from search results
272272
first_result = results[0]
273273
rna_id = first_result.get("rnacentral_id")
274-
274+
275275
if rna_id:
276276
# Step 2: Unified call to get_by_rna_id() for complete information
277277
result = await self.get_by_rna_id(rna_id)
278-
278+
279279
# Step 3: If get_by_rna_id() failed, use search result data as fallback
280280
if not result:
281281
logger.debug("get_by_rna_id() failed for %s, using search result data", rna_id)
282282
result = self._rna_data_to_dict(rna_id, first_result)
283-
283+
284284
return result
285285
logger.info("No results found for keyword: %s", keyword)
286286
return None
@@ -339,20 +339,20 @@ async def search_by_sequence(self, sequence: str) -> Optional[dict]:
339339
if result_seq == seq:
340340
exact_match = result
341341
break
342-
342+
343343
# Use exact match if found, otherwise use first result
344344
target_result = exact_match if exact_match else results[0]
345345
rna_id = target_result.get("rnacentral_id")
346-
346+
347347
if rna_id:
348348
# Step 2: Unified call to get_by_rna_id() for complete information
349349
result = await self.get_by_rna_id(rna_id)
350-
350+
351351
# Step 3: If get_by_rna_id() failed, use search result data as fallback
352352
if not result:
353353
logger.debug("get_by_rna_id() failed for %s, using search result data", rna_id)
354354
result = self._rna_data_to_dict(rna_id, target_result)
355-
355+
356356
return result
357357
logger.info("No results found for sequence.")
358358
return None

0 commit comments

Comments
 (0)