Skip to content

Commit

Permalink
lint, remove test code
Browse files: browse the repository at this point in the history
  • Loading branch information
elewis2 committed Nov 12, 2024
1 parent 4996001 commit ad6567b
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 167 deletions.
43 changes: 11 additions & 32 deletions pori_python/ipr/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ def get_second_pass_variants(
}

for reference1, variant_type in inferred_variants:
variants = gkb_match.match_category_variant(
graphkb_conn, reference1, variant_type
)
variants = gkb_match.match_category_variant(graphkb_conn, reference1, variant_type)

for variant in variants:
all_inferred_matches[variant["@rid"]] = variant
Expand Down Expand Up @@ -82,8 +80,7 @@ def get_ipr_statements_from_variants(
inferred_statements = [
s
for s in get_statements_from_variants(graphkb_conn, inferred_matches)
if s["@rid"]
not in existing_statements # do not duplicate if non-inferred match
if s["@rid"] not in existing_statements # do not duplicate if non-inferred match
]

for ipr_row in convert_statements_to_alterations(
Expand Down Expand Up @@ -128,9 +125,7 @@ def annotate_expression_variants(
continue
try:
matches = gkb_match.match_expression_variant(graphkb_conn, gene, variant)
for ipr_row in get_ipr_statements_from_variants(
graphkb_conn, matches, disease_name
):
for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name):
ipr_row["variant"] = row["key"]
ipr_row["variantType"] = row.get("variantType", "exp")
# "kbVariantId": matched_stmt["kbVariantId"],
Expand Down Expand Up @@ -183,15 +178,11 @@ def annotate_copy_variants(
if variant not in REPORTED_COPY_VARIANTS:
# https://www.bcgsc.ca/jira/browse/GERO-77
skipped += 1
logger.debug(
f"Dropping {gene} copy change '{variant}' - not in REPORTED_COPY_VARIANTS"
)
logger.debug(f"Dropping {gene} copy change '{variant}' - not in REPORTED_COPY_VARIANTS")
continue
try:
matches = gkb_match.match_copy_variant(graphkb_conn, gene, variant)
for ipr_row in get_ipr_statements_from_variants(
graphkb_conn, matches, disease_name
):
for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name):
ipr_row["variant"] = row["key"]
ipr_row["variantType"] = row.get("variantType", "cnv")
# "kbVariantId": matched_stmt["kbVariantId"],
Expand All @@ -210,9 +201,7 @@ def annotate_copy_variants(
)
if problem_genes:
logger.error(f"gene finding failures for copy variants {sorted(problem_genes)}")
logger.error(
f"gene finding failure for {len(problem_genes)} copy variant genes"
)
logger.error(f"gene finding failure for {len(problem_genes)} copy variant genes")
logger.info(
f"matched {len(variants)} copy category variants to {len(alterations)} graphkb annotations"
)
Expand Down Expand Up @@ -270,9 +259,7 @@ def annotate_positional_variants(
f"Assuming malformed deletion variant {variant} is {variant[:-2] + 'del'}"
)
variant = variant[:-2] + "del"
matches = gkb_match.match_positional_variant(
graphkb_conn, variant
)
matches = gkb_match.match_positional_variant(graphkb_conn, variant)
else:
raise parse_err
for ipr_row in get_ipr_statements_from_variants(
Expand Down Expand Up @@ -307,9 +294,7 @@ def annotate_positional_variants(

if problem_genes:
logger.error(f"gene finding failures for {sorted(problem_genes)}")
logger.error(
f"{len(problem_genes)} gene finding failures for positional variants"
)
logger.error(f"{len(problem_genes)} gene finding failures for positional variants")
if errors:
logger.error(f"skipped {errors} positional variants due to errors")

Expand Down Expand Up @@ -357,9 +342,7 @@ def annotate_msi(
)
if msi_categories:
msi_variants = [cast(Variant, var) for var in msi_categories]
for ipr_row in get_ipr_statements_from_variants(
graphkb_conn, msi_variants, disease_name
):
for ipr_row in get_ipr_statements_from_variants(graphkb_conn, msi_variants, disease_name):
ipr_row["variant"] = msi_category
ipr_row["variantType"] = "msi"
# "kbVariantId": matched_stmt["kbVariantId"],
Expand Down Expand Up @@ -393,9 +376,7 @@ def annotate_tmb(
"filters": {
"reference1": {
"target": "Signature",
"filters": {
"OR": [{"name": category}, {"displayName": category}]
},
"filters": {"OR": [{"name": category}, {"displayName": category}]},
}
},
},
Expand All @@ -405,9 +386,7 @@ def annotate_tmb(
)
if categories:
cat_variants = [cast(Variant, var) for var in categories]
for ipr_row in get_ipr_statements_from_variants(
graphkb_conn, cat_variants, disease_name
):
for ipr_row in get_ipr_statements_from_variants(graphkb_conn, cat_variants, disease_name):
ipr_row["variant"] = category
ipr_row["variantType"] = "tmb"
# "kbVariantId": matched_stmt["kbVariantId"],
Expand Down
68 changes: 17 additions & 51 deletions pori_python/ipr/ipr.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,9 @@ def filter_structural_variants(
Filter structural variants to remove non-high quality events unless they are matched/annotated or
they involve a gene that is a known fusion partner
"""
matched_svs = {
match["variant"] for match in kb_matches if match["variantType"] == "sv"
}
matched_svs = {match["variant"] for match in kb_matches if match["variantType"] == "sv"}
fusion_genes = {
gene["name"]
for gene in gene_annotations
if gene.get("knownFusionPartner", False)
gene["name"] for gene in gene_annotations if gene.get("knownFusionPartner", False)
}

result = []
Expand Down Expand Up @@ -89,9 +85,7 @@ def get_evidencelevel_mapping(graphkb_conn: GraphKBConnection) -> Dict[str, str]

# Filter IPR EvidenceLevel and map each outgoing CrossReferenceOf to displayName
ipr_source_rid = graphkb_conn.get_source("ipr")["@rid"]
ipr_evidence_levels = filter(
lambda d: d.get("source") == ipr_source_rid, evidence_levels
)
ipr_evidence_levels = filter(lambda d: d.get("source") == ipr_source_rid, evidence_levels)
cross_references_mapping: Dict[str, str] = dict()
ipr_rids_to_displayname: Dict[str, str] = dict()
for level in ipr_evidence_levels:
Expand Down Expand Up @@ -139,9 +133,7 @@ def convert_statements_to_alterations(
"""
disease_matches = {
r["@rid"]
for r in gkb_vocab.get_term_tree(
graphkb_conn, disease_name, ontology_class="Disease"
)
for r in gkb_vocab.get_term_tree(graphkb_conn, disease_name, ontology_class="Disease")
}

if not disease_matches:
Expand All @@ -154,9 +146,7 @@ def convert_statements_to_alterations(

# get the recruitment status for any trial associated with a statement
clinical_trials = [
s["subject"]["@rid"]
for s in statements
if s["subject"]["@class"] == "ClinicalTrial"
s["subject"]["@rid"] for s in statements if s["subject"]["@class"] == "ClinicalTrial"
]
recruitment_statuses = {}
if clinical_trials:
Expand All @@ -173,9 +163,7 @@ def convert_statements_to_alterations(

for statement in statements:
variants = [
cast(Variant, c)
for c in statement["conditions"]
if c["@class"] in VARIANT_CLASSES
cast(Variant, c) for c in statement["conditions"] if c["@class"] in VARIANT_CLASSES
]
diseases = [c for c in statement["conditions"] if c["@class"] == "Disease"]
disease_match = len(diseases) == 1 and diseases[0]["@rid"] in disease_matches
Expand All @@ -196,25 +184,17 @@ def convert_statements_to_alterations(

evidence_level_str = display_evidence_levels(statement)
evidence_levels = statement.get("evidenceLevel") or []
ipr_evidence_levels = [
ev_map[el.get("@rid", "")] for el in evidence_levels if el
]
ipr_evidence_levels_str = ";".join(
sorted(set([el for el in ipr_evidence_levels]))
)
ipr_evidence_levels = [ev_map[el.get("@rid", "")] for el in evidence_levels if el]
ipr_evidence_levels_str = ";".join(sorted(set([el for el in ipr_evidence_levels])))

for variant in variants:
if variant["@rid"] not in variant_matches:
continue
stmt = {
"approvedTherapy": approved_therapy or False,
"category": ipr_section or "unknown",
"context": (
statement["subject"]["displayName"] if statement["subject"] else ""
),
"kbContextId": (
statement["subject"]["@rid"] if statement["subject"] else ""
),
"context": (statement["subject"]["displayName"] if statement["subject"] else ""),
"kbContextId": (statement["subject"]["@rid"] if statement["subject"] else ""),
"disease": ";".join(sorted(d.get("displayName", "") for d in diseases)),
"evidenceLevel": evidence_level_str or "",
"iprEvidenceLevel": ipr_evidence_levels_str or "",
Expand All @@ -224,9 +204,7 @@ def convert_statements_to_alterations(
"relevance": statement["relevance"]["displayName"],
"kbRelevanceId": statement["relevance"]["@rid"],
"externalSource": (
str(statement["source"].get("displayName", ""))
if statement["source"]
else ""
str(statement["source"].get("displayName", "")) if statement["source"] else ""
),
"externalStatementId": statement.get("sourceId", "") or "",
"reviewStatus": statement.get("reviewStatus", "") or "",
Expand Down Expand Up @@ -281,9 +259,7 @@ def select_expression_plots(
gene = str(variant.get("gene", ""))
hist = str(variant.get("histogramImage", ""))
if hist:
images_by_gene[gene] = ImageDefinition(
{"key": f"expDensity.{gene}", "path": hist}
)
images_by_gene[gene] = ImageDefinition({"key": f"expDensity.{gene}", "path": hist})
return [images_by_gene[gene] for gene in selected_genes if gene in images_by_gene]


Expand All @@ -307,9 +283,7 @@ def create_key_alterations(
variant_type = kb_match["variantType"]
variant_key = kb_match["variant"]

match_categories = [
item["category"] for item in kb_match["kbMatchedStatements"]
]
match_categories = [item["category"] for item in kb_match["kbMatchedStatements"]]
if list(set(match_categories)) == ["unknown"]:
continue

Expand All @@ -330,9 +304,7 @@ def create_key_alterations(
counts[type_mapping[variant_type]].add(variant_key)

if variant_type == "exp":
alterations.append(
f'{variant.get("gene","")} ({variant.get("expressionState")})'
)
alterations.append(f'{variant.get("gene","")} ({variant.get("expressionState")})')
elif variant_type == "cnv":
alterations.append(f'{variant.get("gene","")} ({variant.get("cnvState")})')
elif any(item in GERMLINE_BASE_TERMS for item in match_categories):
Expand Down Expand Up @@ -408,19 +380,15 @@ def germline_kb_matches(
# Remove any matches to germline events
for alt in somatic_alts:
var_list = [v for v in all_variants if v["key"] == alt["variant"]]
somatic_var_list = [
v for v in var_list if not v.get("germline", not assume_somatic)
]
somatic_var_list = [v for v in var_list if not v.get("germline", not assume_somatic)]
if var_list and not somatic_var_list:
logger.debug(
f"Dropping germline match to somatic statement kbStatementId:{alt['kbStatementId']}: {alt['kbVariant']} {alt['category']}"
)
elif somatic_var_list:
ret_list.append(alt) # match to somatic variant
else:
ret_list.append(
alt
) # alteration not in any specific keys matches to check.
ret_list.append(alt) # alteration not in any specific keys matches to check.

return ret_list

Expand Down Expand Up @@ -449,9 +417,7 @@ def multi_variant_filtering(
"""
# All matching statements & variants (GKB RIDs)
matching_statement_rids = {
stmt["kbStatementId"]
for match in gkb_matches
for stmt in match["kbMatchedStatements"]
stmt["kbStatementId"] for match in gkb_matches for stmt in match["kbMatchedStatements"]
}
matching_variant_rids = {match["kbVariantId"] for match in gkb_matches}

Expand Down
Loading

0 comments on commit ad6567b

Please sign in to comment.