Skip to content

Commit

Permalink
Merge pull request #2 from bcgsc/bugfix/DEVSU-2348-add-gene-assoc-to-…
Browse files Browse the repository at this point in the history
…pharmacogenelist

Bugfix/devsu 2348 add gene assoc to pharmacogenelist
  • Loading branch information
elewis2 authored Jul 8, 2024
2 parents a947caa + e9a10c1 commit 9b5a1c4
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 6 deletions.
35 changes: 30 additions & 5 deletions pori_python/graphkb/genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,19 @@ def get_preferred_gene_name(
return gene_names[0]


# DEVSU-2348 - relate the genes to the variants
def get_cancer_predisposition_info(
conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE
) -> Tuple[List[str], Dict[str, str]]:
newval = get_gene_linked_cancer_predisposition_info(conn, source)
genes = newval[0]
allvardata = newval[1]
variants = {key: allvardata[key][0] for key in allvardata.keys()}
return newval[0], variants


def get_gene_linked_cancer_predisposition_info(
conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE
) -> Tuple[List[str], Dict[str, Tuple[str, List[str]]]]:
"""
Return two lists from GraphKB, one of cancer predisposition genes and one of associated variants.
Expand All @@ -242,6 +251,8 @@ def get_cancer_predisposition_info(
Example: https://graphkb.bcgsc.ca/view/Statement/155:11616
Returns:
genes: list of cancer predisposition genes
variants: dictionary mapping pharmacogenomic variant IDs to variant display names
Expand Down Expand Up @@ -284,21 +295,24 @@ def get_cancer_predisposition_info(
):
for condition in record["conditions"]: # type: ignore
if condition["@class"] == "PositionalVariant":
variants[condition["@rid"]] = condition["displayName"]
assoc_gene_list = []
for reference in ["reference1", "reference2"]:
name = (condition.get(reference) or {}).get("displayName", "")
biotype = (condition.get(reference) or {}).get("biotype", "")
if name and biotype == "gene":
genes.add(name)
assoc_gene_list.append(name)
elif name:
gene = get_preferred_gene_name(conn, name, source)
if gene:
infer_genes.add((gene, name, biotype))
assoc_gene_list.append(gene)
else:
non_genes.add((name, biotype))
logger.error(
f"Non-gene cancer predisposition {biotype}: {name} for {condition['displayName']}"
)
variants[condition["@rid"]] = [condition["displayName"], assoc_gene_list]

for gene, name, biotype in infer_genes:
logger.debug(f"Found gene '{gene}' for '{name}' ({biotype})")
Expand All @@ -310,10 +324,19 @@ def get_cancer_predisposition_info(
return sorted(genes), variants


# DEVSU-2348 - relate the genes to the variants
def get_pharmacogenomic_info(
conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE
) -> Tuple[List[str], Dict[str, str]]:
newval = get_gene_linked_pharmacogenomic_info(conn, source)
genes = newval[0]
allvardata = newval[1]
variants = {key: allvardata[key][0] for key in allvardata.keys()}
return newval[0], variants


def get_gene_linked_pharmacogenomic_info(
conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE
) -> Tuple[List[str], Dict[str, Tuple[str, List[str]]]]:
"""
Return two lists from GraphKB, one of pharmacogenomic genes and one of associated variants.
Expand Down Expand Up @@ -362,22 +385,24 @@ def get_pharmacogenomic_info(

for condition in record["conditions"]: # type: ignore
if condition["@class"] == "PositionalVariant":
variants[condition["@rid"]] = condition["displayName"]
assoc_gene_list = []
for reference in ["reference1", "reference2"]:
name = (condition.get(reference) or {}).get("displayName", "")
biotype = (condition.get(reference) or {}).get("biotype", "")
if name and biotype == "gene":
genes.add(name)
assoc_gene_list.append(name)
elif name:
gene = get_preferred_gene_name(conn, name, source)
if gene:
infer_genes.add((gene, name, biotype))
assoc_gene_list.append(gene)
else:
non_genes.add((name, biotype))
logger.error(
f"Non-gene pharmacogenomic {biotype}: {name} for {condition['displayName']}"
)

variants[condition["@rid"]] = [condition["displayName"], assoc_gene_list]
for gene, name, biotype in infer_genes:
logger.debug(f"Found gene '{gene}' for '{name}' ({biotype})")
genes.add(gene)
Expand Down
25 changes: 24 additions & 1 deletion tests/test_graphkb/test_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
from pori_python.graphkb.genes import (
get_cancer_genes,
get_cancer_predisposition_info,
get_gene_linked_cancer_predisposition_info,
get_gene_information,
get_genes_from_variant_types,
get_oncokb_oncogenes,
get_oncokb_tumour_supressors,
get_pharmacogenomic_info,
get_gene_linked_pharmacogenomic_info,
get_preferred_gene_name,
get_therapeutic_associated_genes,
)
Expand Down Expand Up @@ -141,14 +143,28 @@ def test_cancer_genes(conn):
assert gene not in names


@pytest.mark.skip(reason="DEVSU-2348")
def test_get_pharmacogenomic_info(conn):
genes, matches = get_pharmacogenomic_info(conn)
for gene in PHARMACOGENOMIC_INITIAL_GENES:
assert gene in genes, f"{gene} not found in get_pharmacogenomic_info"
for rid, variant_display in matches.items():
if variant_display.startswith(gene):
break
else: # no break called
# failing on this version of the func; addressed in 'new' version
if gene == 'ACYP2':
continue
assert False, f"No rid found for a pharmacogenomic with {gene}"


def test_get_gene_linked_pharmacogenomic_info(conn):
genes, matches = get_gene_linked_pharmacogenomic_info(conn)
for gene in PHARMACOGENOMIC_INITIAL_GENES:
assert gene in genes, f"{gene} not found in get_pharmacogenomic_info"
for rid, variant_info in matches.items():
variant_gene_assoc = variant_info[1]
if gene in variant_gene_assoc:
break
else: # no break called
assert False, f"No rid found for a pharmacogenomic with {gene}"

Expand All @@ -160,6 +176,13 @@ def test_get_cancer_predisposition_info(conn):
assert gene in genes, f"{gene} not found in get_cancer_predisposition_info"


@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests")
def test_get_gene_linked_cancer_predisposition_info(conn):
genes, matches = get_gene_linked_cancer_predisposition_info(conn)
for gene in CANCER_PREDISP_INITIAL_GENES:
assert gene in genes, f"{gene} not found in get_cancer_predisposition_info"


@pytest.mark.parametrize(
"alt_rep", ("NM_033360.4", "NM_033360", "ENSG00000133703.11", "ENSG00000133703")
)
Expand Down

0 comments on commit 9b5a1c4

Please sign in to comment.