Skip to content

Commit

Permalink
SDEV-4431 - refactor typing errors - use HashableDict types to reduce typing errors. Make HashableDict a type.
Browse files Browse the repository at this point in the history
  • Loading branch information
dustinbleile committed Jul 23, 2024
1 parent 7269a54 commit f0ae3cd
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 35 deletions.
5 changes: 3 additions & 2 deletions pori_python/ipr/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pori_python.graphkb.statement import get_statements_from_variants
from pori_python.graphkb.util import FeatureNotFoundError
from pori_python.types import (
Hashabledict,
IprCopyVariant,
IprExprVariant,
IprStructuralVariant,
Expand All @@ -24,7 +25,7 @@

from .constants import TMB_HIGH_CATEGORY
from .ipr import convert_statements_to_alterations
from .util import Hashabledict, convert_to_rid_set, logger
from .util import convert_to_rid_set, logger

REPORTED_COPY_VARIANTS = (INPUT_COPY_CATEGORIES.AMP, INPUT_COPY_CATEGORIES.DEEP)

Expand Down Expand Up @@ -200,7 +201,7 @@ def annotate_copy_variants(

def annotate_positional_variants(
graphkb_conn: GraphKBConnection,
variants: Sequence[IprStructuralVariant],
variants: Sequence[IprStructuralVariant] | Sequence[Hashabledict],
disease_name: str,
show_progress: bool = False,
) -> List[Hashabledict]:
Expand Down
11 changes: 6 additions & 5 deletions pori_python/ipr/ipr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pori_python.graphkb import statement as gkb_statement
from pori_python.graphkb import vocab as gkb_vocab
from pori_python.types import (
Hashabledict,
ImageDefinition,
IprFusionVariant,
IprGene,
Expand All @@ -34,7 +35,7 @@ def display_evidence_levels(statement: Statement) -> str:

def filter_structural_variants(
structural_variants: List[IprFusionVariant],
kb_matches: List[KbMatch],
kb_matches: List[KbMatch] | List[Hashabledict],
gene_annotations: List[IprGene],
) -> List[IprFusionVariant]:
"""
Expand Down Expand Up @@ -225,7 +226,7 @@ def convert_statements_to_alterations(


def select_expression_plots(
kb_matches: List[KbMatch], all_variants: Sequence[IprVariant]
kb_matches: List[KbMatch] | List[Hashabledict], all_variants: Sequence[IprVariant]
) -> List[ImageDefinition]:
"""
Given the list of expression variants, determine which expression
Expand Down Expand Up @@ -261,7 +262,7 @@ def select_expression_plots(


def create_key_alterations(
kb_matches: List[KbMatch], all_variants: Sequence[IprVariant]
kb_matches: List[Hashabledict], all_variants: Sequence[IprVariant]
) -> Tuple[List[Dict], Dict]:
"""Create the list of significant variants matched by the KB.
Expand Down Expand Up @@ -323,8 +324,8 @@ def create_key_alterations(


def germline_kb_matches(
kb_matches: List[KbMatch], all_variants: Sequence[IprVariant], assume_somatic: bool = True
) -> List[KbMatch]:
kb_matches: List[Hashabledict], all_variants: Sequence[IprVariant], assume_somatic: bool = True
) -> List[Hashabledict]:
"""Filter kb_matches for matching to germline or somatic events using the 'germline' optional property.
Statements related to pharmacogenomic toxicity or cancer predisposition are only relevant if
Expand Down
45 changes: 28 additions & 17 deletions pori_python/ipr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from pori_python.graphkb import GraphKBConnection
from pori_python.graphkb.genes import get_gene_information
from pori_python.types import IprVariant, KbMatch
from pori_python.types import Hashabledict, IprVariant

from .annotate import (
annotate_copy_variants,
Expand All @@ -35,7 +35,7 @@
germline_kb_matches,
select_expression_plots,
)
from .summary import summarize
from .summary import auto_analyst_comments
from .therapeutic_options import create_therapeutic_options
from .util import LOG_LEVELS, logger, trim_empty_values

Expand Down Expand Up @@ -295,7 +295,7 @@ def ipr_report(
graphkb_conn = GraphKBConnection()
graphkb_conn.login(username, password)

gkb_matches: List[KbMatch] = []
gkb_matches: List[Hashabledict] = []

# Signature category variants
tmb_variant: IprVariant = {} # type: ignore
Expand Down Expand Up @@ -328,7 +328,7 @@ def ipr_report(
logger.info(
f"GERO-296 '{TMB_HIGH_CATEGORY}' matches {len(tmb_matches)} statements."
)
gkb_matches.extend(tmb_matches)
gkb_matches.extend([Hashabledict(tmb_statement) for tmb_statement in tmb_matches])
logger.debug(f"\tgkb_matches: {len(gkb_matches)}")

msi = content.get("msi", [])
Expand All @@ -351,7 +351,7 @@ def ipr_report(
msi_variant["key"] = msi_cat
msi_variant["variantType"] = "msi"
logger.info(f"GERO-295 '{msi_cat}' matches {len(msi_matches)} msi statements.")
gkb_matches.extend(msi_matches)
gkb_matches.extend([Hashabledict(msi) for msi in msi_matches])
logger.debug(f"\tgkb_matches: {len(gkb_matches)}")

logger.info(f"annotating {len(small_mutations)} small mutations")
Expand All @@ -372,17 +372,23 @@ def ipr_report(

logger.info(f"annotating {len(copy_variants)} copy variants")
gkb_matches.extend(
annotate_copy_variants(
graphkb_conn, copy_variants, kb_disease_match, show_progress=interactive
)
[
Hashabledict(copy_var)
for copy_var in annotate_copy_variants(
graphkb_conn, copy_variants, kb_disease_match, show_progress=interactive
)
]
)
logger.debug(f"\tgkb_matches: {len(gkb_matches)}")

logger.info(f"annotating {len(expression_variants)} expression variants")
gkb_matches.extend(
annotate_expression_variants(
graphkb_conn, expression_variants, kb_disease_match, show_progress=interactive
)
[
Hashabledict(exp_var)
for exp_var in annotate_expression_variants(
graphkb_conn, expression_variants, kb_disease_match, show_progress=interactive
)
]
)
logger.debug(f"\tgkb_matches: {len(gkb_matches)}")

Expand All @@ -393,14 +399,19 @@ def ipr_report(
if tmb_matches:
all_variants.append(tmb_variant) # type: ignore

if match_germline: # verify germline kb statements matched germline observed variants
gkb_matches = germline_kb_matches(gkb_matches, all_variants)
if gkb_matches:
logger.info(f"Removing {len(gkb_matches)} germline events without medical matches.")
if match_germline:
# verify germline kb statements matched germline observed variants, not somatic variants
org_len = len(gkb_matches)
gkb_matches = [
Hashabledict(match) for match in germline_kb_matches(gkb_matches, all_variants)
]
num_removed = org_len - len(gkb_matches)
if num_removed:
logger.info(f"Removing {num_removed} germline events without medical matches.")

if custom_kb_match_filter:
logger.info(f"custom_kb_match_filter on {len(gkb_matches)} variants")
gkb_matches = custom_kb_match_filter(gkb_matches)
gkb_matches = [Hashabledict(match) for match in custom_kb_match_filter(gkb_matches)]
logger.info(f"\t custom_kb_match_filter left {len(gkb_matches)} variants")

key_alterations, variant_counts = create_key_alterations(gkb_matches, all_variants)
Expand All @@ -417,7 +428,7 @@ def ipr_report(
logger.info("generating analyst comments")
if generate_comments:
comments = {
"comments": summarize(
"comments": auto_analyst_comments(
graphkb_conn, gkb_matches, disease_name=kb_disease_match, variants=all_variants
)
}
Expand Down
6 changes: 3 additions & 3 deletions pori_python/ipr/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pori_python.graphkb.util import convert_to_rid_list
from pori_python.graphkb.vocab import get_term_tree
from pori_python.ipr.inputs import create_graphkb_sv_notation
from pori_python.types import IprVariant, KbMatch, Ontology, Record, Statement
from pori_python.types import Hashabledict, IprVariant, KbMatch, Ontology, Record, Statement

from .util import (
convert_to_rid_set,
Expand Down Expand Up @@ -328,9 +328,9 @@ def section_statements_by_genes(
return genes


def summarize(
def auto_analyst_comments(
graphkb_conn: GraphKBConnection,
matches: Sequence[KbMatch],
matches: Sequence[KbMatch] | Sequence[Hashabledict],
disease_name: str,
variants: Sequence[IprVariant],
) -> str:
Expand Down
8 changes: 5 additions & 3 deletions pori_python/ipr/therapeutic_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any, Dict, List, Sequence

from pori_python.graphkb import GraphKBConnection
from pori_python.types import IprVariant, KbMatch
from pori_python.types import Hashabledict, IprVariant, KbMatch

from .util import (
create_variant_name_tuple,
Expand All @@ -17,7 +17,9 @@


def create_therapeutic_options(
graphkb_conn: GraphKBConnection, kb_matches: List[KbMatch], variants: Sequence[IprVariant]
graphkb_conn: GraphKBConnection,
kb_matches: List[KbMatch] | List[Hashabledict],
variants: Sequence[IprVariant],
) -> List[Dict]:
"""
Generate therapeutic options summary from the list of kb-matches
Expand Down Expand Up @@ -66,7 +68,7 @@ def delimited_list(inputs: List, delimiter: str = " / ") -> str:
}
)
options_df = options_df.reset_index()
options = options_df.to_dict("records")
options = options_df.to_dict("records") # type: ignore
therapeutic_rank = 0
chemoresistance_rank = 0
for option in options:
Expand Down
5 changes: 0 additions & 5 deletions pori_python/ipr/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,6 @@
}


class Hashabledict(dict):
    """A ``dict`` subclass that defines ``__hash__`` so instances can go in a ``set``.

    Only the keys feed the hash; values are never hashed, so they may be
    unhashable objects such as lists or nested dicts. Equality is the
    inherited ``dict`` comparison (keys and values), so two instances with
    the same keys but different values share a hash yet compare unequal.
    """

    def __hash__(self):
        # Iterating a dict yields its keys; an order-independent frozenset of
        # them gives the same hash regardless of insertion order.
        key_set = frozenset(self.keys())
        return hash(key_set)


def get_terms_set(graphkb_conn: GraphKBConnection, base_terms: List[str]) -> Set[str]:
terms = set()
for base_term in base_terms:
Expand Down
5 changes: 5 additions & 0 deletions pori_python/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ class KbMatch(TypedDict):
kbData: Dict


class Hashabledict(dict):
    """Dictionary type that is hashable, e.g. for membership in a ``set``.

    The hash is derived from the set of keys alone. Values do not
    contribute (and therefore need not be hashable themselves), while
    equality remains the standard ``dict`` comparison of keys and values.
    """

    def __hash__(self):
        # frozenset makes the result independent of key insertion order.
        keys_only = frozenset(self.keys())
        return hash(keys_only)


class IprVariantBase(TypedDict):
"""Required properties of all variants for IPR."""

Expand Down

0 comments on commit f0ae3cd

Please sign in to comment.