From db7c199c40634983014e82b5f246fe259add2b12 Mon Sep 17 00:00:00 2001 From: Dustin Bleile Date: Fri, 12 Jul 2024 15:35:01 -0700 Subject: [PATCH] lint - isort black - line-len 100 --- pori_python/graphkb/genes.py | 73 +++---------- pori_python/graphkb/match.py | 90 +++------------- pori_python/graphkb/statement.py | 19 +--- pori_python/graphkb/util.py | 33 ++---- pori_python/graphkb/vocab.py | 33 ++---- pori_python/ipr/annotate.py | 65 +++-------- pori_python/ipr/connection.py | 15 +-- pori_python/ipr/constants.py | 12 +-- pori_python/ipr/inputs.py | 16 ++- pori_python/ipr/ipr.py | 73 +++---------- pori_python/ipr/main.py | 84 ++++----------- pori_python/ipr/summary.py | 85 ++++----------- pori_python/ipr/therapeutic_options.py | 4 +- pori_python/ipr/util.py | 24 ++--- tests/test_graphkb/data.py | 25 +---- tests/test_graphkb/test_genes.py | 24 ++--- tests/test_graphkb/test_match.py | 143 ++++++------------------- tests/test_graphkb/test_statement.py | 14 +-- tests/test_graphkb/test_util.py | 42 ++------ tests/test_ipr/test_annotate.py | 12 +-- tests/test_ipr/test_connection.py | 11 +- tests/test_ipr/test_inputs.py | 22 ++-- tests/test_ipr/test_ipr.py | 24 +---- tests/test_ipr/test_main.py | 24 ++--- tests/test_ipr/test_probe.py | 8 +- tests/test_ipr/test_summary.py | 66 ++---------- tests/test_ipr/test_util.py | 23 +--- tests/test_ipr/util.py | 6 +- 28 files changed, 235 insertions(+), 835 deletions(-) diff --git a/pori_python/graphkb/genes.py b/pori_python/graphkb/genes.py index 1edb263..cc71ce6 100644 --- a/pori_python/graphkb/genes.py +++ b/pori_python/graphkb/genes.py @@ -24,10 +24,7 @@ def _get_tumourigenesis_genes_list( - conn: GraphKBConnection, - relevance: str, - sources: List[str], - ignore_cache: bool = False, + conn: GraphKBConnection, relevance: str, sources: List[str], ignore_cache: bool = False ) -> List[Ontology]: statements = cast( List[Statement], @@ -37,17 +34,10 @@ def _get_tumourigenesis_genes_list( "filters": { "AND": [ {"source": {"target": "Source", "filters": {"name": sources}}}, - { - "relevance": { - "target": "Vocabulary", - "filters": {"name": relevance}, - } - }, + {"relevance": {"target": "Vocabulary", "filters": {"name": relevance}}}, ] }, - "returnProperties": [ - f"subject.{prop}" for prop in GENE_RETURN_PROPERTIES - ], + "returnProperties": [f"subject.{prop}" for prop in GENE_RETURN_PROPERTIES], }, ignore_cache=ignore_cache, ), @@ -84,9 +74,7 @@ def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]: Returns: gene (Feature) records """ - return _get_tumourigenesis_genes_list( - conn, TUMOUR_SUPPRESSIVE, [ONCOKB_SOURCE_NAME] - ) + return _get_tumourigenesis_genes_list(conn, TUMOUR_SUPPRESSIVE, [ONCOKB_SOURCE_NAME]) def get_cancer_genes(conn: GraphKBConnection) -> List[Ontology]: @@ -159,12 +147,7 @@ def get_genes_from_variant_types( filters: List[Dict[str, Any]] = [] if types: filters.append( - { - "type": { - "target": "Vocabulary", - "filters": {"name": types, "operator": "IN"}, - } - } + {"type": {"target": "Vocabulary", "filters": {"name": types, "operator": "IN"}}} ) variants = cast( @@ -194,11 +177,7 @@ def get_genes_from_variant_types( result = cast( List[Ontology], conn.query( - { - "target": list(genes), - "returnProperties": GENE_RETURN_PROPERTIES, - "filters": filters, - }, + {"target": list(genes), "returnProperties": GENE_RETURN_PROPERTIES, "filters": filters}, ignore_cache=ignore_cache, ), ) @@ -294,12 +273,7 @@ def get_gene_linked_cancer_predisposition_info( "filters": {"@rid": get_rid(conn, "Source", "CGL")}, } }, - { - 
"relevance": { - "target": "Vocabulary", - "filters": {"@rid": relevance_rids}, - } - }, + {"relevance": {"target": "Vocabulary", "filters": {"@rid": relevance_rids}}}, ] }, "returnProperties": [ @@ -333,10 +307,7 @@ def get_gene_linked_cancer_predisposition_info( logger.error( f"Non-gene cancer predisposition {biotype}: {name} for {condition['displayName']}" ) - variants[condition["@rid"]] = [ - condition["displayName"], - assoc_gene_list, - ] + variants[condition["@rid"]] = [condition["displayName"], assoc_gene_list] for gene, name, biotype in infer_genes: logger.debug(f"Found gene '{gene}' for '{name}' ({biotype})") @@ -388,12 +359,7 @@ def get_gene_linked_pharmacogenomic_info( { "target": "Statement", "filters": [ - { - "relevance": { - "target": "Vocabulary", - "filters": {"@rid": relevance_rids}, - } - } + {"relevance": {"target": "Vocabulary", "filters": {"@rid": relevance_rids}}} ], "returnProperties": [ "conditions.@class", @@ -431,10 +397,7 @@ def get_gene_linked_pharmacogenomic_info( logger.error( f"Non-gene pharmacogenomic {biotype}: {name} for {condition['displayName']}" ) - variants[condition["@rid"]] = [ - condition["displayName"], - assoc_gene_list, - ] + variants[condition["@rid"]] = [condition["displayName"], assoc_gene_list] for gene, name, biotype in infer_genes: logger.debug(f"Found gene '{gene}' for '{name}' ({biotype})") genes.add(gene) @@ -486,9 +449,7 @@ def get_gene_information( gene_names = sorted(set(gene_names)) statements = graphkb_conn.query(body) - statements = [ - s for s in statements if s.get("reviewStatus") != FAILED_REVIEW_STATUS - ] + statements = [s for s in statements if s.get("reviewStatus") != FAILED_REVIEW_STATUS] gene_flags: Dict[str, Set[str]] = { "kbStatementRelated": set(), @@ -511,13 +472,9 @@ def get_gene_information( logger.info("fetching oncogenes list") gene_flags["oncogene"] = convert_to_rid_set(get_oncokb_oncogenes(graphkb_conn)) logger.info("fetching tumour supressors list") - gene_flags["tumourSuppressor"] = convert_to_rid_set( - get_oncokb_tumour_supressors(graphkb_conn) - ) + gene_flags["tumourSuppressor"] = convert_to_rid_set(get_oncokb_tumour_supressors(graphkb_conn)) logger.info("fetching cancerGeneListMatch list") - gene_flags["cancerGeneListMatch"] = convert_to_rid_set( - get_cancer_genes(graphkb_conn) - ) + gene_flags["cancerGeneListMatch"] = convert_to_rid_set(get_cancer_genes(graphkb_conn)) logger.info("fetching therapeutic associated genes lists") gene_flags["therapeuticAssociated"] = convert_to_rid_set( @@ -527,9 +484,7 @@ def get_gene_information( logger.info(f"Setting gene_info flags on {len(gene_names)} genes") result = [] for gene_name in gene_names: - equivalent = convert_to_rid_set( - get_equivalent_features(graphkb_conn, gene_name) - ) + equivalent = convert_to_rid_set(get_equivalent_features(graphkb_conn, gene_name)) row = {"name": gene_name} flagged = False for flag in gene_flags: diff --git a/pori_python/graphkb/match.py b/pori_python/graphkb/match.py index 631d073..599b152 100644 --- a/pori_python/graphkb/match.py +++ b/pori_python/graphkb/match.py @@ -15,14 +15,7 @@ STRUCTURAL_VARIANT_TYPES, VARIANT_RETURN_PROPERTIES, ) -from .types import ( - BasicPosition, - Ontology, - ParsedVariant, - PositionalVariant, - Record, - Variant, -) +from .types import BasicPosition, Ontology, ParsedVariant, PositionalVariant, Record, Variant from .util import ( FeatureNotFoundError, convert_to_rid_list, @@ -70,8 +63,7 @@ def get_equivalent_features( return cast( List[Ontology], conn.query( - {"target": [gene_name], "queryType": 
"similarTo"}, - ignore_cache=ignore_cache, + {"target": [gene_name], "queryType": "similarTo"}, ignore_cache=ignore_cache ), ) @@ -90,16 +82,9 @@ def get_equivalent_features( filters.append({"sourceId": gene_name}) if source_id_version: filters.append( - { - "OR": [ - {"sourceIdVersion": source_id_version}, - {"sourceIdVersion": None}, - ] - } + {"OR": [{"sourceIdVersion": source_id_version}, {"sourceIdVersion": None}]} ) - elif ( - FEATURES_CACHE and gene_name.lower() not in FEATURES_CACHE and not ignore_cache - ): + elif FEATURES_CACHE and gene_name.lower() not in FEATURES_CACHE and not ignore_cache: return [] else: filters.append({"OR": [{"sourceId": gene_name}, {"name": gene_name}]}) @@ -107,10 +92,7 @@ def get_equivalent_features( return cast( List[Ontology], conn.query( - { - "target": {"target": "Feature", "filters": filters}, - "queryType": "similarTo", - }, + {"target": {"target": "Feature", "filters": filters}, "queryType": "similarTo"}, ignore_cache=ignore_cache, ), ) @@ -123,13 +105,7 @@ def cache_missing_features(conn: GraphKBConnection) -> None: """ genes = cast( List[Ontology], - conn.query( - { - "target": "Feature", - "returnProperties": ["name", "sourceId"], - "neighbors": 0, - } - ), + conn.query({"target": "Feature", "returnProperties": ["name", "sourceId"], "neighbors": 0}), ) for gene in genes: if gene["name"]: @@ -184,9 +160,7 @@ def match_category_variant( ) if not terms: - raise ValueError( - f"unable to find the term/category ({category}) or any equivalent" - ) + raise ValueError(f"unable to find the term/category ({category}) or any equivalent") # find the variant list return cast( @@ -201,12 +175,7 @@ def match_category_variant( ], }, "queryType": "similarTo", - "edges": [ - "AliasOf", - "DeprecatedBy", - "CrossReferenceOf", - "GeneralizationOf", - ], + "edges": ["AliasOf", "DeprecatedBy", "CrossReferenceOf", "GeneralizationOf"], "treeEdges": ["Infers"], "returnProperties": VARIANT_RETURN_PROPERTIES, }, @@ -216,11 +185,7 @@ def match_category_variant( def match_copy_variant( - conn: GraphKBConnection, - gene_name: str, - category: str, - drop_homozygous: bool = False, - **kwargs, + conn: GraphKBConnection, gene_name: str, category: str, drop_homozygous: bool = False, **kwargs ) -> List[Variant]: """ Returns a list of variants matching the input variant @@ -261,9 +226,7 @@ def match_expression_variant( def positions_overlap( - pos_record: BasicPosition, - range_start: BasicPosition, - range_end: Optional[BasicPosition] = None, + pos_record: BasicPosition, range_start: BasicPosition, range_end: Optional[BasicPosition] = None ) -> bool: """ Check if 2 Position records from GraphKB indicate an overlap @@ -387,14 +350,9 @@ def compare_positional_variants( reference_variant["untemplatedSeq"] not in AMBIGUOUS_AA and variant["untemplatedSeq"] not in AMBIGUOUS_AA ): - if ( - reference_variant["untemplatedSeq"].lower() - != variant["untemplatedSeq"].lower() - ): + if reference_variant["untemplatedSeq"].lower() != variant["untemplatedSeq"].lower(): return False - elif len(variant["untemplatedSeq"]) != len( - reference_variant["untemplatedSeq"] - ): + elif len(variant["untemplatedSeq"]) != len(reference_variant["untemplatedSeq"]): return False # If both variants have a reference sequence, @@ -478,9 +436,7 @@ def type_screening( if parsed.get("reference2", None): return parsed["type"] prefix = parsed.get("prefix", "g") - if ( - prefix == "y" - ): # Assuming all variations using cytoband coordiantes meet the size threshold + if prefix == "y": # Assuming all variations using 
cytoband coordiantes meet the size threshold return parsed["type"] # When size cannot be determined: exonic and intronic coordinates @@ -575,11 +531,7 @@ def match_positional_variant( gene1 = parsed["reference1"] gene1_features = get_equivalent_features( - conn, - gene1, - source=gene_source, - is_source_id=gene_is_source_id, - ignore_cache=ignore_cache, + conn, gene1, source=gene_source, is_source_id=gene_is_source_id, ignore_cache=ignore_cache ) features = convert_to_rid_list(gene1_features) @@ -630,15 +582,12 @@ def match_positional_variant( ] filtered_similarOnly: List[Record] = [] # For post filter match use - filtered_similarAndGeneric: List[Record] = ( - [] - ) # To be added to the matches at the very end + filtered_similarAndGeneric: List[Record] = [] # To be added to the matches at the very end for row in cast( List[Record], conn.query( - {"target": "PositionalVariant", "filters": query_filters}, - ignore_cache=ignore_cache, + {"target": "PositionalVariant", "filters": query_filters}, ignore_cache=ignore_cache ), ): # TODO: Check if variant and reference_variant should be interchanged @@ -661,12 +610,7 @@ def match_positional_variant( { "target": convert_to_rid_list(filtered_similarOnly), "queryType": "similarTo", - "edges": [ - "AliasOf", - "DeprecatedBy", - "CrossReferenceOf", - "GeneralizationOf", - ], + "edges": ["AliasOf", "DeprecatedBy", "CrossReferenceOf", "GeneralizationOf"], "treeEdges": ["Infers"], "returnProperties": POS_VARIANT_RETURN_PROPERTIES, }, diff --git a/pori_python/graphkb/statement.py b/pori_python/graphkb/statement.py index 032498b..c969e8f 100644 --- a/pori_python/graphkb/statement.py +++ b/pori_python/graphkb/statement.py @@ -1,11 +1,7 @@ from typing import List, cast from . import GraphKBConnection -from .constants import ( - FAILED_REVIEW_STATUS, - RELEVANCE_BASE_TERMS, - STATEMENT_RETURN_PROPERTIES, -) +from .constants import FAILED_REVIEW_STATUS, RELEVANCE_BASE_TERMS, STATEMENT_RETURN_PROPERTIES from .types import CategoryBaseTermMapping, Statement, Variant from .util import convert_to_rid_list from .vocab import get_terms_set @@ -27,9 +23,7 @@ def categorize_relevance( def get_statements_from_variants( - graphkb_conn: GraphKBConnection, - variants: List[Variant], - failed_review: bool = False, + graphkb_conn: GraphKBConnection, variants: List[Variant], failed_review: bool = False ) -> List[Statement]: """Given a list of variant records from GraphKB, return related statements. 
@@ -44,15 +38,10 @@ def get_statements_from_variants( statements = graphkb_conn.query( { "target": "Statement", - "filters": { - "conditions": convert_to_rid_list(variants), - "operator": "CONTAINSANY", - }, + "filters": {"conditions": convert_to_rid_list(variants), "operator": "CONTAINSANY"}, "returnProperties": STATEMENT_RETURN_PROPERTIES, } ) if not failed_review: - statements = [ - s for s in statements if s.get("reviewStatus") != FAILED_REVIEW_STATUS - ] + statements = [s for s in statements if s.get("reviewStatus") != FAILED_REVIEW_STATUS] return [cast(Statement, s) for s in statements] diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 64b82e0..5e46a7a 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -113,10 +113,7 @@ def __init__( self.url = url self.username = username self.password = password - self.headers = { - "Accept": "application/json", - "Content-Type": "application/json", - } + self.headers = {"Accept": "application/json", "Content-Type": "application/json"} self.cache: Dict[Any, Any] = {} if not use_global_cache else QUERY_CACHE self.request_count = 0 self.first_request: Optional[datetime] = None @@ -128,9 +125,7 @@ def __init__( def load(self) -> Optional[float]: if self.first_request and self.last_request: return ( - self.request_count - * 1000 - / millis_interval(self.first_request, self.last_request) + self.request_count * 1000 / millis_interval(self.first_request, self.last_request) ) return None @@ -271,9 +266,7 @@ def query( return self.cache[hash_code] while True: - content = self.post( - "query", data={**request_body, "limit": limit, "skip": len(result)} - ) + content = self.post("query", data={**request_body, "limit": limit, "skip": len(result)}) records = content["result"] result.extend(records) if len(records) < limit or not paginate: @@ -365,9 +358,7 @@ def stripRefSeq(breakRepr: str) -> str: return breakRepr -def stripDisplayName( - displayName: str, withRef: bool = True, withRefSeq: bool = True -) -> str: +def stripDisplayName(displayName: str, withRef: bool = True, withRefSeq: bool = True) -> str: match: object = re.search(r"^(.*)(\:)(.*)$", displayName) if match and not withRef: if withRefSeq: @@ -385,9 +376,7 @@ def stripDisplayName( while new_matches: new_matches = re.search(r"(.*)([A-Z]|\?)([0-9]+)(.*)", rest) if new_matches: - rest = ( - new_matches.group(1) + new_matches.group(3) + new_matches.group(4) - ) + rest = new_matches.group(1) + new_matches.group(3) + new_matches.group(4) # refSeq before '>' new_matches = re.search(r"^([0-9]*)([A-Z]*|\?)(\>)(.*)$", rest) @@ -403,9 +392,7 @@ def stripDisplayName( def stringifyVariant( - variant: Union[PositionalVariant, ParsedVariant], - withRef: bool = True, - withRefSeq: bool = True, + variant: Union[PositionalVariant, ParsedVariant], withRef: bool = True, withRefSeq: bool = True ) -> str: """ Convert variant record to a string representation (displayName/hgvs) @@ -471,12 +458,8 @@ def stringifyVariant( break2Repr_noParentheses = stripParentheses(break2Repr) result.append(f"({break1Repr_noParentheses},{break2Repr_noParentheses})") else: - break1Repr_noParentheses_noRefSeq = stripRefSeq( - stripParentheses(break1Repr) - ) - break2Repr_noParentheses_noRefSeq = stripRefSeq( - stripParentheses(break2Repr) - ) + break1Repr_noParentheses_noRefSeq = stripRefSeq(stripParentheses(break1Repr)) + break2Repr_noParentheses_noRefSeq = stripRefSeq(stripParentheses(break2Repr)) result.append( f"({break1Repr_noParentheses_noRefSeq},{break2Repr_noParentheses_noRefSeq})" 
) diff --git a/pori_python/graphkb/vocab.py b/pori_python/graphkb/vocab.py index 1b6c609..51446db 100644 --- a/pori_python/graphkb/vocab.py +++ b/pori_python/graphkb/vocab.py @@ -24,9 +24,7 @@ def get_equivalent_terms( base_term_name: the name to get superclasses of root_exclude_term: the parent term to exlcude along with all of its parent terms """ - base_records = convert_to_rid_list( - conn.query(build_base_query(ontology_class, base_term_name)) - ) + base_records = convert_to_rid_list(conn.query(build_base_query(ontology_class, base_term_name))) if not base_records: return [] base_term_parents = cast( @@ -36,13 +34,7 @@ def get_equivalent_terms( "target": {"target": base_records, "queryType": "descendants"}, "queryType": "similarTo", "treeEdges": [], - "returnProperties": [ - "sourceId", - "sourceIdVersion", - "deprecated", - "name", - "@rid", - ], + "returnProperties": ["sourceId", "sourceIdVersion", "deprecated", "name", "@rid"], }, ignore_cache=ignore_cache, ), @@ -102,9 +94,7 @@ def get_term_tree( Note: this must be done in 2 calls to avoid going up and down the tree in a single query (exclude adjacent siblings) """ # get all child terms of the subclass tree and disambiguate them - base_records = convert_to_rid_list( - conn.query(build_base_query(ontology_class, base_term_name)) - ) + base_records = convert_to_rid_list(conn.query(build_base_query(ontology_class, base_term_name))) if not base_records: return [] child_terms = cast( @@ -114,13 +104,7 @@ def get_term_tree( "target": {"target": base_records, "queryType": "ancestors"}, "queryType": "similarTo", "treeEdges": [], - "returnProperties": [ - "sourceId", - "sourceIdVersion", - "deprecated", - "name", - "@rid", - ], + "returnProperties": ["sourceId", "sourceIdVersion", "deprecated", "name", "@rid"], }, ignore_cache=ignore_cache, ), @@ -192,9 +176,7 @@ def get_term_by_name( def get_terms_set( - graphkb_conn: GraphKBConnection, - base_terms: Iterable[str], - ignore_cache: bool = False, + graphkb_conn: GraphKBConnection, base_terms: Iterable[str], ignore_cache: bool = False ) -> Set[str]: """Get a set of vocabulary rids given some base/parent term names.""" base_terms = [base_terms] if isinstance(base_terms, str) else base_terms @@ -206,10 +188,7 @@ def get_terms_set( terms.update( convert_to_rid_list( get_term_tree( - graphkb_conn, - base_term, - include_superclasses=False, - ignore_cache=ignore_cache, + graphkb_conn, base_term, include_superclasses=False, ignore_cache=ignore_cache ) ) ) diff --git a/pori_python/ipr/annotate.py b/pori_python/ipr/annotate.py index 92aff40..6ddea0b 100644 --- a/pori_python/ipr/annotate.py +++ b/pori_python/ipr/annotate.py @@ -17,13 +17,7 @@ from .constants import TMB_HIGH_CATEGORY from .ipr import convert_statements_to_alterations -from .types import ( - GkbStatement, - IprCopyVariant, - IprExprVariant, - IprStructuralVariant, - KbMatch, -) +from .types import GkbStatement, IprCopyVariant, IprExprVariant, IprStructuralVariant, KbMatch from .util import Hashabledict, convert_to_rid_set, logger REPORTED_COPY_VARIANTS = (INPUT_COPY_CATEGORIES.AMP, INPUT_COPY_CATEGORIES.DEEP) @@ -44,9 +38,7 @@ def get_second_pass_variants( } for reference1, variant_type in inferred_variants: - variants = gkb_match.match_category_variant( - graphkb_conn, reference1, variant_type - ) + variants = gkb_match.match_category_variant(graphkb_conn, reference1, variant_type) for variant in variants: all_inferred_matches[variant["@rid"]] = variant @@ -79,15 +71,11 @@ def get_ipr_statements_from_variants( inferred_statements = [ 
s for s in get_statements_from_variants(graphkb_conn, inferred_matches) - if s["@rid"] - not in existing_statements # do not duplicate if non-inferred match + if s["@rid"] not in existing_statements # do not duplicate if non-inferred match ] for ipr_row in convert_statements_to_alterations( - graphkb_conn, - inferred_statements, - disease_name, - convert_to_rid_set(inferred_matches), + graphkb_conn, inferred_statements, disease_name, convert_to_rid_set(inferred_matches) ): ipr_row["kbData"]["inferred"] = True rows.append(ipr_row) @@ -127,9 +115,7 @@ def annotate_expression_variants( try: matches = gkb_match.match_expression_variant(graphkb_conn, gene, variant) - for ipr_row in get_ipr_statements_from_variants( - graphkb_conn, matches, disease_name - ): + for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name): ipr_row["variant"] = row["key"] ipr_row["variantType"] = row.get("variantType", "exp") alterations.append(ipr_row) @@ -178,16 +164,12 @@ def annotate_copy_variants( if variant not in REPORTED_COPY_VARIANTS: # https://www.bcgsc.ca/jira/browse/GERO-77 skipped += 1 - logger.debug( - f"Dropping {gene} copy change '{variant}' - not in REPORTED_COPY_VARIANTS" - ) + logger.debug(f"Dropping {gene} copy change '{variant}' - not in REPORTED_COPY_VARIANTS") continue try: matches = gkb_match.match_copy_variant(graphkb_conn, gene, variant) - for ipr_row in get_ipr_statements_from_variants( - graphkb_conn, matches, disease_name - ): + for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name): ipr_row["variant"] = row["key"] ipr_row["variantType"] = row.get("variantType", "cnv") alterations.append(ipr_row) @@ -203,9 +185,7 @@ def annotate_copy_variants( ) if problem_genes: logger.error(f"gene finding failures for copy variants {sorted(problem_genes)}") - logger.error( - f"gene finding failure for {len(problem_genes)} copy variant genes" - ) + logger.error(f"gene finding failure for {len(problem_genes)} copy variant genes") logger.info( f"matched {len(variants)} copy category variants to {len(alterations)} graphkb annotations" ) @@ -262,9 +242,7 @@ def annotate_positional_variants( f"Assuming malformed deletion variant {variant} is {variant[:-2] + 'del'}" ) variant = variant[:-2] + "del" - matches = gkb_match.match_positional_variant( - graphkb_conn, variant - ) + matches = gkb_match.match_positional_variant(graphkb_conn, variant) else: raise parse_err @@ -297,9 +275,7 @@ def annotate_positional_variants( if problem_genes: logger.error(f"gene finding failures for {sorted(problem_genes)}") - logger.error( - f"{len(problem_genes)} gene finding failures for positional variants" - ) + logger.error(f"{len(problem_genes)} gene finding failures for positional variants") if errors: logger.error(f"skipped {errors} positional variants due to errors") @@ -335,10 +311,7 @@ def annotate_msi( "target": { "target": "CategoryVariant", "filters": { - "reference1": { - "target": "Signature", - "filters": {"name": msi_category}, - } + "reference1": {"target": "Signature", "filters": {"name": msi_category}} }, }, "queryType": "similarTo", @@ -346,9 +319,7 @@ def annotate_msi( } ) if msi_categories: - for ipr_row in get_ipr_statements_from_variants( - graphkb_conn, msi_categories, disease_name - ): + for ipr_row in get_ipr_statements_from_variants(graphkb_conn, msi_categories, disease_name): ipr_row["variant"] = msi_category ipr_row["variantType"] = "msi" gkb_matches.append(ipr_row) @@ -356,9 +327,7 @@ def annotate_msi( def annotate_tmb( - graphkb_conn: 
GraphKBConnection, - disease_name: str = "cancer", - category: str = TMB_HIGH_CATEGORY, + graphkb_conn: GraphKBConnection, disease_name: str = "cancer", category: str = TMB_HIGH_CATEGORY ) -> List[KbMatch]: """Annotate Tumour Mutation Burden (tmb) categories from GraphKB in the IPR alterations format. @@ -379,9 +348,7 @@ def annotate_tmb( "filters": { "reference1": { "target": "Signature", - "filters": { - "OR": [{"name": category}, {"displayName": category}] - }, + "filters": {"OR": [{"name": category}, {"displayName": category}]}, } }, }, @@ -390,9 +357,7 @@ def annotate_tmb( } ) if categories: - for ipr_row in get_ipr_statements_from_variants( - graphkb_conn, categories, disease_name - ): + for ipr_row in get_ipr_statements_from_variants(graphkb_conn, categories, disease_name): ipr_row["variant"] = category ipr_row["variantType"] = "tmb" gkb_matches.append(ipr_row) diff --git a/pori_python/ipr/connection.py b/pori_python/ipr/connection.py index 122c047..f74579a 100644 --- a/pori_python/ipr/connection.py +++ b/pori_python/ipr/connection.py @@ -95,12 +95,7 @@ def check_status(interval: int = 5, num_attempts: int = 5): f'async report upload failed with reason: {current_status["failedReason"]}' ) - if current_status["state"] not in [ - "active", - "ready", - "waiting", - "completed", - ]: + if current_status["state"] not in ["active", "ready", "waiting", "completed"]: raise Exception( f"async report upload in unexpected state: {current_status}" ) @@ -138,9 +133,7 @@ def set_analyst_comments(self, report_id: str, data: Dict) -> Dict: data=zlib.compress(json.dumps(data, allow_nan=False).encode("utf-8")), ) - def post_images( - self, report_id: str, files: Dict[str, str], data: Dict[str, str] = {} - ) -> None: + def post_images(self, report_id: str, files: Dict[str, str], data: Dict[str, str] = {}) -> None: """ Post images to the report """ @@ -171,9 +164,7 @@ def post_images( handler.close() start_index += IMAGE_MAX if image_errors: - raise ValueError( - f'Error uploading images ({", ".join(sorted(list(image_errors)))})' - ) + raise ValueError(f'Error uploading images ({", ".join(sorted(list(image_errors)))})') def get_spec(self) -> Dict: """ diff --git a/pori_python/ipr/constants.py b/pori_python/ipr/constants.py index 948abc9..c176ff3 100644 --- a/pori_python/ipr/constants.py +++ b/pori_python/ipr/constants.py @@ -1,14 +1,6 @@ DEFAULT_URL = "https://iprstaging-api.bcgsc.ca/api" -GERMLINE_BASE_TERMS = ( - "pharmacogenomic", - "cancer predisposition", -) # based on graphkb.constants -VARIANT_CLASSES = { - "Variant", - "CategoryVariant", - "PositionalVariant", - "CatalogueVariant", -} +GERMLINE_BASE_TERMS = ("pharmacogenomic", "cancer predisposition") # based on graphkb.constants +VARIANT_CLASSES = {"Variant", "CategoryVariant", "PositionalVariant", "CatalogueVariant"} # all possible values for review status are: ['pending', 'not required', 'passed', 'failed', 'initial'] FAILED_REVIEW_STATUS = "failed" diff --git a/pori_python/ipr/inputs.py b/pori_python/ipr/inputs.py index 628428a..fc1c0b6 100644 --- a/pori_python/ipr/inputs.py +++ b/pori_python/ipr/inputs.py @@ -430,9 +430,7 @@ def check_variant_links( for variant in expression_variants: gene = variant["gene"] if not gene: - logger.error( - "expression_variant data cannot be applied to an empty genename" - ) + logger.error("expression_variant data cannot be applied to an empty genename") elif variant["variant"]: genes_with_variants.add(gene) @@ -478,14 +476,14 @@ def check_variant_links( if missing_information_genes: for err_msg in 
sorted(missing_information_errors): logger.debug(err_msg) - link_err_msg = f"Missing information variant links on {len(missing_information_genes)} genes" + link_err_msg = ( + f"Missing information variant links on {len(missing_information_genes)} genes" + ) logger.warning(link_err_msg) return genes_with_variants -def check_comparators( - content: Dict, expresssionVariants: List[IprExprVariant] = [] -) -> None: +def check_comparators(content: Dict, expresssionVariants: List[IprExprVariant] = []) -> None: """ Given the optional content dictionary, check that based on the analyses present the correct/sufficient comparators have also been specified @@ -573,9 +571,7 @@ def check_null(checker, instance): type_checker = validator_class.TYPE_CHECKER.redefine("null", check_null) return jsonschema.validators.extend( - validator_class, - validators={"properties": set_defaults}, - type_checker=type_checker, + validator_class, validators={"properties": set_defaults}, type_checker=type_checker ) diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py index 7361042..efa3417 100644 --- a/pori_python/ipr/ipr.py +++ b/pori_python/ipr/ipr.py @@ -10,14 +10,7 @@ from pori_python.graphkb import vocab as gkb_vocab from .constants import GERMLINE_BASE_TERMS, VARIANT_CLASSES -from .types import ( - GkbStatement, - ImageDefinition, - IprFusionVariant, - IprGene, - IprVariant, - KbMatch, -) +from .types import GkbStatement, ImageDefinition, IprFusionVariant, IprGene, IprVariant, KbMatch from .util import find_variant, logger @@ -40,13 +33,9 @@ def filter_structural_variants( Filter structural variants to remove non-high quality events unless they are matched/annotated or they involve a gene that is a known fusion partner """ - matched_svs = { - match["variant"] for match in kb_matches if match["variantType"] == "sv" - } + matched_svs = {match["variant"] for match in kb_matches if match["variantType"] == "sv"} fusion_genes = { - gene["name"] - for gene in gene_annotations - if gene.get("knownFusionPartner", False) + gene["name"] for gene in gene_annotations if gene.get("knownFusionPartner", False) } result = [] @@ -84,15 +73,11 @@ def get_evidencelevel_mapping(graphkb_conn: GraphKBConnection) -> Dict[str, str] # Filter IPR EvidenceLevel and map each outgoing CrossReferenceOf to displayName ipr_source_rid = graphkb_conn.get_source("ipr")["@rid"] - ipr_evidence_levels = filter( - lambda d: d.get("source") == ipr_source_rid, evidence_levels - ) + ipr_evidence_levels = filter(lambda d: d.get("source") == ipr_source_rid, evidence_levels) cross_references_mapping: Dict[str, str] = dict() ipr_rids_to_displayname = dict() for level in ipr_evidence_levels: - d = map( - lambda i: (i, level["displayName"]), level.get("out_CrossReferenceOf", []) - ) + d = map(lambda i: (i, level["displayName"]), level.get("out_CrossReferenceOf", [])) cross_references_mapping.update(d) ipr_rids_to_displayname[level["@rid"]] = level["displayName"] @@ -136,9 +121,7 @@ def convert_statements_to_alterations( """ disease_matches = { r["@rid"] - for r in gkb_vocab.get_term_tree( - graphkb_conn, disease_name, ontology_class="Disease" - ) + for r in gkb_vocab.get_term_tree(graphkb_conn, disease_name, ontology_class="Disease") } if not disease_matches: @@ -151,9 +134,7 @@ def convert_statements_to_alterations( # get the recruitment status for any trial associated with a statement clinical_trials = [ - s["subject"]["@rid"] - for s in statements - if s["subject"]["@class"] == "ClinicalTrial" + s["subject"]["@rid"] for s in statements if 
s["subject"]["@class"] == "ClinicalTrial" ] recruitment_statuses = {} if clinical_trials: @@ -169,9 +150,7 @@ def convert_statements_to_alterations( recruitment_statuses[rid] = query_result[0]["recruitmentStatus"] for statement in statements: - variants = [ - c for c in statement["conditions"] if c["@class"] in VARIANT_CLASSES - ] + variants = [c for c in statement["conditions"] if c["@class"] in VARIANT_CLASSES] diseases = [c for c in statement["conditions"] if c["@class"] == "Disease"] disease_match = len(diseases) == 1 and diseases[0]["@rid"] in disease_matches pmid = ";".join([e["displayName"] for e in statement["evidence"]]) @@ -191,12 +170,8 @@ def convert_statements_to_alterations( evidence_level_str = display_evidence_levels(statement) evidence_levels = statement.get("evidenceLevel") or [] - ipr_evidence_levels = [ - ev_map[el.get("@rid", "")] for el in evidence_levels if el - ] - ipr_evidence_levels_str = ";".join( - sorted(set([el for el in ipr_evidence_levels])) - ) + ipr_evidence_levels = [ev_map[el.get("@rid", "")] for el in evidence_levels if el] + ipr_evidence_levels_str = ";".join(sorted(set([el for el in ipr_evidence_levels]))) for variant in variants: if variant["@rid"] not in variant_matches: @@ -206,13 +181,9 @@ def convert_statements_to_alterations( "approvedTherapy": approved_therapy, "category": ipr_section or "unknown", "context": ( - statement["subject"]["displayName"] - if statement["subject"] - else None - ), - "kbContextId": ( - statement["subject"]["@rid"] if statement["subject"] else None + statement["subject"]["displayName"] if statement["subject"] else None ), + "kbContextId": (statement["subject"]["@rid"] if statement["subject"] else None), "disease": ";".join(sorted(d["displayName"] for d in diseases)), "evidenceLevel": evidence_level_str, "iprEvidenceLevel": ipr_evidence_levels_str, @@ -273,9 +244,7 @@ def select_expression_plots( gene = str(variant.get("gene", "")) hist = str(variant.get("histogramImage", "")) if hist: - images_by_gene[gene] = ImageDefinition( - {"key": f"expDensity.{gene}", "path": hist} - ) + images_by_gene[gene] = ImageDefinition({"key": f"expDensity.{gene}", "path": hist}) return [images_by_gene[gene] for gene in selected_genes if gene in images_by_gene] @@ -318,9 +287,7 @@ def create_key_alterations( counts[type_mapping[variant_type]].add(variant_key) if variant_type == "exp": - alterations.append( - f'{variant.get("gene","")} ({variant.get("expressionState")})' - ) + alterations.append(f'{variant.get("gene","")} ({variant.get("expressionState")})') elif variant_type == "cnv": alterations.append(f'{variant.get("gene","")} ({variant.get("cnvState")})') # only show germline if relevant @@ -344,9 +311,7 @@ def create_key_alterations( def germline_kb_matches( - kb_matches: List[KbMatch], - all_variants: Sequence[IprVariant], - assume_somatic: bool = True, + kb_matches: List[KbMatch], all_variants: Sequence[IprVariant], assume_somatic: bool = True ) -> List[KbMatch]: """Filter kb_matches for matching to germline or somatic events using the 'germline' optional property. 
@@ -397,9 +362,7 @@ def germline_kb_matches( # Remove any matches to germline events for alt in somatic_alts: var_list = [v for v in all_variants if v["key"] == alt["variant"]] - somatic_var_list = [ - v for v in var_list if not v.get("germline", not assume_somatic) - ] + somatic_var_list = [v for v in var_list if not v.get("germline", not assume_somatic)] if var_list and not somatic_var_list: logger.debug( f"Dropping germline match to somatic statement kbStatementId:{alt['kbStatementId']}: {alt['kbVariant']} {alt['category']}" @@ -407,8 +370,6 @@ def germline_kb_matches( elif somatic_var_list: ret_list.append(alt) # match to somatic variant else: - ret_list.append( - alt - ) # alteration not in any specific keys matches to check. + ret_list.append(alt) # alteration not in any specific keys matches to check. return ret_list diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index 8c08f24..4f6c523 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -49,9 +49,7 @@ def file_path(path: str) -> str: if not os.path.exists(path): - raise argparse.ArgumentTypeError( - f"{repr(path)} is not a valid filename. does not exist" - ) + raise argparse.ArgumentTypeError(f"{repr(path)} is not a valid filename. does not exist") return path @@ -68,9 +66,7 @@ def command_interface() -> None: default=os.environ.get("USER"), help="username to use connecting to graphkb/ipr", ) - req.add_argument( - "--password", required=True, help="password to use connecting to graphkb/ipr" - ) + req.add_argument("--password", required=True, help="password to use connecting to graphkb/ipr") req.add_argument( "-c", "--content", required=True, type=file_path, help="Report Content as JSON" ) @@ -78,10 +74,7 @@ def command_interface() -> None: parser.add_argument("--graphkb_url", default=None) parser.add_argument("--log_level", default="info", choices=LOG_LEVELS.keys()) parser.add_argument( - "--therapeutics", - default=False, - help="Generate therapeutic options", - action="store_true", + "--therapeutics", default=False, help="Generate therapeutic options", action="store_true" ) parser.add_argument( "--skip_comments", @@ -90,9 +83,7 @@ def command_interface() -> None: help="Turn off generating the analyst comments section of the report", ) parser.add_argument( - "-o", - "--output_json_path", - help="path to a JSON to output the report upload body", + "-o", "--output_json_path", help="path to a JSON to output the report upload body" ) parser.add_argument( "-w", @@ -133,9 +124,7 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict and "genesCreate" in ipr_spec["components"]["schemas"].keys() and "properties" in ipr_spec["components"]["schemas"]["genesCreate"].keys() ): - genes_spec = ipr_spec["components"]["schemas"]["genesCreate"][ - "properties" - ].keys() + genes_spec = ipr_spec["components"]["schemas"]["genesCreate"]["properties"].keys() # check what ipr report upload expects and adjust contents to match for old_name, new_name in RENAMED_GENE_PROPERTIES.items(): @@ -170,9 +159,7 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict removed_keys[key] = 1 gene.pop(key) for key, count in removed_keys.items(): - logger.warning( - f"IPR unsupported property '{key}' removed from {count} genes." 
- ) + logger.warning(f"IPR unsupported property '{key}' removed from {count} genes.") drop_columns = ["variant", "variantType", "histogramImage"] # DEVSU-2034 - use a 'displayName' @@ -188,9 +175,7 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict for variant in upload_content.get(variant_list_section, []): if not variant.get("displayName"): variant["displayName"] = ( - variant.get("variant") - or variant.get("kbCategory") - or variant.get("key", "") + variant.get("variant") or variant.get("kbCategory") or variant.get("key", "") ) if variant_list_section == "probeResults": # currently probeResults will error if they do NOT have a 'variant' column. @@ -213,9 +198,7 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict def create_report(**kwargs) -> Dict: - logger.warning( - "Deprecated function 'create_report' called - use ipr_report instead" - ) + logger.warning("Deprecated function 'create_report' called - use ipr_report instead") return ipr_report(**kwargs) @@ -270,22 +253,16 @@ def ipr_report( try: validate_report_content(content) except jsonschema.exceptions.ValidationError as err: - logger.error( - "Failed schema check - report variants may be corrupted or unmatched." - ) + logger.error("Failed schema check - report variants may be corrupted or unmatched.") logger.error(f"Failed schema check: {err}") kb_disease_match = content["kbDiseaseMatch"] # validate the input variants small_mutations = preprocess_small_mutations(content.get("smallMutations", [])) - structural_variants = preprocess_structural_variants( - content.get("structuralVariants", []) - ) + structural_variants = preprocess_structural_variants(content.get("structuralVariants", [])) copy_variants = preprocess_copy_variants(content.get("copyVariants", [])) - expression_variants = preprocess_expression_variants( - content.get("expressionVariants", []) - ) + expression_variants = preprocess_expression_variants(content.get("expressionVariants", [])) if expression_variants: check_comparators(content, expression_variants) @@ -328,9 +305,7 @@ def ipr_report( tmb["kbCategory"] = TMB_HIGH_CATEGORY # GERO-296 - try matching to graphkb - tmb_matches = annotate_tmb( - graphkb_conn, kb_disease_match, TMB_HIGH_CATEGORY - ) + tmb_matches = annotate_tmb(graphkb_conn, kb_disease_match, TMB_HIGH_CATEGORY) if tmb_matches: tmb_variant["kbCategory"] = TMB_HIGH_CATEGORY # type: ignore tmb_variant["variant"] = TMB_HIGH_CATEGORY @@ -361,9 +336,7 @@ def ipr_report( msi_variant["variant"] = msi_cat msi_variant["key"] = msi_cat msi_variant["variantType"] = "msi" - logger.info( - f"GERO-295 '{msi_cat}' matches {len(msi_matches)} msi statements." 
- ) + logger.info(f"GERO-295 '{msi_cat}' matches {len(msi_matches)} msi statements.") gkb_matches.extend(msi_matches) logger.debug(f"\tgkb_matches: {len(gkb_matches)}") @@ -378,10 +351,7 @@ def ipr_report( logger.info(f"annotating {len(structural_variants)} structural variants") gkb_matches.extend( annotate_positional_variants( - graphkb_conn, - structural_variants, - kb_disease_match, - show_progress=interactive, + graphkb_conn, structural_variants, kb_disease_match, show_progress=interactive ) ) logger.debug(f"\tgkb_matches: {len(gkb_matches)}") @@ -397,10 +367,7 @@ def ipr_report( logger.info(f"annotating {len(expression_variants)} expression variants") gkb_matches.extend( annotate_expression_variants( - graphkb_conn, - expression_variants, - kb_disease_match, - show_progress=interactive, + graphkb_conn, expression_variants, kb_disease_match, show_progress=interactive ) ) logger.debug(f"\tgkb_matches: {len(gkb_matches)}") @@ -412,14 +379,10 @@ def ipr_report( if tmb_matches: all_variants.append(tmb_variant) # type: ignore - if ( - match_germline - ): # verify germline kb statements matched germline observed variants + if match_germline: # verify germline kb statements matched germline observed variants gkb_matches = germline_kb_matches(gkb_matches, all_variants) if gkb_matches: - logger.info( - f"Removing {len(gkb_matches)} germline events without medical matches." - ) + logger.info(f"Removing {len(gkb_matches)} germline events without medical matches.") if custom_kb_match_filter: logger.info(f"custom_kb_match_filter on {len(gkb_matches)} variants") @@ -441,10 +404,7 @@ def ipr_report( if generate_comments: comments = { "comments": summarize( - graphkb_conn, - gkb_matches, - disease_name=kb_disease_match, - variants=all_variants, + graphkb_conn, gkb_matches, disease_name=kb_disease_match, variants=all_variants ) } else: @@ -456,9 +416,7 @@ def ipr_report( { "kbMatches": [trim_empty_values(a) for a in gkb_matches], "copyVariants": [ - trim_empty_values(c) - for c in copy_variants - if c["gene"] in genes_with_variants + trim_empty_values(c) for c in copy_variants if c["gene"] in genes_with_variants ], "smallMutations": [trim_empty_values(s) for s in small_mutations], "expressionVariants": [ @@ -482,9 +440,7 @@ def ipr_report( "therapeuticTarget": targets, } ) - output.setdefault("images", []).extend( - select_expression_plots(gkb_matches, all_variants) - ) + output.setdefault("images", []).extend(select_expression_plots(gkb_matches, all_variants)) output = clean_unsupported_content(output, ipr_spec) ipr_result = None diff --git a/pori_python/ipr/summary.py b/pori_python/ipr/summary.py index a91f57e..36783f7 100644 --- a/pori_python/ipr/summary.py +++ b/pori_python/ipr/summary.py @@ -47,8 +47,7 @@ def natural_join(word_list: List[str]) -> str: def natural_join_records( - records: Sequence[Record], - covert_to_word: Callable[[Dict], str] = lambda x: x["displayName"], + records: Sequence[Record], covert_to_word: Callable[[Dict], str] = lambda x: x["displayName"] ) -> str: word_list = sorted(list({covert_to_word(rec) for rec in records})) return natural_join(word_list) @@ -97,15 +96,7 @@ def merge_diseases(diseases: List[Ontology]) -> str: [d["@class"] == "Disease" for d in diseases] ): words = sorted( - list( - set( - [ - s["displayName"] - for s in diseases - if s["@rid"] in disease_matches - ] - ) - ) + list(set([s["displayName"] for s in diseases if s["@rid"] in disease_matches])) ) words.append(OTHER_DISEASES) return natural_join(words) @@ -115,29 +106,19 @@ def merge_diseases(diseases: 
List[Ontology]) -> str: if r"{subject}" in template: # remove subject from the conditions replacements subjects_ids = convert_to_rid_set(subjects) - disease_conditions = [ - d for d in disease_conditions if d["@rid"] not in subjects_ids - ] - variant_conditions = [ - d for d in variant_conditions if d["@rid"] not in subjects_ids - ] - other_conditions = [ - d for d in other_conditions if d["@rid"] not in subjects_ids - ] + disease_conditions = [d for d in disease_conditions if d["@rid"] not in subjects_ids] + variant_conditions = [d for d in variant_conditions if d["@rid"] not in subjects_ids] + other_conditions = [d for d in other_conditions if d["@rid"] not in subjects_ids] result = result.replace(r"{subject}", merge_diseases(subjects)) if r"{conditions:disease}" in template: - result = result.replace( - r"{conditions:disease}", merge_diseases(disease_conditions) - ) + result = result.replace(r"{conditions:disease}", merge_diseases(disease_conditions)) else: other_conditions.extend(disease_conditions) if r"{conditions:variant}" in template: - result = result.replace( - r"{conditions:variant}", natural_join_records(variant_conditions) - ) + result = result.replace(r"{conditions:variant}", natural_join_records(variant_conditions)) else: other_conditions.extend(variant_conditions) @@ -168,9 +149,7 @@ def aggregate_statements( def generate_key(statement: GkbStatement) -> Tuple: result = [ cond["displayName"] - for cond in filter_by_record_class( - statement["conditions"], "Disease", exclude=True - ) + for cond in filter_by_record_class(statement["conditions"], "Disease", exclude=True) if cond["@rid"] != statement["subject"]["@rid"] ] if statement.get("subject", {}).get("@class", "Disease") != "Disease": @@ -230,9 +209,7 @@ def display_variant(variant: IprVariant) -> str: # Use chosen legacy 'proteinChange' or an hgvs description of lowest detail. hgvs = variant.get( "proteinChange", - variant.get( - "hgvsProtein", variant.get("hgvsCds", variant.get("hgvsGenomic", "")) - ), + variant.get("hgvsProtein", variant.get("hgvsCds", variant.get("hgvsGenomic", ""))), ) if gene and hgvs: @@ -244,16 +221,14 @@ def display_variant(variant: IprVariant) -> str: def display_variants(gene_name: str, variants: List[IprVariant]) -> str: - result = sorted( - list({v for v in [display_variant(e) for e in variants] if gene_name in v}) - ) + result = sorted(list({v for v in [display_variant(e) for e in variants] if gene_name in v})) variants_text = natural_join(result) if len(result) > 1: - return f"Multiple variants of the gene {gene_name} were observed in this case: {variants_text}" - elif result: return ( - f"{variants_text[0].upper()}{variants_text[1:]} was observed in this case." + f"Multiple variants of the gene {gene_name} were observed in this case: {variants_text}" ) + elif result: + return f"{variants_text[0].upper()}{variants_text[1:]} was observed in this case." 
return "" @@ -274,9 +249,7 @@ def create_section_html( for statement_id, sentence in sentences_by_statement_id.items(): relevance = statements[statement_id]["relevance"]["@rid"] category = categorize_relevance( - graphkb_conn, - relevance, - RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])], + graphkb_conn, relevance, RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])] ) sentence_categories[sentence] = category @@ -287,12 +260,7 @@ def create_section_html( "target": "Feature", "filters": { "AND": [ - { - "source": { - "target": "Source", - "filters": {"name": "entrez gene"}, - } - }, + {"source": {"target": "Source", "filters": {"name": "entrez gene"}}}, {"name": gene_name}, {"biotype": "gene"}, ] @@ -326,22 +294,11 @@ def create_section_html( for section in [ {s for (s, v) in sentence_categories.items() if v == "diagnostic"}, {s for (s, v) in sentence_categories.items() if v == "biological"}, + {s for (s, v) in sentence_categories.items() if v in ["therapeutic", "prognostic"]}, { s for (s, v) in sentence_categories.items() - if v in ["therapeutic", "prognostic"] - }, - { - s - for (s, v) in sentence_categories.items() - if v - not in [ - "diagnostic", - "biological", - "therapeutic", - "prognostic", - "resistance", - ] + if v not in ["diagnostic", "biological", "therapeutic", "prognostic", "resistance"] }, {s for (s, v) in sentence_categories.items() if v == "resistance"}, ]: @@ -412,14 +369,10 @@ def summarize( # aggregate similar sentences sentences = {} for template, group in templates.items(): - sentences.update( - aggregate_statements(graphkb_conn, template, group, disease_matches) - ) + sentences.update(aggregate_statements(graphkb_conn, template, group, disease_matches)) # section statements by genes - statements_by_genes = section_statements_by_genes( - graphkb_conn, list(statements.values()) - ) + statements_by_genes = section_statements_by_genes(graphkb_conn, list(statements.values())) output: List[str] = [ "
The comments below were automatically generated from matches to GraphKB and have not been manually reviewed
" diff --git a/pori_python/ipr/therapeutic_options.py b/pori_python/ipr/therapeutic_options.py index d1cc1f8..c83d650 100644 --- a/pori_python/ipr/therapeutic_options.py +++ b/pori_python/ipr/therapeutic_options.py @@ -17,9 +17,7 @@ def create_therapeutic_options( - graphkb_conn: GraphKBConnection, - kb_matches: List[KbMatch], - variants: Sequence[IprVariant], + graphkb_conn: GraphKBConnection, kb_matches: List[KbMatch], variants: Sequence[IprVariant] ) -> List[Dict]: """ Generate therapeutic options summary from the list of kb-matches diff --git a/pori_python/ipr/util.py b/pori_python/ipr/util.py index 0b57de5..049e9cd 100644 --- a/pori_python/ipr/util.py +++ b/pori_python/ipr/util.py @@ -32,9 +32,7 @@ def get_terms_set(graphkb_conn: GraphKBConnection, base_terms: List[str]) -> Set terms = set() for base_term in base_terms: terms.update( - convert_to_rid_set( - get_term_tree(graphkb_conn, base_term, include_superclasses=False) - ) + convert_to_rid_set(get_term_tree(graphkb_conn, base_term, include_superclasses=False)) ) return terms @@ -71,9 +69,7 @@ def create_variant_name_tuple(variant: IprVariant) -> Tuple[str, str]: elif variant_type == "cnv": return (gene, str(variant.get("cnvState", ""))) variant_split = ( - variant["variant"].split(":", 1)[1] - if ":" in variant["variant"] - else variant["variant"] + variant["variant"].split(":", 1)[1] if ":" in variant["variant"] else variant["variant"] ) gene2 = str(variant.get("gene2", "")) @@ -97,9 +93,7 @@ def find_variant( raise KeyError(f"expected variant ({variant_key}, {variant_type}) does not exist") -def generate_ontology_preference_key( - record: Ontology, sources_sort: Dict[str, int] = {} -) -> Tuple: +def generate_ontology_preference_key(record: Ontology, sources_sort: Dict[str, int] = {}) -> Tuple: """Generate a tuple key for comparing preferred ontology terms.""" return ( record.get("name") == record.get("sourceId"), @@ -129,9 +123,7 @@ def get_preferred_drug_representation( source_preference = { r["@rid"]: r["sort"] - for r in graphkb_conn.query( - {"target": "Source", "returnProperties": ["sort", "@rid"]} - ) + for r in graphkb_conn.query({"target": "Source", "returnProperties": ["sort", "@rid"]}) } drugs = sorted( get_alternatives(graphkb_conn, drug_record_id), @@ -147,12 +139,8 @@ def get_preferred_gene_name( record = graphkb_conn.get_record_by_id(record_id) biotype = record.get("biotype", "") genes = [] - expanded_gene_names = graphkb_conn.query( - {"target": [record_id], "neighbors": neighbors} - ) - assert ( - len(expanded_gene_names) == 1 - ), "get_preferred_gene_name should have single result" + expanded_gene_names = graphkb_conn.query({"target": [record_id], "neighbors": neighbors}) + assert len(expanded_gene_names) == 1, "get_preferred_gene_name should have single result" expanded: Dict[str, List] = expanded_gene_names[0] # type: ignore if biotype != "gene": for edge in expanded.get("out_ElementOf", []): diff --git a/tests/test_graphkb/data.py b/tests/test_graphkb/data.py index c0f69d8..0b7b6f1 100644 --- a/tests/test_graphkb/data.py +++ b/tests/test_graphkb/data.py @@ -48,31 +48,19 @@ # ambiguous structural variations -> non-structural "FGFR3:c.1200dup": { "matches": {"displayName": ["FGFR3 mutation"], "type": ["mutation"]}, - "does_not_matches": { - "displayName": ["FGFR3 rearrangement"], - "type": ["rearrangement"], - }, + "does_not_matches": {"displayName": ["FGFR3 rearrangement"], "type": ["rearrangement"]}, }, "FGFR3:c.1200_1201insA": { "matches": {"displayName": ["FGFR3 mutation"], "type": ["mutation"]}, - 
"does_not_matches": { - "displayName": ["FGFR3 rearrangement"], - "type": ["rearrangement"], - }, + "does_not_matches": {"displayName": ["FGFR3 rearrangement"], "type": ["rearrangement"]}, }, "FGFR3:g.5000del": { "matches": {"displayName": ["FGFR3 mutation"], "type": ["mutation"]}, - "does_not_matches": { - "displayName": ["FGFR3 rearrangement"], - "type": ["rearrangement"], - }, + "does_not_matches": {"displayName": ["FGFR3 rearrangement"], "type": ["rearrangement"]}, }, "FGFR3:c.1200delinsA": { "matches": {"displayName": ["FGFR3 mutation"], "type": ["mutation"]}, - "does_not_matches": { - "displayName": ["FGFR3 rearrangement"], - "type": ["rearrangement"], - }, + "does_not_matches": {"displayName": ["FGFR3 rearrangement"], "type": ["rearrangement"]}, }, "STK11:e.1_100del": { "matches": {"displayName": ["STK11 mutation"], "type": ["mutation"]}, @@ -85,9 +73,6 @@ # non-structural variations "FGFR3:c.1200C>A": { "matches": {"displayName": ["FGFR3 mutation"], "type": ["mutation"]}, - "does_not_matches": { - "displayName": ["FGFR3 rearrangement"], - "type": ["rearrangement"], - }, + "does_not_matches": {"displayName": ["FGFR3 rearrangement"], "type": ["rearrangement"]}, }, } diff --git a/tests/test_graphkb/test_genes.py b/tests/test_graphkb/test_genes.py index efd5506..7788836 100644 --- a/tests/test_graphkb/test_genes.py +++ b/tests/test_graphkb/test_genes.py @@ -168,18 +168,14 @@ def test_get_gene_linked_pharmacogenomic_info(conn): assert False, f"No rid found for a pharmacogenomic with {gene}" -@pytest.mark.skipif( - EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests" -) +@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests") def test_get_cancer_predisposition_info(conn): genes, matches = get_cancer_predisposition_info(conn) for gene in CANCER_PREDISP_INITIAL_GENES: assert gene in genes, f"{gene} not found in get_cancer_predisposition_info" -@pytest.mark.skipif( - EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests" -) +@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests") def test_get_gene_linked_cancer_predisposition_info(conn): genes, matches = get_gene_linked_cancer_predisposition_info(conn) for gene in CANCER_PREDISP_INITIAL_GENES: @@ -196,9 +192,7 @@ def test_get_preferred_gene_name_kras(alt_rep, conn): ), f"Expected KRAS as preferred gene name for {alt_rep}, not '{gene_name}'" -@pytest.mark.skipif( - EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests" -) +@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests") def test_find_genes_by_variant_type_structural_variant(conn): result = get_genes_from_variant_types(conn, ["structural variant"]) names = {row["name"] for row in result} @@ -206,9 +200,7 @@ def test_find_genes_by_variant_type_structural_variant(conn): assert gene in names, f"{gene} was not identified as a structural variant gene." 
-@pytest.mark.skipif( - EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests" -) +@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests") def test_find_no_genes_by_variant_type_with_nonmatching_source_record_id(conn): refseq_id = get_rid(conn, target="source", name="refseq") result = get_genes_from_variant_types( @@ -217,9 +209,7 @@ def test_find_no_genes_by_variant_type_with_nonmatching_source_record_id(conn): assert not result -@pytest.mark.skipif( - EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests" -) +@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests") def test_get_therapeutic_associated_genes(conn): gene_list = get_therapeutic_associated_genes(graphkb_conn=conn) assert gene_list, "No get_therapeutic_associated_genes found" @@ -231,9 +221,7 @@ def test_get_therapeutic_associated_genes(conn): assert gene in names, f"{gene} not found by get_therapeutic_associated_genes" -@pytest.mark.skipif( - EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests" -) +@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests") def test_get_gene_information(conn): gene_info = get_gene_information( conn, diff --git a/tests/test_graphkb/test_match.py b/tests/test_graphkb/test_match.py index 3df10e3..0a466f6 100644 --- a/tests/test_graphkb/test_match.py +++ b/tests/test_graphkb/test_match.py @@ -76,11 +76,7 @@ def test_checks_by_source_id_kras(self, conn): kras = [ f["displayName"] for f in match.get_equivalent_features( - conn, - "nm_033360", - source="refseq", - source_id_version="4", - is_source_id=True, + conn, "nm_033360", source="refseq", source_id_version="4", is_source_id=True ) ] assert "KRAS" in kras @@ -93,14 +89,10 @@ def test_bad_category(self, conn): def test_bad_gene_name(self, conn): with pytest.raises(FeatureNotFoundError): - match.match_copy_variant( - conn, "not a real gene name", match.INPUT_COPY_CATEGORIES.AMP - ) + match.match_copy_variant(conn, "not a real gene name", match.INPUT_COPY_CATEGORIES.AMP) def test_known_loss(self, conn): - matches = match.match_copy_variant( - conn, "CDKN2A", match.INPUT_COPY_CATEGORIES.ANY_LOSS - ) + matches = match.match_copy_variant(conn, "CDKN2A", match.INPUT_COPY_CATEGORIES.ANY_LOSS) assert matches types_selected = {record["type"]["name"] for record in matches} @@ -150,9 +142,7 @@ def test_known_gain(self, conn): EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests" ) def test_low_gain_excludes_amplification(self, conn): - matches = match.match_copy_variant( - conn, "KRAS", match.INPUT_COPY_CATEGORIES.GAIN - ) + matches = match.match_copy_variant(conn, "KRAS", match.INPUT_COPY_CATEGORIES.GAIN) types_selected = {record["type"]["name"] for record in matches} @@ -164,13 +154,9 @@ def test_low_gain_excludes_amplification(self, conn): assert not has_prefix(variant_type, DECREASE_PREFIXES) -@pytest.mark.parametrize( - "pos1,pos2_start,pos2_end", [[3, 2, 5], [2, None, 5], [3, 2, None]] -) +@pytest.mark.parametrize("pos1,pos2_start,pos2_end", [[3, 2, 5], [2, None, 5], [3, 2, None]]) def test_range_overlap(pos1, pos2_start, pos2_end): - assert match.positions_overlap( - {"pos": pos1}, {"pos": pos2_start}, {"pos": pos2_end} - ) + assert match.positions_overlap({"pos": pos1}, {"pos": pos2_start}, {"pos": pos2_end}) @pytest.mark.parametrize( @@ -178,9 +164,7 @@ def test_range_overlap(pos1, pos2_start, pos2_end): [[2, 4, 5], [5, 2, 3], [10, None, 
9], [10, 11, None], [1, 2, 2], [2, 1, 1]], ) def test_range_not_overlap(pos1, pos2_start, pos2_end): - assert not match.positions_overlap( - {"pos": pos1}, {"pos": pos2_start}, {"pos": pos2_end} - ) + assert not match.positions_overlap({"pos": pos1}, {"pos": pos2_start}, {"pos": pos2_end}) @pytest.mark.parametrize("pos1", [None, 1]) @@ -218,9 +202,7 @@ def test_known_reduced_expression(self, conn): assert not has_prefix(variant_type, INCREASE_PREFIXES) def test_known_reduced_expression_gene_id(self, conn): - gene_id = conn.query({"target": "Feature", "filters": [{"name": "PTEN"}]})[0][ - "@rid" - ] + gene_id = conn.query({"target": "Feature", "filters": [{"name": "PTEN"}]})[0]["@rid"] matches = match.match_expression_variant( conn, gene_id, match.INPUT_EXPRESSION_CATEGORIES.DOWN ) @@ -238,9 +220,7 @@ def test_known_reduced_expression_gene_id(self, conn): EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests" ) def test_known_increased_expression(self, conn): - matches = match.match_expression_variant( - conn, "CA9", match.INPUT_EXPRESSION_CATEGORIES.UP - ) + matches = match.match_expression_variant(conn, "CA9", match.INPUT_EXPRESSION_CATEGORIES.UP) assert matches types_selected = {record["type"]["name"] for record in matches} @@ -259,12 +239,10 @@ def test_nonspecific_altseq(self): ) # null matches anything assert match.compare_positional_variants( - {"break1Start": {"pos": 1}, "untemplatedSeq": "T"}, - {"break1Start": {"pos": 1}}, + {"break1Start": {"pos": 1}, "untemplatedSeq": "T"}, {"break1Start": {"pos": 1}} ) assert match.compare_positional_variants( - {"break1Start": {"pos": 1}}, - {"break1Start": {"pos": 1}, "untemplatedSeq": "T"}, + {"break1Start": {"pos": 1}}, {"break1Start": {"pos": 1}, "untemplatedSeq": "T"} ) @pytest.mark.parametrize("seq1", ["T", "X", "?"]) @@ -300,18 +278,15 @@ def test_nonspecific_refseq(self): def test_ambiguous_refseq(self, seq1, seq2): # ambiguous AA matches anything the same length assert match.compare_positional_variants( - {"break1Start": {"pos": 1}, "refSeq": seq1}, - {"break1Start": {"pos": 1}, "refSeq": seq2}, + {"break1Start": {"pos": 1}, "refSeq": seq1}, {"break1Start": {"pos": 1}, "refSeq": seq2} ) def test_refseq_length_mismatch(self): assert not match.compare_positional_variants( - {"break1Start": {"pos": 1}, "refSeq": "??"}, - {"break1Start": {"pos": 1}, "refSeq": "T"}, + {"break1Start": {"pos": 1}, "refSeq": "??"}, {"break1Start": {"pos": 1}, "refSeq": "T"} ) assert not match.compare_positional_variants( - {"break1Start": {"pos": 1}, "refSeq": "?"}, - {"break1Start": {"pos": 1}, "refSeq": "TT"}, + {"break1Start": {"pos": 1}, "refSeq": "?"}, {"break1Start": {"pos": 1}, "refSeq": "TT"} ) def test_diff_altseq(self): @@ -328,14 +303,12 @@ def test_same_altseq_matches(self): def test_diff_refseq(self): assert not match.compare_positional_variants( - {"break1Start": {"pos": 1}, "refSeq": "M"}, - {"break1Start": {"pos": 1}, "refSeq": "R"}, + {"break1Start": {"pos": 1}, "refSeq": "M"}, {"break1Start": {"pos": 1}, "refSeq": "R"} ) def test_same_refseq_matches(self): assert match.compare_positional_variants( - {"break1Start": {"pos": 1}, "refSeq": "R"}, - {"break1Start": {"pos": 1}, "refSeq": "R"}, + {"break1Start": {"pos": 1}, "refSeq": "R"}, {"break1Start": {"pos": 1}, "refSeq": "R"} ) def test_range_vs_sub(self): @@ -389,9 +362,7 @@ def test_bad_gene2_name(self, conn): match.match_positional_variant(conn, "(BCR,ME-AS-A-GENE):fusion(e.13,e.3)") def test_match_explicit_reference1(self, conn): - reference1 = conn.query({"target": 
"Feature", "filters": {"name": "KRAS"}})[0][ - "@rid" - ] + reference1 = conn.query({"target": "Feature", "filters": {"name": "KRAS"}})[0]["@rid"] matches = match.match_positional_variant(conn, "p.G12D", reference1=reference1) assert matches @@ -399,12 +370,8 @@ def test_match_explicit_reference1(self, conn): EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests" ) def test_match_explicit_references(self, conn): - reference1 = conn.query({"target": "Feature", "filters": {"name": "BCR"}})[0][ - "@rid" - ] - reference2 = conn.query({"target": "Feature", "filters": {"name": "ABL1"}})[0][ - "@rid" - ] + reference1 = conn.query({"target": "Feature", "filters": {"name": "BCR"}})[0]["@rid"] + reference2 = conn.query({"target": "Feature", "filters": {"name": "ABL1"}})[0]["@rid"] matches = match.match_positional_variant( conn, "fusion(e.13,e.3)", reference1=reference1, reference2=reference2 ) @@ -422,9 +389,7 @@ def test_match_explicit_references(self, conn): ["EGFR:p.E746_S752delinsI", ["EGFR mutation"], ["EGFR copy variant"]], ], ) - def test_known_variants( - self, conn, known_variant, related_variants, unrelated_variants - ): + def test_known_variants(self, conn, known_variant, related_variants, unrelated_variants): matches = match.match_positional_variant(conn, known_variant) names = {m["displayName"] for m in matches} assert matches @@ -438,10 +403,7 @@ def test_known_variants( "known_variant,related_variants", [ ["(BCR,ABL1):fusion(e.13,e.3)", ["BCR and ABL1 fusion"]], - [ - "(ATP1B1,NRG1):fusion(e.2,e.2)", - ["NRG1 fusion", "ATP1B1 and NRG1 fusion"], - ], + ["(ATP1B1,NRG1):fusion(e.2,e.2)", ["NRG1 fusion", "ATP1B1 and NRG1 fusion"]], ], ) def test_known_fusions(self, conn, known_variant, related_variants): @@ -482,8 +444,7 @@ def test_tert_promoter(self, conn): assert match.match_positional_variant(conn, "TERT:c.-124C>T") @pytest.mark.skipif( - True, - reason="GERO-303 - technically incorrect notation for GSC backwards compatibility.", + True, reason="GERO-303 - technically incorrect notation for GSC backwards compatibility." ) def test_tert_promoter_leading_one_alt_notation(self, conn): # GERO-303 - technically this format is incorrect. 
@@ -548,13 +509,7 @@ class TestTypeScreening: threshold = STRUCTURAL_VARIANT_SIZE_THRESHOLD unambiguous_structural = ["fusion", "translocation"] ambiguous_structural = ["duplication", "deletion", "insertion", "indel"] - non_structural = [ - "substitution", - "missense", - "nonsense", - "frameshift", - "truncating", - ] + non_structural = ["substitution", "missense", "nonsense", "frameshift", "truncating"] def test_type_screening_update(self, conn, monkeypatch): # Monkey-patching get_terms_set() @@ -563,15 +518,11 @@ def mock_get_terms_set(graphkb_conn, base_terms): called = True return set() - monkeypatch.setattr( - "pori_python.graphkb.match.get_terms_set", mock_get_terms_set - ) + monkeypatch.setattr("pori_python.graphkb.match.get_terms_set", mock_get_terms_set) # Assert get_terms_set() has been called called = False - pori_python.graphkb.match.type_screening( - conn, {"type": ""}, updateStructuralTypes=True - ) + pori_python.graphkb.match.type_screening(conn, {"type": ""}, updateStructuralTypes=True) assert called # Assert get_terms_set() has not been called (default behavior) @@ -590,10 +541,7 @@ def test_type_screening_structural(self, conn): assert match.type_screening(conn, {"type": type}) == type for type in TestTypeScreening.ambiguous_structural: # w/ reference2 - assert ( - match.type_screening(conn, {"type": type, "reference2": "#123:45"}) - == type - ) + assert match.type_screening(conn, {"type": type, "reference2": "#123:45"}) == type # w/ cytoband coordinates assert match.type_screening(conn, {"type": type, "prefix": "y"}) == type @@ -618,19 +566,14 @@ def test_type_screening_structural_untemplatedSeqSize(self, conn): # Variation length too small (< threshold) assert ( match.type_screening( - conn, - { - "type": type, - "untemplatedSeqSize": TestTypeScreening.threshold - 1, - }, + conn, {"type": type, "untemplatedSeqSize": TestTypeScreening.threshold - 1} ) == TestTypeScreening.default_type ) # Variation length big enough (>= threshold) assert ( match.type_screening( - conn, - {"type": type, "untemplatedSeqSize": TestTypeScreening.threshold}, + conn, {"type": type, "untemplatedSeqSize": TestTypeScreening.threshold} ) == type ) @@ -640,26 +583,11 @@ def test_type_screening_structural_positions(self, conn): # Variation length too small (< threshold) for opt in [ {"break2Start": {"pos": TestTypeScreening.threshold - 1}}, - { - "break2Start": {"pos": TestTypeScreening.threshold - 1}, - "prefix": "c", - }, - { - "break2Start": {"pos": TestTypeScreening.threshold - 1}, - "prefix": "g", - }, - { - "break2Start": {"pos": TestTypeScreening.threshold - 1}, - "prefix": "n", - }, - { - "break2Start": {"pos": TestTypeScreening.threshold - 1}, - "prefix": "r", - }, - { - "break2Start": {"pos": int(TestTypeScreening.threshold / 3) - 1}, - "prefix": "p", - }, + {"break2Start": {"pos": TestTypeScreening.threshold - 1}, "prefix": "c"}, + {"break2Start": {"pos": TestTypeScreening.threshold - 1}, "prefix": "g"}, + {"break2Start": {"pos": TestTypeScreening.threshold - 1}, "prefix": "n"}, + {"break2Start": {"pos": TestTypeScreening.threshold - 1}, "prefix": "r"}, + {"break2Start": {"pos": int(TestTypeScreening.threshold / 3) - 1}, "prefix": "p"}, { "break1Start": {"pos": 1 + 99}, "break2Start": {"pos": TestTypeScreening.threshold + 99 - 1}, @@ -676,10 +604,7 @@ def test_type_screening_structural_positions(self, conn): {"break2Start": {"pos": TestTypeScreening.threshold}, "prefix": "g"}, {"break2Start": {"pos": TestTypeScreening.threshold}, "prefix": "n"}, {"break2Start": {"pos": 
TestTypeScreening.threshold}, "prefix": "r"}, - { - "break2Start": {"pos": int(TestTypeScreening.threshold / 3) + 1}, - "prefix": "p", - }, + {"break2Start": {"pos": int(TestTypeScreening.threshold / 3) + 1}, "prefix": "p"}, { "break1Start": {"pos": 1 + 99}, "break2Start": {"pos": TestTypeScreening.threshold + 99}, diff --git a/tests/test_graphkb/test_statement.py b/tests/test_graphkb/test_statement.py index ff0b9b6..d0d8a50 100644 --- a/tests/test_graphkb/test_statement.py +++ b/tests/test_graphkb/test_statement.py @@ -85,17 +85,9 @@ def test_custom_categories(self, graphkb_conn): assert category == "blargh" -@pytest.mark.skipif( - EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests" -) +@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests") class TestStatementMatch: - def test_truncating_categories( - self, conn - ): # noqa - pytest fixture, not redefinition - variant = { - "@class": "CategoryVariant", - "@rid": "#161:429", - "displayName": "RB1 truncating", - } + def test_truncating_categories(self, conn): # noqa - pytest fixture, not redefinition + variant = {"@class": "CategoryVariant", "@rid": "#161:429", "displayName": "RB1 truncating"} statements = statement.get_statements_from_variants(conn, [variant]) assert statements diff --git a/tests/test_graphkb/test_util.py b/tests/test_graphkb/test_util.py index a61bc92..e76f03e 100644 --- a/tests/test_graphkb/test_util.py +++ b/tests/test_graphkb/test_util.py @@ -97,28 +97,12 @@ class TestStripDisplayName: @pytest.mark.parametrize( "opt,stripDisplayName", [ + [{"displayName": "ABL1:p.T315I", "withRef": True, "withRefSeq": True}, "ABL1:p.T315I"], + [{"displayName": "ABL1:p.T315I", "withRef": False, "withRefSeq": True}, "p.T315I"], + [{"displayName": "ABL1:p.T315I", "withRef": True, "withRefSeq": False}, "ABL1:p.315I"], + [{"displayName": "ABL1:p.T315I", "withRef": False, "withRefSeq": False}, "p.315I"], [ - {"displayName": "ABL1:p.T315I", "withRef": True, "withRefSeq": True}, - "ABL1:p.T315I", - ], - [ - {"displayName": "ABL1:p.T315I", "withRef": False, "withRefSeq": True}, - "p.T315I", - ], - [ - {"displayName": "ABL1:p.T315I", "withRef": True, "withRefSeq": False}, - "ABL1:p.315I", - ], - [ - {"displayName": "ABL1:p.T315I", "withRef": False, "withRefSeq": False}, - "p.315I", - ], - [ - { - "displayName": "chr3:g.41266125C>T", - "withRef": False, - "withRefSeq": False, - }, + {"displayName": "chr3:g.41266125C>T", "withRef": False, "withRefSeq": False}, "g.41266125>T", ], [ @@ -158,16 +142,8 @@ class TestStringifyVariant: {"withRef": False, "withRefSeq": False}, "fusion(e.10,e.12)", ], - [ - "ABCA12:p.N1671Ifs*4", - {"withRef": False, "withRefSeq": False}, - "p.1671Ifs*4", - ], - [ - "x:y.p22.33copyloss", - {"withRef": False, "withRefSeq": False}, - "y.p22.33copyloss", - ], + ["ABCA12:p.N1671Ifs*4", {"withRef": False, "withRefSeq": False}, "p.1671Ifs*4"], + ["x:y.p22.33copyloss", {"withRef": False, "withRefSeq": False}, "y.p22.33copyloss"], # TODO: ['MED12:p.(?34_?68)mut', {'withRef': False, 'withRefSeq': False}, 'p.(34_68)mut'], # TODO: ['FLT3:p.(?572_?630)_(?572_?630)ins', {'withRef': False, 'withRefSeq': False}, 'p.(572_630)_(572_630)ins'], ], @@ -186,9 +162,7 @@ def test_stringifyVariant_parsed(self, conn, hgvs_string, opt, stringifiedVarian ["#158:35317", 1652734056311, "c.1>G"], ], ) - def test_stringifyVariant_positional( - self, conn, rid, createdAt, stringifiedVariant - ): + def test_stringifyVariant_positional(self, conn, rid, createdAt, stringifiedVariant): opt = 
{"withRef": False, "withRefSeq": False} variant = conn.get_record_by_id(rid) if variant and variant.get("createdAt", None) == createdAt: diff --git a/tests/test_ipr/test_annotate.py b/tests/test_ipr/test_annotate.py index 00a63a2..4fa2a3f 100644 --- a/tests/test_ipr/test_annotate.py +++ b/tests/test_ipr/test_annotate.py @@ -53,9 +53,7 @@ def test_annotate_nonsense_vs_missense(self, graphkb_conn): """Verify missense (point mutation) is not mistaken for a nonsense (stop codon) mutation.""" disease = "cancer" for key in ("prot_only", "cds_only", "genome_only", "pref"): - matched = annotate_positional_variants( - graphkb_conn, [TP53_MUT_DICT[key]], disease - ) + matched = annotate_positional_variants(graphkb_conn, [TP53_MUT_DICT[key]], disease) # nonsense - stop codon - should not match. This is missense not nonsense (#164:933). nonsense = [a for a in matched if a["kbVariant"] == "TP53 nonsense"] assert not nonsense, f"nonsense matched to {key}: {TP53_MUT_DICT[key]}" @@ -65,9 +63,7 @@ def test_annotate_nonsense_vs_missense_protein(self, graphkb_conn): """Verify missense (point mutation) is not mistaken for a nonsense (stop codon) mutation.""" disease = "cancer" for key in ("prot_only", "pref"): - matched = annotate_positional_variants( - graphkb_conn, [TP53_MUT_DICT[key]], disease - ) + matched = annotate_positional_variants(graphkb_conn, [TP53_MUT_DICT[key]], disease) # nonsense - stop codon - should not match. This is missense not nonsense (#164:933). nonsense = [a for a in matched if "nonsense" in a["kbVariant"]] assert not nonsense, f"nonsense matched to {key}: {TP53_MUT_DICT[key]}" @@ -77,9 +73,7 @@ def test_annotate_structural_variants_tp53(self, graphkb_conn): """Verify alternate TP53 variants match.""" disease = "cancer" ref_key = "prot_only" - pref = annotate_positional_variants( - graphkb_conn, [TP53_MUT_DICT[ref_key]], disease - ) + pref = annotate_positional_variants(graphkb_conn, [TP53_MUT_DICT[ref_key]], disease) # GERO-299 - nonsense - stop codon - should not match. This is missense not nonsense (#164:933). 
nonsense = [a for a in pref if a["kbVariant"] == "TP53 nonsense"] assert not nonsense diff --git a/tests/test_ipr/test_connection.py b/tests/test_ipr/test_connection.py index a825a97..611afb2 100644 --- a/tests/test_ipr/test_connection.py +++ b/tests/test_ipr/test_connection.py @@ -32,9 +32,7 @@ def request(*args, **kwargs): result = conn.post_images( "report_id", files={ - "expression.correlation": os.path.join( - IMAGE_DIR, "expression_correlation.png" - ), + "expression.correlation": os.path.join(IMAGE_DIR, "expression_correlation.png"), "mixcr.circos_trb_vj_gene_usage": os.path.join( IMAGE_DIR, "mixcr.circos_trb_vj_gene_usage.png" ), @@ -55,9 +53,7 @@ def request(*args, **kwargs): result = conn.post_images( "report_id", files={ - "expression.correlation": os.path.join( - IMAGE_DIR, "expression_correlation.png" - ), + "expression.correlation": os.path.join(IMAGE_DIR, "expression_correlation.png"), "mixcr.circos_trb_vj_gene_usage": os.path.join( IMAGE_DIR, "mixcr.circos_trb_vj_gene_usage.png" ), @@ -77,8 +73,7 @@ def request(*args, **kwargs): conn = IprConnection("user", "pass") with pytest.raises(FileNotFoundError): conn.post_images( - "report_id", - files={"expression.correlation": "thing/that/does/not/exist.png"}, + "report_id", files={"expression.correlation": "thing/that/does/not/exist.png"} ) def test_failed_image_load(self): diff --git a/tests/test_ipr/test_inputs.py b/tests/test_ipr/test_inputs.py index 07c0723..8a5b7e1 100644 --- a/tests/test_ipr/test_inputs.py +++ b/tests/test_ipr/test_inputs.py @@ -31,9 +31,7 @@ def read_data_file(filename): class TestPreProcessSmallMutations: def test_load_test_file(self) -> None: records = preprocess_small_mutations( - pd.read_csv( - os.path.join(DATA_DIR, "small_mutations.tab"), sep="\t" - ).to_dict("records") + pd.read_csv(os.path.join(DATA_DIR, "small_mutations.tab"), sep="\t").to_dict("records") ) assert records assert len(records) == 2614 @@ -90,9 +88,9 @@ def test_null(self): def test_load_small_mutations_probe(self) -> None: records = preprocess_small_mutations( - pd.read_csv( - os.path.join(DATA_DIR, "small_mutations_probe.tab"), sep="\t" - ).to_dict("records") + pd.read_csv(os.path.join(DATA_DIR, "small_mutations_probe.tab"), sep="\t").to_dict( + "records" + ) ) assert records assert len(records) == 4 @@ -103,9 +101,7 @@ def test_load_small_mutations_probe(self) -> None: class TestPreProcessCopyVariants: def test_load_copy_variants(self) -> None: records = preprocess_copy_variants( - pd.read_csv(os.path.join(DATA_DIR, "copy_variants.tab"), sep="\t").to_dict( - "records" - ) + pd.read_csv(os.path.join(DATA_DIR, "copy_variants.tab"), sep="\t").to_dict("records") ) assert records assert len(records) == 4603 @@ -136,9 +132,7 @@ def test_load_structural_variants() -> None: def test_load_expression_variants() -> None: records = preprocess_expression_variants( - pd.read_csv(os.path.join(DATA_DIR, "expression.tab"), sep="\t").to_dict( - "records" - ) + pd.read_csv(os.path.join(DATA_DIR, "expression.tab"), sep="\t").to_dict("records") ) assert records assert len(records) == 4603 @@ -296,8 +290,6 @@ def test_missing_mutation_burden(self): @pytest.mark.parametrize("example_name", ["no_variants", "sm_and_exp", "sm_only"]) def test_valid_json_inputs(example_name: str): - with open( - os.path.join(DATA_DIR, "json_examples", f"{example_name}.json"), "r" - ) as fh: + with open(os.path.join(DATA_DIR, "json_examples", f"{example_name}.json"), "r") as fh: content = json.load(fh) validate_report_content(content) diff --git a/tests/test_ipr/test_ipr.py 
b/tests/test_ipr/test_ipr.py index bda4d1d..132e4bc 100644 --- a/tests/test_ipr/test_ipr.py +++ b/tests/test_ipr/test_ipr.py @@ -154,11 +154,7 @@ class QueryMock: def __call__(self, *args, **kwargs): self.index += 1 - ret_val = ( - self.return_values[self.index] - if self.index < len(self.return_values) - else [] - ) + ret_val = self.return_values[self.index] if self.index < len(self.return_values) else [] return ret_val def mock_get_source(source): @@ -175,11 +171,7 @@ def base_graphkb_statement( statement = GkbStatement( # type: ignore { "conditions": [ - { - "@class": "Disease", - "@rid": disease_id, - "displayName": "disease_display_name", - }, + {"@class": "Disease", "@rid": disease_id, "displayName": "disease_display_name"}, { "@class": "CategoryVariant", "@rid": "variant_rid", @@ -302,9 +294,7 @@ def test_diagnostic(self, graphkb_conn) -> None: assert row["category"] == "diagnostic" @patch("pori_python.ipr.ipr.get_evidencelevel_mapping") - def test_unapproved_therapeutic( - self, mock_get_evidencelevel_mapping, graphkb_conn - ) -> None: + def test_unapproved_therapeutic(self, mock_get_evidencelevel_mapping, graphkb_conn) -> None: mock_get_evidencelevel_mapping.return_value = {"other": "test"} statement = base_graphkb_statement() @@ -319,12 +309,8 @@ def test_unapproved_therapeutic( assert row["category"] == "therapeutic" @patch("pori_python.ipr.ipr.get_evidencelevel_mapping") - def test_approved_therapeutic( - self, mock_get_evidencelevel_mapping, graphkb_conn - ) -> None: - mock_get_evidencelevel_mapping.return_value = { - APPROVED_EVIDENCE_RIDS[0]: "test" - } + def test_approved_therapeutic(self, mock_get_evidencelevel_mapping, graphkb_conn) -> None: + mock_get_evidencelevel_mapping.return_value = {APPROVED_EVIDENCE_RIDS[0]: "test"} statement = base_graphkb_statement() statement["relevance"]["@rid"] = "therapeutic" diff --git a/tests/test_ipr/test_main.py b/tests/test_ipr/test_main.py index a679ae3..acebe8d 100644 --- a/tests/test_ipr/test_main.py +++ b/tests/test_ipr/test_main.py @@ -37,10 +37,7 @@ def report_upload_content(tmp_path_factory) -> Dict: {"analysisRole": "expression (disease)", "name": "1"}, {"analysisRole": "expression (primary site)", "name": "2"}, {"analysisRole": "expression (biopsy site)", "name": "3"}, - { - "analysisRole": "expression (internal pancancer cohort)", - "name": "4", - }, + {"analysisRole": "expression (internal pancancer cohort)", "name": "4"}, ], "patientId": "PATIENT001", "project": "TEST", @@ -53,9 +50,9 @@ def report_upload_content(tmp_path_factory) -> Dict: "copyVariants": pd.read_csv( get_test_file("copy_variants.short.tab"), sep="\t" ).to_dict("records"), - "structuralVariants": pd.read_csv( - get_test_file("fusions.tab"), sep="\t" - ).to_dict("records"), + "structuralVariants": pd.read_csv(get_test_file("fusions.tab"), sep="\t").to_dict( + "records" + ), "kbDiseaseMatch": "colorectal cancer", } ) @@ -86,9 +83,7 @@ def report_upload_content(tmp_path_factory) -> Dict: return report_content -@pytest.mark.skipif( - EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests" -) +@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests") class TestCreateReport: def test_main_sections_present(self, report_upload_content: Dict) -> None: sections = set(report_upload_content.keys()) @@ -104,10 +99,7 @@ def test_main_sections_present(self, report_upload_content: Dict) -> None: assert section in sections def test_kept_low_quality_fusion(self, report_upload_content: Dict) -> None: - fusions = [ - 
(sv["gene1"], sv["gene2"]) - for sv in report_upload_content["structuralVariants"] - ] + fusions = [(sv["gene1"], sv["gene2"]) for sv in report_upload_content["structuralVariants"]] assert ("SARM1", "SUZ12") in fusions def test_pass_through_content_added(self, report_upload_content: Dict) -> None: @@ -133,8 +125,6 @@ def test_found_kb_statement_related_gene(self, report_upload_content: Dict) -> N genes = report_upload_content["genes"] assert any([g.get("kbStatementRelated", False) for g in genes]) - def test_found_cancer_gene_list_match_gene( - self, report_upload_content: Dict - ) -> None: + def test_found_cancer_gene_list_match_gene(self, report_upload_content: Dict) -> None: genes = report_upload_content["genes"] assert any([g.get("cancerGeneListMatch", False) for g in genes]) diff --git a/tests/test_ipr/test_probe.py b/tests/test_ipr/test_probe.py index 0d70846..57235a9 100644 --- a/tests/test_ipr/test_probe.py +++ b/tests/test_ipr/test_probe.py @@ -46,16 +46,12 @@ def probe_upload_content() -> Dict: return report_content -@pytest.mark.skipif( - EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests" -) +@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests") class TestCreateReport: def test_found_probe_small_mutations(self, probe_upload_content: Dict) -> None: assert probe_upload_content["smallMutations"] - def test_found_probe_small_mutations_match( - self, probe_upload_content: Dict - ) -> None: + def test_found_probe_small_mutations_match(self, probe_upload_content: Dict) -> None: # verify each probe had a KB match for sm_probe in probe_upload_content["smallMutations"]: match_list = [ diff --git a/tests/test_ipr/test_summary.py b/tests/test_ipr/test_summary.py index edbcd35..9c1ccaa 100644 --- a/tests/test_ipr/test_summary.py +++ b/tests/test_ipr/test_summary.py @@ -14,18 +14,8 @@ def test_prefers_non_alias(self): side_effect=[ [], [ - { - "sourceId": "1", - "alias": False, - "source": "source", - "name": "name", - }, - { - "sourceId": "2", - "alias": True, - "source": "source", - "name": "name", - }, + {"sourceId": "1", "alias": False, "source": "source", "name": "name"}, + {"sourceId": "2", "alias": True, "source": "source", "name": "name"}, ], ] ) @@ -39,18 +29,8 @@ def test_prefers_non_deprecated(self): side_effect=[ [], [ - { - "sourceId": "1", - "deprecated": True, - "source": "source", - "name": "name", - }, - { - "sourceId": "2", - "deprecated": False, - "source": "source", - "name": "name", - }, + {"sourceId": "1", "deprecated": True, "source": "source", "name": "name"}, + {"sourceId": "2", "deprecated": False, "source": "source", "name": "name"}, ], ] ) @@ -64,18 +44,8 @@ def test_prefers_lower_sort_source(self): side_effect=[ [{"@rid": "source2", "sort": 0}, {"@rid": "source1", "sort": 1}], [ - { - "sourceId": "1", - "deprecated": False, - "source": "source1", - "name": "name", - }, - { - "sourceId": "2", - "deprecated": False, - "source": "source2", - "name": "name", - }, + {"sourceId": "1", "deprecated": False, "source": "source1", "name": "name"}, + {"sourceId": "2", "deprecated": False, "source": "source2", "name": "name"}, ], ] ) @@ -129,13 +99,7 @@ def test_multiple_diseases_no_matches(self): ] subjects = [{"@class": "Therapy", "displayName": "some drug", "@rid": "5"}] sentence = substitute_sentence_template( - template, - diseases + variants, - subjects, - relevance, - [], - ["6", "7"], - disease_matches, + template, diseases + variants, subjects, relevance, [], ["6", "7"], disease_matches ) assert ( 
sentence @@ -160,13 +124,7 @@ def test_multiple_diseases_some_matches(self): ] subjects = [{"@class": "Therapy", "displayName": "some drug", "@rid": "5"}] sentence = substitute_sentence_template( - template, - diseases + variants, - subjects, - relevance, - [], - ["6", "7"], - disease_matches, + template, diseases + variants, subjects, relevance, [], ["6", "7"], disease_matches ) assert ( sentence @@ -191,13 +149,7 @@ def test_multiple_diseases_only_matches(self): ] subjects = [{"@class": "Therapy", "displayName": "some drug", "@rid": "5"}] sentence = substitute_sentence_template( - template, - diseases + variants, - subjects, - relevance, - [], - ["6", "7"], - disease_matches, + template, diseases + variants, subjects, relevance, [], ["6", "7"], disease_matches ) assert ( sentence diff --git a/tests/test_ipr/test_util.py b/tests/test_ipr/test_util.py index ffc35a7..7031881 100644 --- a/tests/test_ipr/test_util.py +++ b/tests/test_ipr/test_util.py @@ -5,12 +5,7 @@ @pytest.mark.parametrize( "input,output_keys", - [ - [{"key": 0}, ["key"]], - [{"key": None}, []], - [{"key": ""}, []], - [{"gene1": None}, ["gene1"]], - ], + [[{"key": 0}, ["key"]], [{"key": None}, []], [{"key": ""}, []], [{"gene1": None}, ["gene1"]]], ) def test_trim_empty_values(input, output_keys): modified_object = trim_empty_values(input) @@ -21,21 +16,11 @@ def test_trim_empty_values(input, output_keys): "variant,result", [ [ - { - "variantType": "exp", - "gene": "GENE", - "expressionState": "increased expression", - }, + {"variantType": "exp", "gene": "GENE", "expressionState": "increased expression"}, "increased expression", ], - [ - {"variantType": "cnv", "gene": "GENE", "cnvState": "amplification"}, - "amplification", - ], - [ - {"variantType": "other", "gene2": "GENE", "variant": "GENE:anything"}, - "anything", - ], + [{"variantType": "cnv", "gene": "GENE", "cnvState": "amplification"}, "amplification"], + [{"variantType": "other", "gene2": "GENE", "variant": "GENE:anything"}, "anything"], ], ) def test_create_variant_name_tuple(variant, result): diff --git a/tests/test_ipr/util.py b/tests/test_ipr/util.py index 25ad657..cb3dd27 100644 --- a/tests/test_ipr/util.py +++ b/tests/test_ipr/util.py @@ -5,9 +5,5 @@ def __init__(self, return_values) -> None: def __call__(self, *args, **kwargs): self.index += 1 - ret_val = ( - self.return_values[self.index] - if self.index < len(self.return_values) - else [] - ) + ret_val = self.return_values[self.index] if self.index < len(self.return_values) else [] return ret_val