Merge branch 'develop' into bugfix/KBDEV-1236-matching-to-exon20insertion
bcgsc · Sep 25, 2024 · d4781cd · d4781cd
2 parents bf4679c + 303c611
commit d4781cd
Show file tree

Hide file tree

Showing 6 changed files with 83 additions and 13 deletions.
diff --git a/pori_python/graphkb/genes.py b/pori_python/graphkb/genes.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 from typing import Any, Dict, List, Sequence, Set, Tuple, cast
+from typing_extensions import deprecated
 
 from pori_python.types import IprGene, Ontology, Record, Statement, Variant
 
@@ -256,6 +257,7 @@ def get_preferred_gene_name(
     return gene_names[0]
 
 
+@deprecated("Use get_gene_linked_cancer_predisposition_info instead")
 def get_cancer_predisposition_info(
     conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE_NAME
 ) -> Tuple[List[str], Dict[str, str]]:
@@ -360,6 +362,7 @@ def get_gene_linked_cancer_predisposition_info(
     return sorted(genes), variants
 
 
+@deprecated("Use get_gene_linked_pharmacogenomic_info instead")
 def get_pharmacogenomic_info(
     conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE_NAME
 ) -> Tuple[List[str], Dict[str, str]]:

diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py
@@ -237,8 +237,8 @@ def ipr_report(
     always_write_output_json: bool = False,
     ipr_upload: bool = True,
     interactive: bool = False,
-    graphkb_username: str = None,
-    graphkb_password: str = None,
+    graphkb_username: str = "",
+    graphkb_password: str = "",
     graphkb_url: str = "",
     generate_therapeutics: bool = False,
     generate_comments: bool = True,

diff --git a/setup.cfg b/setup.cfg
@@ -18,7 +18,7 @@ known_standard_library = requests
 
 [metadata]
 name = pori_python
-version = 0.0.1
+version = 0.1.2
 url = https://github.com/bcgsc/pori_python
 author_email = [email protected]
 maintainer_email = [email protected]
@@ -29,7 +29,7 @@ long_description_content_type = text/markdown
 packages = find:
 python_requires = >=3.9
 dependency_links = []
-include_package_data = True
+include_package_data = true
 install_requires =
     biopython
     jsonschema
@@ -60,7 +60,7 @@ dev =
     mypy
 
 [options.package_data]
-pori_python.ipr = pori/ipr/content.spec.json, py.typed
+pori_python.ipr = content.spec.json, py.typed
 pori_python.graphkb = py.typed
 
 [options.entry_points]

diff --git a/tests/test_graphkb/test_match.py b/tests/test_graphkb/test_match.py
@@ -500,6 +500,12 @@ def test_genomic_coordinates(self, conn):
     def test_tert_promoter(self, conn):
         assert match.match_positional_variant(conn, "TERT:c.-124C>T")
 
+    def test_wildtype_match_error(self, conn):
+        for gkb_match in match.match_positional_variant(conn, "TP53:p.E285K"):
+            assert (
+                "wildtype" not in gkb_match["displayName"]
+            ), f"TP53:p.E285K should not match {gkb_match['displayName']}"
+
     @pytest.mark.skipif(
         True, reason="GERO-303 - technically incorrect notation for GSC backwards compatibility."
     )

diff --git a/tests/test_ipr/test_annotate.py b/tests/test_ipr/test_annotate.py
@@ -41,6 +41,39 @@
     ),
 }
 
+KBDEV1231_TP53_ERR_MATCH_WT = {
+    "altSeq": "",
+    "chromosome": "chr17",
+    "comments": "",
+    "endPosition": "",
+    "gene": "TP53",
+    "germline": False,
+    "hgvsCds": "ENST00000269305:c.853G>A",
+    "hgvsGenomic": "chr17:g.7673767C>T",
+    "hgvsProtein": "TP53:p.E285K",
+    "key": "c23a7b0387335e7a5ed6c1081a1822ae",
+    "library": "F145233;F145265",
+    "ncbiBuild": "GRCh38",
+    "normalAltCount": "",
+    "normalDepth": "",
+    "normalRefCount": "",
+    "proteinChange": "p.E285K",
+    "refSeq": "",
+    "rnaAltCount": 311,
+    "rnaDepth": 370,
+    "rnaRefCount": 59,
+    "startPosition": "",
+    "transcript": "ENST00000269305",
+    "tumourAltCopies": "",
+    "tumourAltCount": 64,
+    "tumourDepth": 100,
+    "tumourRefCopies": "",
+    "tumourRefCount": 36,
+    "variant": "TP53:p.E285K",
+    "variantType": "mut",
+    "zygosity": "",
+}
+
 
 @pytest.fixture(scope="module")
 def graphkb_conn():
@@ -106,3 +139,11 @@ def test_annotate_structural_variants_tp53(self, graphkb_conn):
             missing = pref_vars.difference(alt_vars).difference(known_issues)
             print(alt_vars)
             assert not missing, f"{key} missing{missing}: {diff}"
+
+    def test_wt_not_matched(self, graphkb_conn):
+        """Verify wildtypes are not matched to mutations."""
+        disease = "cancer"
+        matches = annotate_positional_variants(graphkb_conn, [KBDEV1231_TP53_ERR_MATCH_WT], disease)
+        # KBDEV-1231 - wildtype - should not match.  A mutation is not wildtype
+        wt_matches = sorted(set([m["kbVariant"] for m in matches if "wildtype" in m["kbVariant"]]))
+        assert not wt_matches, f"Mutation 'TP53:p.E285K' should NOT match {wt_matches}"
diff --git a/tests/test_ipr/test_upload.py b/tests/test_ipr/test_upload.py
@@ -4,7 +4,7 @@
 import pytest
 import sys
 import uuid
-from typing import Dict
+from typing import Generator
 from unittest.mock import patch
 
 from pori_python.ipr.connection import IprConnection
@@ -31,7 +31,7 @@ def get_test_file(name: str) -> str:
 
 
 @pytest.fixture(scope="module")
-def loaded_reports(tmp_path_factory) -> Dict:
+def loaded_reports(tmp_path_factory) -> Generator:
     json_file = tmp_path_factory.mktemp("inputs") / "content.json"
     async_json_file = tmp_path_factory.mktemp("inputs") / "async_content.json"
     patient_id = f"TEST_{str(uuid.uuid4())}"
@@ -41,7 +41,10 @@ def loaded_reports(tmp_path_factory) -> Dict:
             {"analysisRole": "expression (disease)", "name": "1"},
             {"analysisRole": "expression (primary site)", "name": "2"},
             {"analysisRole": "expression (biopsy site)", "name": "3"},
-            {"analysisRole": "expression (internal pancancer cohort)", "name": "4"},
+            {
+                "analysisRole": "expression (internal pancancer cohort)",
+                "name": "4",
+            },
         ],
         "patientId": patient_id,
         "project": "TEST",
@@ -80,10 +83,20 @@ def loaded_reports(tmp_path_factory) -> Dict:
         ),
         "kbDiseaseMatch": "colorectal cancer",
     }
-    json_file.write_text(json.dumps(json_contents, allow_nan=False))
+    json_file.write_text(
+        json.dumps(
+            json_contents,
+            allow_nan=False,
+        )
+    )
 
     json_contents["patientId"] = async_patient_id
-    async_json_file.write_text(json.dumps(json_contents, allow_nan=False))
+    async_json_file.write_text(
+        json.dumps(
+            json_contents,
+            allow_nan=False,
+        )
+    )
 
     argslist = [
         "ipr",
@@ -122,12 +135,11 @@ def loaded_reports(tmp_path_factory) -> Dict:
     loaded_report = ipr_conn.get(uri=f"reports?searchText={patient_id}")
     async_loaded_report = ipr_conn.get(uri=f"reports?searchText={async_patient_id}")
 
-    loaded_reports = {
+    loaded_reports_result = {
         "sync": (patient_id, loaded_report),
         "async": (async_patient_id, async_loaded_report),
     }
-    yield loaded_reports
-
+    yield loaded_reports_result
     ipr_conn.delete(uri=f"reports/{loaded_report['reports'][0]['ident']}")
     ipr_conn.delete(uri=f"reports/{async_loaded_report['reports'][0]['ident']}")
 
@@ -154,6 +166,14 @@ def compare_sections(section1, section2):
                         item[subitem].pop(key, None)
                     if isinstance(item[subitem], list):
                         [subsubitem.pop(key, None) for subsubitem in item[subitem]]
+                        if item[subitem] != []:
+                            item[subitem] = sorted(item[subitem], key=lambda d: str(d))
+    if isinstance(section1, list):
+        section2_items = [str(item) for item in section2]
+        section2_items.sort()
+        section1_items = [str(item) for item in section1]
+        section1_items.sort()
+        return str(section2_items) == str(section1_items)
     return str(section1) == str(section2)