From d2d0145c35b4bdf6d14aeacdbf5083c16bab61c6 Mon Sep 17 00:00:00 2001 From: rhdolin Date: Wed, 11 Sep 2024 14:05:44 -0700 Subject: [PATCH 1/3] add find-population-molecular-consequence API --- app/api_spec.yml | 57 +++++++++++++++ app/common.py | 9 ++- app/endpoints.py | 72 +++++++++++++++++++ .../1.json | 26 +++++++ .../test_population_phenotype_operations.py | 13 ++++ tests/utilities.py | 7 ++ 6 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 tests/expected_outputs/find_population_molecular_consequences/1.json diff --git a/app/api_spec.yml b/app/api_spec.yml index 8e3f8ca2..48576307 100644 --- a/app/api_spec.yml +++ b/app/api_spec.yml @@ -752,6 +752,8 @@ paths: type: array items: type: string + example: + - "http://sequenceontology.org|SO:0001629" style: "form" explode: false - name: testIdentifiers @@ -1181,6 +1183,61 @@ paths: default: false description: Include list of matching patients if set to true. + /population-operations/phenotype-operations/$find-population-molecular-consequences: + get: + description: |- + Retrieve count or list of patients having molecular consequences. More specifically, this operation retrieves the count +/- list of patients that have molecular consequences involving specific featureConsequences, derived from specific variants. + + A patient meets numerator criteria if they have at least one molecular consequence matching the query parameters. + summary: "Find Population Molecular Consequences" + operationId: "app.endpoints.find_population_molecular_consequences" + tags: + - "Population Phenotype Operations" + responses: + '200': + description: "Returns a FHIR Parameters resource containing a count +/- list of patients having at least one matching molecular consequence." + content: + application/json: + schema: + type: object + parameters: + - name: variants + in: query + description: List of variants from which consequences are derived. Must be in HGVS or SPDI format. + schema: + type: array + items: + type: string + example: + - "NC_000001.10:152785039:G:T" + style: "form" + explode: false + - name: featureConsequences + in: query + description: List of consequences sought. Must be in token or codesystem|code format. + schema: + type: array + items: + type: string + example: + - "http://sequenceontology.org|SO:0001583" + style: "form" + explode: false + - name: genomicSourceClass + in: query + schema: + type: string + enum: + - "germline" + - "somatic" + description: Enables an App to limit results to those that are 'germline' or 'somatic'. Default is to include variants irrespective of genomic source class. + - name: includePatientList + in: query + schema: + type: boolean + default: false + description: Include list of matching patients if set to true. + /utilities/get-feature-coordinates: get: description: 'This utility returns genomic feature coordinates and other annotations. All data are from NCBI Human Genome Resources. For chromosomes, build 37 and build 38 reference sequences are returned. For genes, genomic coordinates are returned, along with a list of transcripts. MANE transcript is flagged. For transcripts, genomic coordinates are returned, along with the gene name and composite exons, along with exon coordinates. For proteins, the corresponding transcript is returned.' diff --git a/app/common.py b/app/common.py index 70303e92..f52c6058 100644 --- a/app/common.py +++ b/app/common.py @@ -1859,7 +1859,7 @@ def query_transcript(transcript): return results -def query_molecular_consequences_by_variants(normalized_variant_list, feature_consequence_list, query): +def query_molecular_consequences_by_variants(normalized_variant_list, feature_consequence_list, query, population=False): variant_list = [] for item in normalized_variant_list: if "GRCh37" in item: @@ -1883,12 +1883,17 @@ def query_molecular_consequences_by_variants(normalized_variant_list, feature_co ]}) pipeline_part[-1]['$match']['$or'] = or_query - query['SPDI'] = {'$in': variant_list} + if normalized_variant_list != []: + query['SPDI'] = {'$in': variant_list} query_string = [{'$match': query}, {'$lookup': {'from': 'MolecConseq', 'let': {'myvariant_id': '$_id'}, 'pipeline': pipeline_part, 'as': 'molecularConsequenceMatches'}}, {'$match': {'molecularConsequenceMatches': {'$exists': True, '$not': {'$size': 0}}}}] + + if population: + query_string.append({'$group': {'_id': '$patientID'}}) + try: results = variants_db.aggregate(query_string) results = list(results) diff --git a/app/endpoints.py b/app/endpoints.py index fc5ae849..8386c70a 100644 --- a/app/endpoints.py +++ b/app/endpoints.py @@ -2322,3 +2322,75 @@ def find_population_dx_implications( result.pop("parameter") return jsonify(result) + + +def find_population_molecular_consequences( + variants=None, featureConsequences=None, genomicSourceClass=None, + includePatientList=None): + + # Parameters + if not variants and not featureConsequences: + abort(400, "You must supply either 'variants' or 'featureConsequences'.") + + normalized_feature_consequence_list = [] + if featureConsequences: + normalized_feature_consequence_list = list(map(common.get_feature_consequence, featureConsequences)) + + # Query + query = {} + + # Genomic Source Class Query + if genomicSourceClass: + genomicSourceClass = genomicSourceClass.strip().lower() + query["genomicSourceClass"] = {"$eq": genomicSourceClass} + + normalized_variants = [] + if variants: + normalized_variants = list(map(common.get_variant, variants)) + + # Result Object + result = OrderedDict() + result["resourceType"] = "Parameters" + result["parameter"] = [] + + if normalized_variants: + genomics_build_presence = common.get_genomics_build_presence(query) + + for normalizedVariant in normalized_variants: + if not normalizedVariant["GRCh37"] and genomics_build_presence["GRCh37"]: + abort(422, f'Failed LiftOver. Variant: {normalizedVariant["variant"]}') + elif not normalizedVariant["GRCh38"] and genomics_build_presence["GRCh38"]: + abort(422, f'Failed LiftOver. Variant: {normalizedVariant["variant"]}') + + query_results = common.query_molecular_consequences_by_variants(normalized_variants, normalized_feature_consequence_list, query, True) + + parameter = OrderedDict() + parameter["name"] = "consequences" + parameter["part"] = [] + + parameter["part"].append({ + "name": "numerator", + "valueQuantity": {'value': len(query_results)} + }) + + parameter["part"].append({ + "name": "denominator", + "valueQuantity": {"value": common.patients_db.count_documents({})} + }) + + if includePatientList: + patients = [] + for patientID in query_results: + patients.append(f'{patientID["_id"]}') + + for patientID in sorted(patients): + parameter["part"].append({ + "name": "subject", + "valueString": f'{patientID}' + }) + + result["parameter"].append(parameter) + + if not result["parameter"]: + result.pop("parameter") + return jsonify(result) diff --git a/tests/expected_outputs/find_population_molecular_consequences/1.json b/tests/expected_outputs/find_population_molecular_consequences/1.json new file mode 100644 index 00000000..3aa654cc --- /dev/null +++ b/tests/expected_outputs/find_population_molecular_consequences/1.json @@ -0,0 +1,26 @@ +{ + "resourceType": "Parameters", + "parameter": [ + { + "name": "consequences", + "part": [ + { + "name": "numerator", + "valueQuantity": { + "value": 1 + } + }, + { + "name": "denominator", + "valueQuantity": { + "value": 1116 + } + }, + { + "name": "subject", + "valueString": "TCGA-DD-A1EH" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/integration_tests/test_population_phenotype_operations.py b/tests/integration_tests/test_population_phenotype_operations.py index 41431e13..5de70447 100644 --- a/tests/integration_tests/test_population_phenotype_operations.py +++ b/tests/integration_tests/test_population_phenotype_operations.py @@ -74,3 +74,16 @@ def test_find_population_dx_implications_4(client): response = client.get(url) tu.compare_actual_and_expected_output(f'{tu.FIND_POPULATION_DX_IMPLICATIONS_OUTPUT_DIR}4.json', response.json) + + +""" +Find Population Molecular Consequences Tests +------------------------------------- +""" + + +def test_find_population_molecular_consequences_1(client): + url = tu.find_population_molecular_consequences_query('variants=NC_000001.10:152785039:G:T&featureConsequences=http://sequenceontology.org|SO:0001583&includePatientList=true') + response = client.get(url) + + tu.compare_actual_and_expected_output(f'{tu.FIND_POPULATION_MOLECULAR_CONSEQUENCES_OUTPUT_DIR}1.json', response.json) diff --git a/tests/utilities.py b/tests/utilities.py index 3ce0e003..247124db 100644 --- a/tests/utilities.py +++ b/tests/utilities.py @@ -51,6 +51,9 @@ FIND_POPULATION_DX_IMPLICATIONS_URL = "/population-operations/phenotype-operations/$find-population-dx-implications" FIND_POPULATION_DX_IMPLICATIONS_OUTPUT_DIR = "tests/expected_outputs/find_population_dx_implications/" +FIND_POPULATION_MOLECULAR_CONSEQUENCES_URL = "/population-operations/phenotype-operations/$find-population-molecular-consequences" +FIND_POPULATION_MOLECULAR_CONSEQUENCES_OUTPUT_DIR = "tests/expected_outputs/find_population_molecular_consequences/" + GET_FEATURE_COORDINATES_URL = "/utilities/get-feature-coordinates" GET_FEATURE_COORDINATES_OUTPUT_DIR = "tests/expected_outputs/get_feature_coordinates/" @@ -122,6 +125,10 @@ def find_population_dx_implications_query(query): return f"{FIND_POPULATION_DX_IMPLICATIONS_URL}?{query}" +def find_population_molecular_consequences_query(query): + return f"{FIND_POPULATION_MOLECULAR_CONSEQUENCES_URL}?{query}" + + def get_feature_coordinates_query(query): return f"{GET_FEATURE_COORDINATES_URL}?{query}" From 8b0adba32de11bedb286d69d2c00a1b39ade1c88 Mon Sep 17 00:00:00 2001 From: rhdolin Date: Wed, 11 Sep 2024 14:25:05 -0700 Subject: [PATCH 2/3] Update FHIRGenomicsOperations.postman_collection.json --- ...GenomicsOperations.postman_collection.json | 148 +++++++++++++++--- 1 file changed, 129 insertions(+), 19 deletions(-) diff --git a/FHIRGenomicsOperations.postman_collection.json b/FHIRGenomicsOperations.postman_collection.json index 85367f20..3b9612ed 100644 --- a/FHIRGenomicsOperations.postman_collection.json +++ b/FHIRGenomicsOperations.postman_collection.json @@ -16,7 +16,7 @@ "method": "GET", "header": [], "url": { - "raw": "https://fhir-gen-ops.herokuapp.com/subject-operations/genotype-operations/$find-subject-variants?subject=NB6TK329&ranges=NC_000002.12:178525988-178807423&includeVariants=true&includePhasing=true", + "raw": "https://fhir-gen-ops.herokuapp.com/subject-operations/genotype-operations/$find-subject-variants?subject=NB6TK329&testIdentifiers=TL000016660-ASM&includeVariants=true&ranges=NC_000002.12:178525988-178807423,NC_000002.12:178525988-178807423", "protocol": "https", "host": [ "fhir-gen-ops", @@ -33,14 +33,9 @@ "key": "subject", "value": "NB6TK329" }, - { - "key": "ranges", - "value": "NC_000002.12:178525988-178807423" - }, { "key": "testIdentifiers", - "value": "TL000016660-ASM", - "disabled": true + "value": "TL000016660-ASM" }, { "key": "specimenIdentifiers", @@ -68,7 +63,12 @@ }, { "key": "includePhasing", - "value": "true" + "value": "true", + "disabled": true + }, + { + "key": "ranges", + "value": "NC_000002.12:178525988-178807423,NC_000002.12:178525988-178807423" } ] } @@ -1514,6 +1514,70 @@ }, "response": [] }, + { + "name": "find-subject-dx-implications AWS", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "https://5ipkr7tx39.execute-api.us-east-1.amazonaws.com/dev/subject-operations/genotype-operations/$find-subject-dx-implications?subject=HCC1143&ranges=NC_000019.10:8892900-8892999&testIdentifiers=e64f5164-a2d3-47f4-97fb-495e84f394c1", + "protocol": "https", + "host": [ + "5ipkr7tx39", + "execute-api", + "us-east-1", + "amazonaws", + "com" + ], + "path": [ + "dev", + "subject-operations", + "genotype-operations", + "$find-subject-dx-implications" + ], + "query": [ + { + "key": "testIdentifiers", + "value": "TL000016660-ASM", + "disabled": true + }, + { + "key": "specimenIdentifiers", + "value": "GS000016660-ASM", + "disabled": true + }, + { + "key": "genomicSourceClass", + "value": "germline", + "disabled": true + }, + { + "key": "testDateRange", + "value": "ge2014-01-01", + "disabled": true + }, + { + "key": "variants", + "value": "NC_000001.10:161333381:C:T", + "disabled": true + }, + { + "key": "subject", + "value": "HCC1143" + }, + { + "key": "ranges", + "value": "NC_000019.10:8892900-8892999" + }, + { + "key": "testIdentifiers", + "value": "e64f5164-a2d3-47f4-97fb-495e84f394c1" + } + ] + } + }, + "response": [] + }, { "name": "Dx implications for LDLR", "request": { @@ -1763,7 +1827,7 @@ "method": "GET", "header": [], "url": { - "raw": "https://fhir-gen-ops.herokuapp.com/subject-operations/phenotype-operations/$find-subject-molecular-consequences?subject=HG00403&ranges=NC_000001.10:86852500-86852800&featureConsequences=http://sequenceontology.org|SO:0001575", + "raw": "https://fhir-gen-ops.herokuapp.com/subject-operations/phenotype-operations/$find-subject-molecular-consequences?subject=NB6TK329&variants=NC_000006.12:7576293:G:A", "protocol": "https", "host": [ "fhir-gen-ops", @@ -1778,20 +1842,21 @@ "query": [ { "key": "subject", - "value": "HG00403" + "value": "NB6TK329" }, { "key": "variants", - "value": "NM_031475.3:c.2217C>T", - "disabled": true + "value": "NC_000006.12:7576293:G:A" }, { "key": "ranges", - "value": "NC_000001.10:86852500-86852800" + "value": "NC_000001.10:86852500-86852800", + "disabled": true }, { "key": "featureConsequences", - "value": "http://sequenceontology.org|SO:0001575" + "value": "http://sequenceontology.org|SO:0001575", + "disabled": true }, { "key": "variants", @@ -2735,7 +2800,7 @@ "method": "GET", "header": [], "url": { - "raw": "https://fhir-gen-ops.herokuapp.com/population-operations/phenotype-operations/$find-population-dx-implications?conditions=https://www.ncbi.nlm.nih.gov/medgen|C1708353", + "raw": "https://fhir-gen-ops.herokuapp.com/population-operations/phenotype-operations/$find-population-dx-implications?variants=NC_000019.10:38499669:C:T&includePatientList=true", "protocol": "https", "host": [ "fhir-gen-ops", @@ -2749,12 +2814,16 @@ ], "query": [ { - "key": "conditions", - "value": "https://www.ncbi.nlm.nih.gov/medgen|C1708353" + "key": "variants", + "value": "NC_000019.10:38499669:C:T" }, { - "key": "variants", - "value": "NC_000019.10:38499669:C:T", + "key": "includePatientList", + "value": "true" + }, + { + "key": "conditions", + "value": "https://www.ncbi.nlm.nih.gov/medgen|C1708353", "disabled": true } ] @@ -2828,6 +2897,47 @@ } ] }, + { + "name": "find-population-molecular-consequences", + "item": [ + { + "name": "find-population-molecular-consequences", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "https://fhir-gen-ops.herokuapp.com/population-operations/phenotype-operations/$find-population-molecular-consequences?variants=NC_000001.10:152785039:G:T&featureConsequences=http://sequenceontology.org|SO:0001583&includePatientList=true", + "protocol": "https", + "host": [ + "fhir-gen-ops", + "herokuapp", + "com" + ], + "path": [ + "population-operations", + "phenotype-operations", + "$find-population-molecular-consequences" + ], + "query": [ + { + "key": "variants", + "value": "NC_000001.10:152785039:G:T" + }, + { + "key": "featureConsequences", + "value": "http://sequenceontology.org|SO:0001583" + }, + { + "key": "includePatientList", + "value": "true" + } + ] + } + }, + "response": [] + } + ] + }, { "name": "find-study-metadata", "item": [ From e065e9cd92e96a8d1cecd6939f8631c710e902af Mon Sep 17 00:00:00 2001 From: rhdolin Date: Wed, 11 Sep 2024 15:00:27 -0700 Subject: [PATCH 3/3] Update FHIRGenomicsOperations.postman_collection.json --- ...GenomicsOperations.postman_collection.json | 64 ------------------- 1 file changed, 64 deletions(-) diff --git a/FHIRGenomicsOperations.postman_collection.json b/FHIRGenomicsOperations.postman_collection.json index 3b9612ed..72b7c7df 100644 --- a/FHIRGenomicsOperations.postman_collection.json +++ b/FHIRGenomicsOperations.postman_collection.json @@ -1514,70 +1514,6 @@ }, "response": [] }, - { - "name": "find-subject-dx-implications AWS", - "request": { - "method": "GET", - "header": [], - "url": { - "raw": "https://5ipkr7tx39.execute-api.us-east-1.amazonaws.com/dev/subject-operations/genotype-operations/$find-subject-dx-implications?subject=HCC1143&ranges=NC_000019.10:8892900-8892999&testIdentifiers=e64f5164-a2d3-47f4-97fb-495e84f394c1", - "protocol": "https", - "host": [ - "5ipkr7tx39", - "execute-api", - "us-east-1", - "amazonaws", - "com" - ], - "path": [ - "dev", - "subject-operations", - "genotype-operations", - "$find-subject-dx-implications" - ], - "query": [ - { - "key": "testIdentifiers", - "value": "TL000016660-ASM", - "disabled": true - }, - { - "key": "specimenIdentifiers", - "value": "GS000016660-ASM", - "disabled": true - }, - { - "key": "genomicSourceClass", - "value": "germline", - "disabled": true - }, - { - "key": "testDateRange", - "value": "ge2014-01-01", - "disabled": true - }, - { - "key": "variants", - "value": "NC_000001.10:161333381:C:T", - "disabled": true - }, - { - "key": "subject", - "value": "HCC1143" - }, - { - "key": "ranges", - "value": "NC_000019.10:8892900-8892999" - }, - { - "key": "testIdentifiers", - "value": "e64f5164-a2d3-47f4-97fb-495e84f394c1" - } - ] - } - }, - "response": [] - }, { "name": "Dx implications for LDLR", "request": {