Skip to content

Commit

Permalink
Merge pull request #107 from FHIR/add-hla-normalization
Browse files Browse the repository at this point in the history
Add hla normalization
  • Loading branch information
mihaitodor authored Dec 24, 2024
2 parents c626ec7 + d6b6121 commit 0c86166
Show file tree
Hide file tree
Showing 8 changed files with 202 additions and 98 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ utilities/FASTA
utilities/mongo_utilities.py
/data
secrets.env
app/temp.py
30 changes: 22 additions & 8 deletions app/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import requests
from flask import abort

from utilities.pyard import redux

# MongoDB Client URIs
FHIR_genomics_data_client_uri = f"mongodb+srv://readonly:{os.getenv('MONGODB_READONLY_PASSWORD')}@cluster0.8ianr.mongodb.net/FHIRGenomicsData"
utilities_data_client_uri = f"mongodb+srv://readonly:{os.getenv('MONGODB_READONLY_PASSWORD')}@cluster0.8ianr.mongodb.net/UtilitiesData"
Expand Down Expand Up @@ -257,16 +259,22 @@ def get_feature_consequence(feature_consequence):

def get_haplotype(haplotype):
haplotype = haplotype.strip()
haplotype_return = {'isSystem': False, 'haplotype': haplotype, 'system': None}
haplotype_return = {'isSystem': False, 'haplotype': haplotype, 'system': None, 'lgxHaplotype': None}
try:
haplotype_return['lgxHaplotype'] = redux(haplotype, "lgx")
except Exception:
haplotype_return['lgxHaplotype'] = None
if "|" in haplotype:
if haplotype.count("|") == 1:
haplotype_system_url = haplotype.rsplit("|")[0]
haplotype = haplotype.rsplit("|")[1]
if "HTTP" in haplotype.split("|")[0].upper():
haplotype_system_url = haplotype.split("|")[0]
haplotype = haplotype.split("|", 1)[1]
haplotype_return['isSystem'] = True
haplotype_return['haplotype'] = haplotype
haplotype_return['system'] = haplotype_system_url
else:
abort(400, f'haplotype ({haplotype}) is not in the correct format(codesystem|code)')
try:
haplotype_return['lgxHaplotype'] = redux(haplotype, "lgx")
except Exception:
haplotype_return['lgxHaplotype'] = None

return haplotype_return

Expand Down Expand Up @@ -1597,10 +1605,16 @@ def query_PharmGKB_by_haplotypes(normalizedHaplotypeList, treatmentCodeList, que

if haplotype['isSystem']:
query['$or'].append({'genotypeCode': {"$eq": haplotype['haplotype']}})
elif haplotype["lgxHaplotype"] is not None:
query['$or'].append({'$or': [
{'genotypeCode': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*", "$options": "i"}},
{'genotypeDesc': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*", "$options": "i"}},
{'hlaLgx': {'$regex': ".*"+str(haplotype['lgxHaplotype']).replace('*', r'\*')+".*"}}
]})
else:
query['$or'].append({'$or': [
{'genotypeCode': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*"}},
{'genotypeDesc': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*"}}
{'genotypeCode': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*", "$options": "i"}},
{'genotypeDesc': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*", "$options": "i"}}
]})

query_string = [{'$match': query},
Expand Down
114 changes: 44 additions & 70 deletions app/endpoints.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from collections import OrderedDict

from flask import abort, jsonify

from app import common


Expand Down Expand Up @@ -724,9 +722,11 @@ def find_subject_specific_haplotypes(
]
else:
query['$or'] = [
{'genotypeCode': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*"}},
{'genotypeDesc': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*"}}
{'genotypeCode': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*", "$options": "i"}},
{'genotypeDesc': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*", "$options": "i"}}
]
if haplotype["lgxHaplotype"] is not None:
query["$or"].append({'hlaLgx': {'$regex': ".*"+str(haplotype['lgxHaplotype']).replace('*', r'\*')+".*"}})

try:
haplotype_q = common.genotypes_db.aggregate([{"$match": query}])
Expand All @@ -742,32 +742,21 @@ def find_subject_specific_haplotypes(
"valueBoolean": present
})

if present:
genotype_profiles = []
for qresult in haplotype_q:
# haplotype_profile = create_haplotype_profile(qresult, subject, "")
genotype_profile = common.create_genotype_profile(qresult, subject, [])

genotype_profiles.append(genotype_profile)

# parameter["part"].append({
# "name": "haplotype",
# "resource": haplotype_profile
# })

if genotype_profiles:
genotype_profiles = sorted(genotype_profiles, key=lambda d: d['id'])
genotype_profiles = []
for qresult in haplotype_q:
genotype_profile = common.create_genotype_profile(qresult, subject, [])
genotype_profiles.append(genotype_profile)

for genotype_profile in genotype_profiles:
parameter["part"].append({
"name": "genotype",
"resource": genotype_profile
})
if genotype_profiles:
genotype_profiles = sorted(genotype_profiles, key=lambda d: d['id'])

result["parameter"].append(parameter)
for genotype_profile in genotype_profiles:
parameter["part"].append({
"name": "genotype",
"resource": genotype_profile
})

if not result["parameter"]:
result.pop("parameter")
result["parameter"].append(parameter)

return jsonify(result)

Expand Down Expand Up @@ -898,7 +887,6 @@ def find_subject_tx_implications(
query.pop("genomicSourceClass")

query_results = common.query_PharmGKB_by_haplotypes(normalized_haplotype_list, treatment_code_list, query)
print(query_results)
for res in query_results:
for implication in res["txImplicationMatches"]:

Expand All @@ -915,13 +903,6 @@ def find_subject_tx_implications(
}
result["parameter"].append(impl_param)

# haplotype_profile = create_haplotype_profile(res, subject, res["UUID"])

# parameter["part"].append({
# "name": "haplotype",
# "resource": haplotype_profile
# })

genotype_profile = common.create_genotype_profile(res, subject, [str(res['_id'])])

geno_param = {
Expand Down Expand Up @@ -956,13 +937,6 @@ def find_subject_tx_implications(
genotype_profiles = []
for genItem in res["patientMatches"]:

# haplotype_profile = create_haplotype_profile(genItem, subject, genItem["UUID"])

# parameter["part"].append({
# "name": "haplotype",
# "resource": haplotype_profile
# })

genotype_profile = common.create_genotype_profile(genItem, subject, [str(genItem['_id'])])

genotype_profiles.append(genotype_profile)
Expand Down Expand Up @@ -1825,11 +1799,6 @@ def find_population_specific_haplotypes(
# Query
query = {}

# Genomic Source Class Query
# if genomicSourceClass:
# genomicSourceClass = genomicSourceClass.strip().lower()
# query["genomicSourceClass"] = {"$eq": genomicSourceClass}

haplotypeItem = []
normalizedHaplotypesLists = []
for normalized_haplotype_list in haplotypes:
Expand Down Expand Up @@ -1863,6 +1832,7 @@ def find_population_specific_haplotypes(

all_patients = []
for hapList in normalizedHaplotypesLists:
patients = []

for haplotype in hapList:
if haplotype['isSystem']:
Expand All @@ -1872,24 +1842,24 @@ def find_population_specific_haplotypes(
]
else:
query['$or'] = [
{'genotypeCode': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*"}},
{'genotypeDesc': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*"}}
{'genotypeCode': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*", "$options": "i"}},
{'genotypeDesc': {'$regex': ".*"+str(haplotype['haplotype']).replace('*', r'\*')+".*", "$options": "i"}}
]

try:
haplotype_q = common.genotypes_db.aggregate([
{"$match": query},
{'$group': {'_id': '$patientID'}}
])
haplotype_q = list(haplotype_q)
except Exception as e:
print(f"DEBUG: Error{e} under find_population_specific_haplotypes query={query}")
haplotype_q = []

patients = []

for patientID in haplotype_q:
patients.append(patientID['_id'])
if haplotype["lgxHaplotype"] is not None:
query["$or"].append({'hlaLgx': {'$regex': ".*"+str(haplotype['lgxHaplotype']).replace('*', r'\*')+".*"}})

try:
haplotype_q = common.genotypes_db.aggregate([
{"$match": query},
{'$group': {'_id': '$patientID'}}
])
haplotype_q = list(haplotype_q)
except Exception as e:
print(f"DEBUG: Error{e} under find_population_specific_haplotypes query={query}")
haplotype_q = []

for patientID in haplotype_q:
patients.append(patientID['_id'])

all_patients.append(set(patients))

Expand Down Expand Up @@ -1932,9 +1902,11 @@ def find_population_specific_haplotypes(
]
else:
query['$or'] = [
{'genotypeCode': {'$regex': ".*"+str(hapItem['haplotype']).replace('*', r'\*')+".*"}},
{'genotypeDesc': {'$regex': ".*"+str(hapItem['haplotype']).replace('*', r'\*')+".*"}}
{'genotypeCode': {'$regex': ".*"+str(hapItem['haplotype']).replace('*', r'\*')+".*", "$options": "i"}},
{'genotypeDesc': {'$regex': ".*"+str(hapItem['haplotype']).replace('*', r'\*')+".*", "$options": "i"}}
]
if hapItem["lgxHaplotype"] is not None:
query["$or"].append({'hlaLgx': {'$regex': ".*"+str(hapItem['lgxHaplotype']).replace('*', r'\*')+".*"}})

try:
haplotype_q = common.genotypes_db.aggregate([
Expand Down Expand Up @@ -2200,9 +2172,10 @@ def find_population_dx_implications(
if conditions:
condition_code_list = list(map(common.get_condition, conditions))

normalized_haplotype_list = []
if haplotypes:
normalized_haplotype_list = list(map(common.get_haplotype, haplotypes))
# suppress this block for now, since we don't have any haplotype-related dxImplications
# normalized_haplotype_list = []
# if haplotypes:
# normalized_haplotype_list = list(map(common.get_haplotype, haplotypes))

# Query
query = {}
Expand Down Expand Up @@ -2263,7 +2236,8 @@ def find_population_dx_implications(
if genomicSourceClass:
query.pop("genomicSourceClass")

query_results = common.query_PharmGKB_by_haplotypes(normalized_haplotype_list, [], query, True)
# query_results = common.query_PharmGKB_by_haplotypes(normalized_haplotype_list, [], query, True)
query_results = [] # PharmGKB doesn't have dxImplications. This code block will need revision once we have a source of haplotype-based dxImplications.

parameter = OrderedDict()
parameter["name"] = "implications"
Expand Down
32 changes: 13 additions & 19 deletions app/utilities_endpoints.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
import os

import json
from collections import OrderedDict
from os.path import isdir
import pyard

import requests
from flask import abort, jsonify
from app import common
import json

# Make sure the pyard folder exists locally
if not isdir('./data/pyard'):
exit("Missing pyard folder. Please run fetch_utilities_data.sh!")

pyard_database_version = os.getenv('PYARD_DATABASE_VERSION', '3580')
ard = pyard.init(data_dir='./data/pyard', cache_size=1, imgt_version=pyard_database_version)
from app import common
from utilities.pyard import redux


def fetch_concept_map(mapID):
Expand Down Expand Up @@ -366,14 +360,14 @@ def normalize_hla(allele):
try:
return {
allele: {
"G": ard.redux(allele, "G"),
"P": ard.redux(allele, "P"),
"lg": ard.redux(allele, "lg"),
"lgx": ard.redux(allele, "lgx"),
"W": ard.redux(allele, "W"),
"exon": ard.redux(allele, "exon"),
"U2": ard.redux(allele, "U2"),
"S": ard.redux(allele, "S")
"G": redux(allele, "G"),
"P": redux(allele, "P"),
"lg": redux(allele, "lg"),
"lgx": redux(allele, "lgx"),
"W": redux(allele, "W"),
"exon": redux(allele, "exon"),
"U2": redux(allele, "U2"),
"S": redux(allele, "S")
}
}
except Exception as err:
Expand Down
17 changes: 16 additions & 1 deletion tests/expected_outputs/find_subject_specific_haplotypes/2.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
{
"resourceType": "Parameters"
"resourceType": "Parameters",
"parameter": [
{
"name": "haplotypes",
"part": [
{
"name": "haplotypeItem",
"valueString": "HLA-B*27"
},
{
"name": "presence",
"valueBoolean": false
}
]
}
]
}
Loading

0 comments on commit 0c86166

Please sign in to comment.