style: clean up code formatting and improve consistency in string quotes
dermatologist committed Jan 28, 2025
1 parent 951d206 commit cf86eb5
Showing 7 changed files with 142 additions and 92 deletions.
79 changes: 46 additions & 33 deletions src/fhiry/base_fhiry.py
@@ -15,6 +15,7 @@ def default_output_processor(
 ) -> str:
     return output
 
+
 class BaseFhiry(object):
     def __init__(self, config_json=None):
         self._df = None
@@ -33,12 +34,14 @@ def __init__(self, config_json=None):
         self._delete_col_raw_coding = True
         if config_json is not None:
             try:
-                with open(config_json, 'r') as f: # config_json is a file path
+                with open(config_json, "r") as f:  # config_json is a file path
                     self.config = json.load(f)
             except:
-                self.config = json.loads(config_json) # config_json is a json string
+                self.config = json.loads(config_json)  # config_json is a json string
         else:
-            self.config = json.loads('{ "REMOVE": ["resource.text.div"], "RENAME": { "resource.id": "id" } }')
+            self.config = json.loads(
+                '{ "REMOVE": ["resource.text.div"], "RENAME": { "resource.id": "id" } }'
+            )
 
     @property
     def df(self):
@@ -53,23 +56,22 @@ def delete_col_raw_coding(self, delete_col_raw_coding):
         self._delete_col_raw_coding = delete_col_raw_coding
 
     def read_bundle_from_bundle_dict(self, bundle_dict):
-        return pd.json_normalize(bundle_dict['entry'])
+        return pd.json_normalize(bundle_dict["entry"])
 
     def delete_unwanted_cols(self):
-        for col in self.config['REMOVE']:
+        for col in self.config["REMOVE"]:
             if col in self._df.columns:
                 del self._df[col]
 
     def rename_cols(self):
-        self._df.rename(columns=self.config['RENAME'], inplace=True)
+        self._df.rename(columns=self.config["RENAME"], inplace=True)
 
     def process_df(self):
         self.delete_unwanted_cols()
         self.convert_object_to_list()
         self.add_patient_id()
         self.rename_cols()
 
-
     def process_bundle_dict(self, bundle_dict):
         self._df = self.read_bundle_from_bundle_dict(bundle_dict)
         self.delete_unwanted_cols()
@@ -79,44 +81,54 @@ def process_bundle_dict(self, bundle_dict):
         return self._df
 
     def convert_object_to_list(self):
-        """Convert object to a list of codes
-        """
+        """Convert object to a list of codes"""
         for col in self._df.columns:
-            if 'coding' in col:
-                codes = self._df.apply(
-                    lambda x: self.process_list(x[col]), axis=1)
+            if "coding" in col:
+                codes = self._df.apply(lambda x: self.process_list(x[col]), axis=1)
                 self._df = pd.concat(
-                    [self._df, codes.to_frame(name=col+'codes')], axis=1)
+                    [self._df, codes.to_frame(name=col + "codes")], axis=1
+                )
                 if self._delete_col_raw_coding:
                     del self._df[col]
-            if 'display' in col:
-                codes = self._df.apply(
-                    lambda x: self.process_list(x[col]), axis=1)
+            if "display" in col:
+                codes = self._df.apply(lambda x: self.process_list(x[col]), axis=1)
                 self._df = pd.concat(
-                    [self._df, codes.to_frame(name=col+'display')], axis=1)
+                    [self._df, codes.to_frame(name=col + "display")], axis=1
+                )
                 del self._df[col]
 
     def add_patient_id(self):
-        """Create a patientId column with the resource.id if a Patient resource or with the resource.subject.reference if other resource type
-        """
+        """Create a patientId column with the resource.id if a Patient resource or with the resource.subject.reference if other resource type"""
         try:
             # PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
             newframe = self._df.copy()
-            newframe['patientId'] = self._df.apply(lambda x: x['resource.id'] if x['resource.resourceType']
-                                                   == 'Patient' else self.check_subject_reference(x), axis=1)
+            newframe["patientId"] = self._df.apply(
+                lambda x: (
+                    x["resource.id"]
+                    if x["resource.resourceType"] == "Patient"
+                    else self.check_subject_reference(x)
+                ),
+                axis=1,
+            )
             self._df = newframe
         except:
             try:
                 newframe = self._df.copy()
-                newframe['patientId'] = self._df.apply(lambda x: x['id'] if x['resourceType']
-                                                       == 'Patient' else self.check_subject_reference(x), axis=1)
+                newframe["patientId"] = self._df.apply(
+                    lambda x: (
+                        x["id"]
+                        if x["resourceType"] == "Patient"
+                        else self.check_subject_reference(x)
+                    ),
+                    axis=1,
+                )
                 self._df = newframe
             except:
                 pass
 
     def check_subject_reference(self, row):
         try:
-            return row['resource.subject.reference'].replace('Patient/', '')
+            return row["resource.subject.reference"].replace("Patient/", "")
         except:
             return ""
 
@@ -137,10 +149,10 @@ def process_list(self, myList):
         myCodes = []
         if isinstance(myList, list):
             for entry in myList:
-                if 'code' in entry:
-                    myCodes.append(entry['code'])
-                elif 'display' in entry:
-                    myCodes.append(entry['display'])
+                if "code" in entry:
+                    myCodes.append(entry["code"])
+                elif "display" in entry:
+                    myCodes.append(entry["display"])
         return myCodes
 
     def llm_query(self, query, llm, embed_model=None, verbose=True):
@@ -177,12 +189,13 @@ def llm_query(self, query, llm, embed_model=None, verbose=True):
         else:
             embed_model = HuggingFaceEmbeddings(model_name=embed_model)
         service_context = ServiceContext.from_defaults(
-                llm=llm,
-                embed_model=embed_model,
-                )
+            llm=llm,
+            embed_model=embed_model,
+        )
         query_engine = PandasQueryEngine(
             df=self._df,
             service_context=service_context,
             output_processor=default_output_processor,
-            verbose=verbose)
-        return query_engine.query(query)
+            verbose=verbose,
+        )
+        return query_engine.query(query)
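
Note (not part of the commit): the default config reformatted in __init__ above defines the REMOVE and RENAME keys that delete_unwanted_cols() and rename_cols() read. A minimal sketch of overriding it with a custom JSON string; the one-entry bundle and the import path are illustrative assumptions, not taken from this diff:

    from fhiry.base_fhiry import BaseFhiry  # assumed import path for the module shown above

    # Hypothetical single-entry FHIR Bundle, just to exercise the pipeline
    bundle_dict = {"entry": [{"resource": {"resourceType": "Patient", "id": "example-1"}}]}

    # Same REMOVE/RENAME structure as the default shown in __init__, passed as a JSON string
    custom_config = '{ "REMOVE": ["resource.text.div"], "RENAME": { "resource.id": "id" } }'

    bf = BaseFhiry(config_json=custom_config)
    df = bf.process_bundle_dict(bundle_dict)
    print(df.columns)  # expect "resource.id" renamed to "id" plus a "patientId" column
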
6 changes: 2 additions & 4 deletions src/fhiry/bqsearch.py
@@ -5,7 +5,6 @@
 https://opensource.org/licenses/MIT
 """
 
-
 from google.cloud import bigquery
 
 from .base_fhiry import BaseFhiry
@@ -18,7 +17,7 @@ def __init__(self, config_json=None):
         self._client = bigquery.Client()
         super().__init__(config_json=config_json)
 
-    def search(self, query = None):
+    def search(self, query=None):
         if query is None:
             _query = """
             SELECT *
@@ -27,12 +26,11 @@ def search(self, query = None):
             """
         else:
             try:
-                with open(query, 'r') as f:
+                with open(query, "r") as f:
                     _query = f.read()
             except:
                 _query = query
 
         self._df = self._client.query(_query).to_dataframe()
         super().process_df()
         return self._df
-
5 changes: 1 addition & 4 deletions src/fhiry/fhirndjson.py
@@ -5,13 +5,13 @@
 https://opensource.org/licenses/MIT
 """
 
-
 import pandas as pd
 import json
 import os
 from .base_fhiry import BaseFhiry
 from tqdm import tqdm
 
+
 class Fhirndjson(BaseFhiry):
     def __init__(self, config_json=None):
         self._folder = ""
@@ -29,7 +29,6 @@ def folder(self):
     def folder(self, folder):
         self._folder = folder
 
-
     def read_resource_from_line(self, line):
         return pd.json_normalize(json.loads(line))
 
@@ -52,5 +51,3 @@ def process_file(self, file):
                     df = pd.concat([df, self._df])
         self._df = df
         return self._df
-
-
27 changes: 16 additions & 11 deletions src/fhiry/fhirsearch.py
@@ -2,6 +2,7 @@
 import requests
 from .base_fhiry import BaseFhiry
 
+
 class Fhirsearch(BaseFhiry):
 
     def __init__(self, fhir_base_url, config_json=None):
@@ -23,15 +24,20 @@ def search(self, resource_type="Patient", search_parameters={}):
 
         headers = {"Content-Type": "application/fhir+json"}
 
-        if '_count' not in search_parameters:
-            search_parameters['_count'] = self.page_size
+        if "_count" not in search_parameters:
+            search_parameters["_count"] = self.page_size
 
-        search_url = f'{self.fhir_base_url}/{resource_type}'
-        r = requests.get(search_url, params=search_parameters, headers=headers, **self.requests_kwargs)
+        search_url = f"{self.fhir_base_url}/{resource_type}"
+        r = requests.get(
+            search_url,
+            params=search_parameters,
+            headers=headers,
+            **self.requests_kwargs,
+        )
         r.raise_for_status()
         bundle_dict = r.json()
 
-        if 'entry' in bundle_dict:
+        if "entry" in bundle_dict:
             df = super().process_bundle_dict(bundle_dict)
 
             next_page_url = get_next_page_url(bundle_dict)
@@ -51,13 +57,12 @@ def search(self, resource_type="Patient", search_parameters={}):
         return self._df
 
 
-
 def get_next_page_url(bundle_dict):
-    links = bundle_dict.get('link')
+    links = bundle_dict.get("link")
     if links:
-        for link in links:
-            relation = link.get('relation')
-            if relation == 'next':
-                return link.get('url')
+        for link in links:
+            relation = link.get("relation")
+            if relation == "next":
+                return link.get("url")
 
     return None
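
Note (not part of the commit): a hedged usage sketch for the Fhirsearch class touched above. search() pages through the server by following the Bundle "next" links via get_next_page_url(); the base URL and count below are placeholder values, and the import path is an assumption:

    from fhiry.fhirsearch import Fhirsearch  # assumed import path

    fs = Fhirsearch(fhir_base_url="https://hapi.fhir.org/baseR4")  # placeholder public test server
    df = fs.search(resource_type="Patient", search_parameters={"_count": 100})
    print(df.head())
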
10 changes: 5 additions & 5 deletions src/fhiry/fhiry.py
@@ -14,6 +14,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class Fhiry(BaseFhiry):
     def __init__(self, config_json=None):
         self._filename = ""
@@ -50,10 +51,10 @@ def delete_col_raw_coding(self, delete_col_raw_coding):
         self._delete_col_raw_coding = delete_col_raw_coding
 
     def read_bundle_from_file(self, filename):
-        with open(filename, encoding='utf8', mode='r') as f:
+        with open(filename, encoding="utf8", mode="r") as f:
             json_in = f.read()
             json_in = json.loads(json_in)
-            return pd.json_normalize(json_in['entry'])
+            return pd.json_normalize(json_in["entry"])
 
     def process_source(self):
         """Read a single JSON resource or a directory full of JSON resources
@@ -64,7 +65,8 @@ def process_source(self):
             for file in tqdm(os.listdir(self._folder)):
                 if file.endswith(".json"):
                     self._df = self.read_bundle_from_file(
-                        os.path.join(self._folder, file))
+                        os.path.join(self._folder, file)
+                    )
                     self.process_df()
                     if df.empty:
                         df = self._df
@@ -84,5 +86,3 @@ def process_bundle_dict(self, bundle_dict):
         self._df = self.read_bundle_from_bundle_dict(bundle_dict)
         self.process_df()
         return self._df
-
-
(Diffs for the remaining 2 changed files are not shown.)