style: clean up code formatting and improve consistency in string quotes
dermatologist committed Jan 28, 2025
1 parent 951d206 commit cf86eb5
Showing 7 changed files with 142 additions and 92 deletions.
79 changes: 46 additions & 33 deletions src/fhiry/base_fhiry.py
@@ -15,6 +15,7 @@ def default_output_processor(
 ) -> str:
     return output
 
+
 class BaseFhiry(object):
     def __init__(self, config_json=None):
         self._df = None
@@ -33,12 +34,14 @@ def __init__(self, config_json=None):
         self._delete_col_raw_coding = True
         if config_json is not None:
             try:
-                with open(config_json, 'r') as f: # config_json is a file path
+                with open(config_json, "r") as f:  # config_json is a file path
                     self.config = json.load(f)
             except:
-                self.config = json.loads(config_json) # config_json is a json string
+                self.config = json.loads(config_json)  # config_json is a json string
         else:
-            self.config = json.loads('{ "REMOVE": ["resource.text.div"], "RENAME": { "resource.id": "id" } }')
+            self.config = json.loads(
+                '{ "REMOVE": ["resource.text.div"], "RENAME": { "resource.id": "id" } }'
+            )
 
     @property
     def df(self):
@@ -53,23 +56,22 @@ def delete_col_raw_coding(self, delete_col_raw_coding):
         self._delete_col_raw_coding = delete_col_raw_coding
 
     def read_bundle_from_bundle_dict(self, bundle_dict):
-        return pd.json_normalize(bundle_dict['entry'])
+        return pd.json_normalize(bundle_dict["entry"])
 
     def delete_unwanted_cols(self):
-        for col in self.config['REMOVE']:
+        for col in self.config["REMOVE"]:
             if col in self._df.columns:
                 del self._df[col]
 
     def rename_cols(self):
-        self._df.rename(columns=self.config['RENAME'], inplace=True)
+        self._df.rename(columns=self.config["RENAME"], inplace=True)
 
     def process_df(self):
         self.delete_unwanted_cols()
         self.convert_object_to_list()
         self.add_patient_id()
         self.rename_cols()
 
-
     def process_bundle_dict(self, bundle_dict):
         self._df = self.read_bundle_from_bundle_dict(bundle_dict)
         self.delete_unwanted_cols()
@@ -79,44 +81,54 @@ def process_bundle_dict(self, bundle_dict):
         return self._df
 
     def convert_object_to_list(self):
-        """Convert object to a list of codes
-        """
+        """Convert object to a list of codes"""
         for col in self._df.columns:
-            if 'coding' in col:
-                codes = self._df.apply(
-                    lambda x: self.process_list(x[col]), axis=1)
+            if "coding" in col:
+                codes = self._df.apply(lambda x: self.process_list(x[col]), axis=1)
                 self._df = pd.concat(
-                    [self._df, codes.to_frame(name=col+'codes')], axis=1)
+                    [self._df, codes.to_frame(name=col + "codes")], axis=1
+                )
                 if self._delete_col_raw_coding:
                     del self._df[col]
-            if 'display' in col:
-                codes = self._df.apply(
-                    lambda x: self.process_list(x[col]), axis=1)
+            if "display" in col:
+                codes = self._df.apply(lambda x: self.process_list(x[col]), axis=1)
                 self._df = pd.concat(
-                    [self._df, codes.to_frame(name=col+'display')], axis=1)
+                    [self._df, codes.to_frame(name=col + "display")], axis=1
+                )
                 del self._df[col]
 
     def add_patient_id(self):
-        """Create a patientId column with the resource.id if a Patient resource or with the resource.subject.reference if other resource type
-        """
+        """Create a patientId column with the resource.id if a Patient resource or with the resource.subject.reference if other resource type"""
         try:
             # PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
             newframe = self._df.copy()
-            newframe['patientId'] = self._df.apply(lambda x: x['resource.id'] if x['resource.resourceType']
-                                                   == 'Patient' else self.check_subject_reference(x), axis=1)
+            newframe["patientId"] = self._df.apply(
+                lambda x: (
+                    x["resource.id"]
+                    if x["resource.resourceType"] == "Patient"
+                    else self.check_subject_reference(x)
+                ),
+                axis=1,
+            )
             self._df = newframe
         except:
             try:
                 newframe = self._df.copy()
-                newframe['patientId'] = self._df.apply(lambda x: x['id'] if x['resourceType']
-                                                       == 'Patient' else self.check_subject_reference(x), axis=1)
+                newframe["patientId"] = self._df.apply(
+                    lambda x: (
+                        x["id"]
+                        if x["resourceType"] == "Patient"
+                        else self.check_subject_reference(x)
+                    ),
+                    axis=1,
+                )
                 self._df = newframe
             except:
                 pass
 
     def check_subject_reference(self, row):
         try:
-            return row['resource.subject.reference'].replace('Patient/', '')
+            return row["resource.subject.reference"].replace("Patient/", "")
         except:
             return ""
 
@@ -137,10 +149,10 @@ def process_list(self, myList):
         myCodes = []
         if isinstance(myList, list):
             for entry in myList:
-                if 'code' in entry:
-                    myCodes.append(entry['code'])
-                elif 'display' in entry:
-                    myCodes.append(entry['display'])
+                if "code" in entry:
+                    myCodes.append(entry["code"])
+                elif "display" in entry:
+                    myCodes.append(entry["display"])
         return myCodes
 
     def llm_query(self, query, llm, embed_model=None, verbose=True):
@@ -177,12 +189,13 @@ def llm_query(self, query, llm, embed_model=None, verbose=True):
         else:
             embed_model = HuggingFaceEmbeddings(model_name=embed_model)
         service_context = ServiceContext.from_defaults(
-                llm=llm,
-                embed_model=embed_model,
-                )
+            llm=llm,
+            embed_model=embed_model,
+        )
         query_engine = PandasQueryEngine(
             df=self._df,
             service_context=service_context,
             output_processor=default_output_processor,
-            verbose=verbose)
-        return query_engine.query(query)
+            verbose=verbose,
+        )
+        return query_engine.query(query)
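
Note (not part of the commit): the default config reformatted in __init__ above defines the REMOVE and RENAME keys that delete_unwanted_cols() and rename_cols() read. A minimal sketch of overriding it with a custom JSON string; the one-entry bundle and the import path are illustrative assumptions, not taken from this diff:

    from fhiry.base_fhiry import BaseFhiry  # assumed import path for the module shown above

    # Hypothetical single-entry FHIR Bundle, just to exercise the pipeline
    bundle_dict = {"entry": [{"resource": {"resourceType": "Patient", "id": "example-1"}}]}

    # Same REMOVE/RENAME structure as the default shown in __init__, passed as a JSON string
    custom_config = '{ "REMOVE": ["resource.text.div"], "RENAME": { "resource.id": "id" } }'

    bf = BaseFhiry(config_json=custom_config)
    df = bf.process_bundle_dict(bundle_dict)
    print(df.columns)  # expect "resource.id" renamed to "id" plus a "patientId" column
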
6 changes: 2 additions & 4 deletions src/fhiry/bqsearch.py
@@ -5,7 +5,6 @@
 https://opensource.org/licenses/MIT
 """
 
-
 from google.cloud import bigquery
 
 from .base_fhiry import BaseFhiry
@@ -18,7 +17,7 @@ def __init__(self, config_json=None):
         self._client = bigquery.Client()
         super().__init__(config_json=config_json)
 
-    def search(self, query = None):
+    def search(self, query=None):
         if query is None:
             _query = """
             SELECT *
@@ -27,12 +26,11 @@ def search(self, query = None):
             """
         else:
             try:
-                with open(query, 'r') as f:
+                with open(query, "r") as f:
                     _query = f.read()
             except:
                 _query = query
 
         self._df = self._client.query(_query).to_dataframe()
         super().process_df()
         return self._df
-
5 changes: 1 addition & 4 deletions src/fhiry/fhirndjson.py
@@ -5,13 +5,13 @@
 https://opensource.org/licenses/MIT
 """
 
-
 import pandas as pd
 import json
 import os
 from .base_fhiry import BaseFhiry
 from tqdm import tqdm
 
+
 class Fhirndjson(BaseFhiry):
     def __init__(self, config_json=None):
         self._folder = ""
@@ -29,7 +29,6 @@ def folder(self):
     def folder(self, folder):
         self._folder = folder
 
-
     def read_resource_from_line(self, line):
         return pd.json_normalize(json.loads(line))
 
@@ -52,5 +51,3 @@ def process_file(self, file):
                     df = pd.concat([df, self._df])
         self._df = df
         return self._df
-
-
27 changes: 16 additions & 11 deletions src/fhiry/fhirsearch.py
@@ -2,6 +2,7 @@
 import requests
 from .base_fhiry import BaseFhiry
 
+
 class Fhirsearch(BaseFhiry):
 
     def __init__(self, fhir_base_url, config_json=None):
@@ -23,15 +24,20 @@ def search(self, resource_type="Patient", search_parameters={}):
 
         headers = {"Content-Type": "application/fhir+json"}
 
-        if '_count' not in search_parameters:
-            search_parameters['_count'] = self.page_size
+        if "_count" not in search_parameters:
+            search_parameters["_count"] = self.page_size
 
-        search_url = f'{self.fhir_base_url}/{resource_type}'
-        r = requests.get(search_url, params=search_parameters, headers=headers, **self.requests_kwargs)
+        search_url = f"{self.fhir_base_url}/{resource_type}"
+        r = requests.get(
+            search_url,
+            params=search_parameters,
+            headers=headers,
+            **self.requests_kwargs,
+        )
         r.raise_for_status()
         bundle_dict = r.json()
 
-        if 'entry' in bundle_dict:
+        if "entry" in bundle_dict:
             df = super().process_bundle_dict(bundle_dict)
 
             next_page_url = get_next_page_url(bundle_dict)
@@ -51,13 +57,12 @@ def search(self, resource_type="Patient", search_parameters={}):
         return self._df
 
 
-
 def get_next_page_url(bundle_dict):
-    links = bundle_dict.get('link')
+    links = bundle_dict.get("link")
     if links:
-        for link in links:
-            relation = link.get('relation')
-            if relation == 'next':
-                return link.get('url')
+        for link in links:
+            relation = link.get("relation")
+            if relation == "next":
+                return link.get("url")
 
     return None
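
Note (not part of the commit): a hedged usage sketch for the Fhirsearch class touched above. search() pages through the server by following the Bundle "next" links via get_next_page_url(); the base URL and count below are placeholder values, and the import path is an assumption:

    from fhiry.fhirsearch import Fhirsearch  # assumed import path

    fs = Fhirsearch(fhir_base_url="https://hapi.fhir.org/baseR4")  # placeholder public test server
    df = fs.search(resource_type="Patient", search_parameters={"_count": 100})
    print(df.head())
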
10 changes: 5 additions & 5 deletions src/fhiry/fhiry.py
@@ -14,6 +14,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class Fhiry(BaseFhiry):
     def __init__(self, config_json=None):
         self._filename = ""
@@ -50,10 +51,10 @@ def delete_col_raw_coding(self, delete_col_raw_coding):
         self._delete_col_raw_coding = delete_col_raw_coding
 
     def read_bundle_from_file(self, filename):
-        with open(filename, encoding='utf8', mode='r') as f:
+        with open(filename, encoding="utf8", mode="r") as f:
             json_in = f.read()
             json_in = json.loads(json_in)
-            return pd.json_normalize(json_in['entry'])
+            return pd.json_normalize(json_in["entry"])
 
     def process_source(self):
         """Read a single JSON resource or a directory full of JSON resources
@@ -64,7 +65,8 @@ def process_source(self):
             for file in tqdm(os.listdir(self._folder)):
                 if file.endswith(".json"):
                     self._df = self.read_bundle_from_file(
-                        os.path.join(self._folder, file))
+                        os.path.join(self._folder, file)
+                    )
                     self.process_df()
                     if df.empty:
                         df = self._df
@@ -84,5 +86,3 @@ def process_bundle_dict(self, bundle_dict):
         self._df = self.read_bundle_from_bundle_dict(bundle_dict)
         self.process_df()
         return self._df
-
-
(Diffs for the remaining 2 changed files are not shown.)