
Commit

Merge branch 'release/3.1.0'
dermatologist committed Nov 14, 2023
2 parents 9355247 + a57876a commit 7de89d4
Showing 15 changed files with 101 additions and 75 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
@@ -1,5 +1,5 @@
# [Choice] Python version: 3, 3.9, 3.8, 3.7, 3.6
-ARG VARIANT=3.8
+ARG VARIANT="3.10"
FROM mcr.microsoft.com/vscode/devcontainers/python:${VARIANT}

# [Option] Install Node.js
2 changes: 1 addition & 1 deletion .devcontainer/devcontainer.json
@@ -5,7 +5,7 @@
"context": "..",
"args": {
// Update 'VARIANT' to pick a Python version: 3, 3.6, 3.7, 3.8, 3.9
-"VARIANT": "3.8",
+"VARIANT": "3.10",
// Options
"INSTALL_NODE": "false",
"NODE_VERSION": "lts/*"
4 changes: 2 additions & 2 deletions .github/workflows/docs.yml
@@ -12,9 +12,9 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Set up Python
-uses: actions/setup-python@v3
+uses: actions/setup-python@v4.1.0
with:
-python-version: '3.8'
+python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
4 changes: 2 additions & 2 deletions .github/workflows/publish.yml
@@ -12,9 +12,9 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Set up Python
-uses: actions/setup-python@v3
+uses: actions/setup-python@v4.1.0
with:
-python-version: '3.8'
+python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
4 changes: 2 additions & 2 deletions .github/workflows/pytest.yml
@@ -16,12 +16,12 @@ jobs:
strategy:
max-parallel: 4
matrix:
-python-version: [3.8]
+python-version: [3.10.13]

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
-uses: actions/setup-python@v3
+uses: actions/setup-python@v4.1.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
6 changes: 3 additions & 3 deletions .github/workflows/tox.yml
@@ -1,4 +1,4 @@
-name: Python Test
+name: Tox Test

on:
push:
@@ -12,12 +12,12 @@ jobs:
strategy:
max-parallel: 4
matrix:
-python-version: [3.8]
+python-version: [3.10.13]

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
-uses: actions/setup-python@v3
+uses: actions/setup-python@v4.1.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
7 changes: 6 additions & 1 deletion .readthedocs.yml
@@ -16,7 +16,12 @@ sphinx:
formats:
- pdf

+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
+
python:
-  version: 3.8
install:
- requirements: docs/requirements.txt
- {path: ., method: pip}
15 changes: 14 additions & 1 deletion CHANGELOG.md
@@ -2,7 +2,20 @@

## [Unreleased](https://github.com/dermatologist/fhiry/tree/HEAD)

-[Full Changelog](https://github.com/dermatologist/fhiry/compare/2.1.0...HEAD)
+[Full Changelog](https://github.com/dermatologist/fhiry/compare/3.0.0...HEAD)

**Implemented enhancements:**

- Flattening FHIR resources / bundle for LLMs [\#144](https://github.com/dermatologist/fhiry/issues/144)

**Closed issues:**

- Performance warning: DataFrame is highly fragmented [\#135](https://github.com/dermatologist/fhiry/issues/135)
- 'charmap' codec can't decode byte 0x81 in position 1603 [\#133](https://github.com/dermatologist/fhiry/issues/133)

## [3.0.0](https://github.com/dermatologist/fhiry/tree/3.0.0) (2023-03-09)

[Full Changelog](https://github.com/dermatologist/fhiry/compare/2.1.0...3.0.0)

**Implemented enhancements:**

8 changes: 5 additions & 3 deletions README.md
@@ -5,14 +5,16 @@ Virtual flattened view of *FHIR Bundle / ndjson / FHIR server / BigQuery!*
[![PyPI download total](https://img.shields.io/pypi/dm/fhiry.svg)](https://pypi.python.org/pypi/fhiry/)
![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/dermatologist/fhiry)

-[Bulk data export using FHIR](https://hl7.org/fhir/uv/bulkdata/export/index.html) is needed to export a cohort for data analytics or machine learning.
-:fire: **Fhiry** is a [python](https://www.python.org/) package to facilitate this by converting a folder of [FHIR bundles](https://www.hl7.org/fhir/bundle.html)/ndjson into a [pandas](https://pandas.pydata.org/docs/user_guide/index.html) data frame for analysis and importing
-into ML packages such as Tensorflow and PyTorch. Fhiry also supports FHIR server search and FHIR tables on BigQuery.
+:fire: **FHIRy** is a [python](https://www.python.org/) package to facilitate health data analytics and machine learning by converting a folder of [FHIR bundles](https://www.hl7.org/fhir/bundle.html)/ndjson from [bulk data export](https://hl7.org/fhir/uv/bulkdata/export/index.html) into a [pandas](https://pandas.pydata.org/docs/user_guide/index.html) data frame for analysis. You can import the dataframe
+into ML packages such as Tensorflow and PyTorch. **FHIRy also supports FHIR server search and FHIR tables on BigQuery.**

Test this with the [synthea sample](https://synthea.mitre.org/downloads) or the downloaded ndjson from the [SMART Bulk data server](https://bulk-data.smarthealthit.org/). Use the 'Discussions' tab above for feature requests.

:sparkles: Checkout [this template](https://github.com/dermatologist/kedro-multimodal) for Multimodal machine learning in healthcare!

:fire: Checkout [MedPrompt](https://github.com/dermatologist/medprompt) for Medical LLM prompts, including FHIR related prompts, such as text-to-FHIRQuery mapper!


## Installation

### Stable
24 changes: 11 additions & 13 deletions dev-requirements.txt
@@ -1,5 +1,5 @@
#
-# This file is autogenerated by pip-compile with Python 3.8
+# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile dev-requirements.in
@@ -12,18 +12,20 @@ babel==2.9.1
# via sphinx
backports-entry-points-selectable==1.1.0
# via virtualenv
-certifi==2022.12.7
+certifi==2023.7.22
# via
# -c requirements.txt
# requests
-charset-normalizer==3.1.0
+charset-normalizer==3.3.2
# via
# -c requirements.txt
# requests
commonmark==0.9.1
# via recommonmark
coverage[toml]==5.5
-# via pytest-cov
+# via
+# coverage
+# pytest-cov
distlib==0.3.2
# via virtualenv
docutils==0.17.1
@@ -40,15 +42,13 @@ idna==3.4
# requests
imagesize==1.2.0
# via sphinx
-importlib-metadata==5.1.0
-# via sphinx
iniconfig==1.1.1
# via pytest
jinja2==3.0.1
# via sphinx
markupsafe==2.0.1
# via jinja2
-packaging==23.0
+packaging==23.2
# via
# -c requirements.txt
# pytest
@@ -73,13 +73,13 @@ pytest==7.1.2
# pytest-cov
pytest-cov==3.0.0
# via -r dev-requirements.in
-pytz==2022.7.1
+pytz==2023.3.post1
# via
# -c requirements.txt
# babel
recommonmark==0.7.1
# via -r dev-requirements.in
-requests==2.28.2
+requests==2.31.0
# via
# -c requirements.txt
# responses
@@ -124,17 +124,15 @@ tox==3.25.0
# via -r dev-requirements.in
types-toml==0.10.8.1
# via responses
-urllib3==1.26.14
+urllib3==2.1.0
# via
# -c requirements.txt
# requests
# responses
virtualenv==20.8.0
# via tox
-wheel==0.37.1
+wheel==0.41.0
# via -r dev-requirements.in
-zipp==3.11.0
-# via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
# setuptools
57 changes: 29 additions & 28 deletions requirements.txt
@@ -1,93 +1,94 @@
#
-# This file is autogenerated by pip-compile with Python 3.8
+# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile
#
-cachetools==5.3.0
+cachetools==5.3.2
# via google-auth
-certifi==2022.12.7
+certifi==2023.7.22
# via requests
-charset-normalizer==3.1.0
+charset-normalizer==3.3.2
# via requests
-db-dtypes==1.0.5
+db-dtypes==1.1.1
# via fhiry (setup.py)
-google-api-core[grpc]==2.11.0
+google-api-core[grpc]==2.14.0
# via
# google-api-core
# google-cloud-bigquery
# google-cloud-core
-google-auth==2.16.2
+google-auth==2.23.4
# via
# google-api-core
# google-cloud-core
-google-cloud-bigquery==3.6.0
+google-cloud-bigquery==3.13.0
# via fhiry (setup.py)
-google-cloud-core==2.3.2
+google-cloud-core==2.3.3
# via google-cloud-bigquery
google-crc32c==1.5.0
# via google-resumable-media
-google-resumable-media==2.4.1
+google-resumable-media==2.6.0
# via google-cloud-bigquery
-googleapis-common-protos==1.58.0
+googleapis-common-protos==1.61.0
# via
# google-api-core
# grpcio-status
-grpcio==1.51.3
+grpcio==1.59.2
# via
# google-api-core
# google-cloud-bigquery
# grpcio-status
-grpcio-status==1.51.3
+grpcio-status==1.59.2
# via google-api-core
idna==3.4
# via requests
-numpy==1.24.2
+numpy==1.26.2
# via
# db-dtypes
# pandas
# pyarrow
-packaging==23.0
+packaging==23.2
# via
# db-dtypes
# google-cloud-bigquery
-pandas==1.5.3
+pandas==2.1.3
# via
# db-dtypes
# fhiry (setup.py)
-proto-plus==1.22.2
+proto-plus==1.22.3
# via google-cloud-bigquery
-protobuf==4.22.1
+protobuf==4.25.0
# via
# google-api-core
# google-cloud-bigquery
# googleapis-common-protos
# grpcio-status
# proto-plus
-pyarrow==11.0.0
+pyarrow==14.0.1
# via db-dtypes
-pyasn1==0.4.8
+pyasn1==0.5.0
# via
# pyasn1-modules
# rsa
-pyasn1-modules==0.2.8
+pyasn1-modules==0.3.0
# via google-auth
python-dateutil==2.8.2
# via
# google-cloud-bigquery
# pandas
-pytz==2022.7.1
+pytz==2023.3.post1
# via pandas
-requests==2.28.2
+requests==2.31.0
# via
# google-api-core
# google-cloud-bigquery
rsa==4.9
# via google-auth
six==1.16.0
-# via
-# google-auth
-# python-dateutil
-tqdm==4.65.0
+# via python-dateutil
+tqdm==4.66.1
# via fhiry (setup.py)
-urllib3==1.26.14
+tzdata==2023.3
+# via pandas
+urllib3==2.1.0
# via requests
2 changes: 2 additions & 0 deletions setup.cfg
@@ -32,6 +32,8 @@ classifiers =
Operating System :: OS Independent
Programming Language :: Python
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Topic :: Scientific/Engineering :: Information Analysis


9 changes: 7 additions & 2 deletions src/fhiry/base_fhiry.py
@@ -93,12 +93,17 @@ def add_patient_id(self):
        """Create a patientId column with the resource.id if a Patient resource or with the resource.subject.reference if other resource type
        """
        try:
-            self._df['patientId'] = self._df.apply(lambda x: x['resource.id'] if x['resource.resourceType']
+            # PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
+            newframe = self._df.copy()
+            newframe['patientId'] = self._df.apply(lambda x: x['resource.id'] if x['resource.resourceType']
                                                   == 'Patient' else self.check_subject_reference(x), axis=1)
+            self._df = newframe
        except:
            try:
-                self._df['patientId'] = self._df.apply(lambda x: x['id'] if x['resourceType']
+                newframe = self._df.copy()
+                newframe['patientId'] = self._df.apply(lambda x: x['id'] if x['resourceType']
                                                       == 'Patient' else self.check_subject_reference(x), axis=1)
+                self._df = newframe
            except:
                pass
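The change above sidesteps pandas' `PerformanceWarning` about highly fragmented DataFrames by assigning the new column on a consolidated copy. A minimal sketch of the same copy-then-assign pattern (the `check_subject_reference` branch is replaced here with a placeholder `None`, since that helper lives in the fhiry class):

```python
import pandas as pd

# A flattened-bundle-like frame, similar to what pd.json_normalize produces.
df = pd.DataFrame({
    "resource.resourceType": ["Patient", "Observation"],
    "resource.id": ["p1", "o1"],
})

# Copying first consolidates the frame's internal blocks, so the column
# assignment below does not trigger the fragmentation warning that
# repeated insert-style assignments on a wide frame can raise.
newframe = df.copy()
newframe["patientId"] = df.apply(
    lambda x: x["resource.id"]
    if x["resource.resourceType"] == "Patient"
    else None,  # the real code calls self.check_subject_reference(x) here
    axis=1,
)
df = newframe
print(df["patientId"].tolist())  # ['p1', None]
```

The warning only surfaces on frames with many columns, but the copy is cheap insurance either way.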

2 changes: 1 addition & 1 deletion src/fhiry/fhiry.py
@@ -50,7 +50,7 @@ def delete_col_raw_coding(self, delete_col_raw_coding):
self._delete_col_raw_coding = delete_col_raw_coding

    def read_bundle_from_file(self, filename):
-        with open(filename, 'r') as f:
+        with open(filename, encoding='utf8', mode='r') as f:
            json_in = f.read()
            json_in = json.loads(json_in)
            return pd.json_normalize(json_in['entry'])
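Pinning the encoding to UTF-8 addresses the `'charmap' codec can't decode byte 0x81` error (issue #133), which occurs when the platform's default codec (e.g. cp1252 on Windows) is not UTF-8. A small sketch of the idea, using a throwaway bundle file:

```python
import json
import os
import tempfile

# Write a minimal bundle containing a non-ASCII name; a legacy 8-bit
# default codec could fail to decode such bytes on read.
bundle = {"entry": [{"resource": {"resourceType": "Patient", "name": "Łukasz"}}]}
path = os.path.join(tempfile.mkdtemp(), "bundle.json")
with open(path, "w", encoding="utf8") as f:
    json.dump(bundle, f, ensure_ascii=False)

# Passing encoding explicitly, as the commit does, makes the read
# independent of the platform's default encoding.
with open(path, encoding="utf8", mode="r") as f:
    json_in = json.loads(f.read())

print(json_in["entry"][0]["resource"]["name"])  # Łukasz
```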
