From f1a5e7a564e1c9f54325cb00f99de082a9ad207d Mon Sep 17 00:00:00 2001 From: Bell Eapen Date: Fri, 18 Mar 2022 11:10:25 -0400 Subject: [PATCH 01/31] Create _config.yml --- docs/_config.yml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 docs/_config.yml diff --git a/docs/_config.yml b/docs/_config.yml new file mode 100644 index 0000000..617ed98 --- /dev/null +++ b/docs/_config.yml @@ -0,0 +1,2 @@ +theme: jekyll-theme-leap-day +include: [_sources, _modules, _static] From d8d0d9b2f2da0a252fb672d9ed00f1170ad1c751 Mon Sep 17 00:00:00 2001 From: Bell Eapen Date: Fri, 18 Mar 2022 11:12:37 -0400 Subject: [PATCH 02/31] Update docs.yml --- .github/workflows/docs.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 48644b1..1c899cf 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -22,8 +22,9 @@ jobs: - name: Create docs run: | make -C docs/ html + cp docs/_config.yml docs/_build/html/_config.yml - name: Deploy Docs 🚀 uses: JamesIves/github-pages-deploy-action@v4.2.5 with: branch: gh-pages # The branch the action should deploy to. - folder: docs/_build/html # The folder the action should deploy. \ No newline at end of file + folder: docs/_build/html # The folder the action should deploy. From c13a866c0cbf359c6c786eb8cc61d27fe363d086 Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Sun, 20 Mar 2022 10:54:47 +0100 Subject: [PATCH 03/31] Make delete of raw coding data optional by property delete_col_raw_coding --- src/fhiry/fhiry.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/fhiry/fhiry.py b/src/fhiry/fhiry.py index dd38bff..7501555 100644 --- a/src/fhiry/fhiry.py +++ b/src/fhiry/fhiry.py @@ -17,6 +17,19 @@ def __init__(self): self._filename = "" self._folder = "" + # Codes from the FHIR datatype "coding" + # (f.e. 
element resource.code.coding or element resource.clinicalStatus.coding) + # are extracted to a col "codingcodes" + # (f.e. col resource.code.codingcodes or col resource.clinicalStatus.codingcodes) + # without other for analysis often not needed metadata like f.e. codesystem URI + # or FHIR extensions for coding entries. + # The full / raw object in col "coding" is deleted after this extraction. + # If you want to analyze more than the content of code and display from codings + # (like f.e. different codesystem URIs or further codes in extensions + # in the raw data/object), you can disable deletion of the raw source object "coding" + # (f.e. col "resource.code.coding") by setting property delete_col_raw_coding to False + self._delete_col_raw_coding = True + @property def df(self): return self._df @@ -29,6 +42,10 @@ def filename(self): def folder(self): return self._folder + @property + def delete_col_raw_coding(self): + return self._delete_col_raw_coding + @filename.setter def filename(self, filename): self._filename = filename @@ -38,6 +55,10 @@ def filename(self, filename): def folder(self, folder): self._folder = folder + @delete_col_raw_coding.setter + def delete_col_raw_coding(self, delete_col_raw_coding): + self._delete_col_raw_coding = delete_col_raw_coding + def read_bundle_from_file(self, filename): with open(filename, 'r') as f: json_in = f.read() @@ -88,7 +109,8 @@ def convert_object_to_list(self): lambda x: self.process_list(x[col]), axis=1) self._df = pd.concat( [self._df, codes.to_frame(name=col+'codes')], 1) - del self._df[col] + if self._delete_col_raw_coding: + del self._df[col] if 'display' in col: codes = self._df.apply( lambda x: self.process_list(x[col]), axis=1) From 443a4a8504ad47a6531cc398b427051e15fd74f6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Mar 2022 15:08:54 +0000 Subject: [PATCH 04/31] Bump pytest from 7.1.0 to 7.1.1 Bumps 
[pytest](https://github.com/pytest-dev/pytest) from 7.1.0 to 7.1.1. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.1.0...7.1.1) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 2464441..306b1d9 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -60,7 +60,7 @@ pygments==2.10.0 # via sphinx pyparsing==2.4.7 # via packaging -pytest==7.1.0 +pytest==7.1.1 # via # -r dev-requirements.in # pytest-cov From a88c71820c8a320e5977147f10fd7af2d8225a2a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Apr 2022 15:08:45 +0000 Subject: [PATCH 05/31] Bump pandas from 1.4.1 to 1.4.2 Bumps [pandas](https://github.com/pandas-dev/pandas) from 1.4.1 to 1.4.2. - [Release notes](https://github.com/pandas-dev/pandas/releases) - [Changelog](https://github.com/pandas-dev/pandas/blob/main/RELEASE.md) - [Commits](https://github.com/pandas-dev/pandas/compare/v1.4.1...v1.4.2) --- updated-dependencies: - dependency-name: pandas dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b9ed11c..4606e2e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ # numpy==1.22.3 # via pandas -pandas==1.4.1 +pandas==1.4.2 # via fhiry (setup.py) python-dateutil==2.8.2 # via pandas From f0fa173dcb10b88e689e6040fba3d15232763b9a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Apr 2022 15:36:12 +0000 Subject: [PATCH 06/31] Bump tox from 3.24.5 to 3.25.0 Bumps [tox](https://github.com/tox-dev/tox) from 3.24.5 to 3.25.0. - [Release notes](https://github.com/tox-dev/tox/releases) - [Changelog](https://github.com/tox-dev/tox/blob/master/docs/changelog.rst) - [Commits](https://github.com/tox-dev/tox/compare/3.24.5...3.25.0) --- updated-dependencies: - dependency-name: tox dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 306b1d9..5cf8358 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -107,7 +107,7 @@ tomli==1.2.1 # via # pytest # setuptools-scm -tox==3.24.5 +tox==3.25.0 # via -r dev-requirements.in urllib3==1.26.6 # via requests From 9b310f281750d5553fa888ffea4250a2adbbe35a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Apr 2022 15:09:35 +0000 Subject: [PATCH 07/31] Bump pytest from 7.1.1 to 7.1.2 Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.1.1 to 7.1.2. 
- [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.1.1...7.1.2) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 306b1d9..1b754ed 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -60,7 +60,7 @@ pygments==2.10.0 # via sphinx pyparsing==2.4.7 # via packaging -pytest==7.1.1 +pytest==7.1.2 # via # -r dev-requirements.in # pytest-cov From db5d0bc802c94ce610cb1b3c04013106cc93fc9c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 May 2022 15:07:51 +0000 Subject: [PATCH 08/31] Bump numpy from 1.22.3 to 1.22.4 Bumps [numpy](https://github.com/numpy/numpy) from 1.22.3 to 1.22.4. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/HOWTO_RELEASE.rst.txt) - [Commits](https://github.com/numpy/numpy/compare/v1.22.3...v1.22.4) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4606e2e..1389ebf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ # # pip-compile # -numpy==1.22.3 +numpy==1.22.4 # via pandas pandas==1.4.2 # via fhiry (setup.py) From 57de5fca125889f29e161bdf88cc26160e6f8f16 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Jun 2022 15:10:44 +0000 Subject: [PATCH 09/31] Bump pandas from 1.4.2 to 1.4.3 Bumps [pandas](https://github.com/pandas-dev/pandas) from 1.4.2 to 1.4.3. - [Release notes](https://github.com/pandas-dev/pandas/releases) - [Changelog](https://github.com/pandas-dev/pandas/blob/main/RELEASE.md) - [Commits](https://github.com/pandas-dev/pandas/compare/v1.4.2...v1.4.3) --- updated-dependencies: - dependency-name: pandas dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1389ebf..f69e02d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ # numpy==1.22.4 # via pandas -pandas==1.4.2 +pandas==1.4.3 # via fhiry (setup.py) python-dateutil==2.8.2 # via pandas From c6243ed19a14698cff7346c763f3c86ca116aceb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Jul 2022 15:16:56 +0000 Subject: [PATCH 10/31] Bump numpy from 1.22.4 to 1.23.1 Bumps [numpy](https://github.com/numpy/numpy) from 1.22.4 to 1.23.1. 
- [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst) - [Commits](https://github.com/numpy/numpy/compare/v1.22.4...v1.23.1) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f69e02d..adddb04 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ # # pip-compile # -numpy==1.22.4 +numpy==1.23.1 # via pandas pandas==1.4.3 # via fhiry (setup.py) From 9c93c478f6889ed8fd1ed6cd076bda059667acd3 Mon Sep 17 00:00:00 2001 From: sinujackson Date: Wed, 12 Oct 2022 13:22:10 -0400 Subject: [PATCH 11/31] Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index adddb04..c9f24a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ # # pip-compile # -numpy==1.23.1 +numpy==1.23.3 # via pandas pandas==1.4.3 # via fhiry (setup.py) From 5fc457ef662864d38c2e23ccda8ffb7203248cd7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 12 Oct 2022 19:12:16 +0000 Subject: [PATCH 12/31] Bump numpy from 1.23.3 to 1.23.4 Bumps [numpy](https://github.com/numpy/numpy) from 1.23.3 to 1.23.4. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst) - [Commits](https://github.com/numpy/numpy/compare/v1.23.3...v1.23.4) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c9f24a1..34ed88a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ # # pip-compile # -numpy==1.23.3 +numpy==1.23.4 # via pandas pandas==1.4.3 # via fhiry (setup.py) From 2dfed375bac5f69f71fa77f412e9f0f4e893db32 Mon Sep 17 00:00:00 2001 From: Bell Eapen Date: Fri, 9 Dec 2022 13:18:49 -0600 Subject: [PATCH 13/31] Update deps --- dev-requirements.txt | 10 +++++++--- requirements.txt | 10 +++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index c46c373..d6efd0b 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,6 +1,6 @@ # -# This file is autogenerated by pip-compile with python 3.8 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: # # pip-compile dev-requirements.in # @@ -34,6 +34,8 @@ idna==3.2 # via requests imagesize==1.2.0 # via sphinx +importlib-metadata==5.1.0 + # via sphinx iniconfig==1.1.1 # via pytest jinja2==3.0.1 @@ -66,7 +68,7 @@ pytest==7.1.2 # pytest-cov pytest-cov==3.0.0 # via -r dev-requirements.in -pytz==2021.3 +pytz==2022.6 # via # -c requirements.txt # babel @@ -115,6 +117,8 @@ virtualenv==20.8.0 # via tox wheel==0.37.1 # via -r dev-requirements.in +zipp==3.11.0 + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/requirements.txt b/requirements.txt index 34ed88a..51e82da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,16 @@ # -# This file is autogenerated by pip-compile with python 3.8 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: # # pip-compile # -numpy==1.23.4 +numpy==1.23.5 # via pandas -pandas==1.4.3 +pandas==1.5.2 # via fhiry (setup.py) python-dateutil==2.8.2 # via pandas 
-pytz==2021.3 +pytz==2022.6 # via pandas six==1.16.0 # via python-dateutil From 1b0bc2d1c8614f50b2eb5a8ddc9fc02a1c0d29b5 Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Sat, 17 Dec 2022 08:51:17 +0100 Subject: [PATCH 14/31] README: Part "Usage" more generic and some links Headings of README part "Usage" more generic; Link the Jupiter Notebooks; Link some related docs --- README.md | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 478b32a..95ecb26 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ ## About [Bulk data export using FHIR](https://hl7.org/fhir/uv/bulkdata/export/index.html) may be important if you want to export a cohort for analysis or machine learning. -:fire: **Fhiry** is a python package to facilitate this by converting a folder of FHIR bundles/ndjson into a pandas data frame for analysis and importing +:fire: **Fhiry** is a python package to facilitate this by converting a folder of [FHIR bundles](https://www.hl7.org/fhir/bundle.html)/ndjson into a [pandas](https://pandas.pydata.org/docs/user_guide/index.html) data frame for analysis and importing into ML packages such as Tensorflow and PyTorch. Test it with the [synthea sample](https://synthea.mitre.org/downloads) or the downloaded ndjson from the [SMART Bulk data server](https://bulk-data.smarthealthit.org/). Use the 'Discussions' tab above for feature requests. 
## Installation @@ -18,7 +18,7 @@ pip install fhiry ## Usage -### Synthea +### Import FHIR bundles (JSON) from folder to pandas dataframe ``` import fhiry.parallel as fp @@ -26,12 +26,22 @@ df = fp.process('/path/to/fhir/resources') print(df.info()) ``` -### [SMART Bulk Data Server](https://bulk-data.smarthealthit.org/) Export +Example source data set: [Synthea](https://synthea.mitre.org/downloads) + +Jupyter notebook example: [`notebooks/synthea.ipynb`](notebooks/synthea.ipynb) + +### Import NDJSON from folder to pandas dataframe + ``` import fhiry.parallel as fp df = fp.ndjson('/path/to/fhir/ndjson/files') print(df.info()) ``` + +Example source data set: [SMART Bulk Data Server](https://bulk-data.smarthealthit.org/) Export + +Jupyter notebook example: [`notebooks/ndjson.ipynb`](notebooks/ndjson.ipynb) + ## Columns * see df.columns @@ -49,8 +59,9 @@ resource.gender ``` ### [Documentation](https://dermatologist.github.io/fhiry/) + ## Contributors * [Bell Eapen](https://nuchange.ca) | [![Twitter Follow](https://img.shields.io/twitter/follow/beapen?style=social)](https://twitter.com/beapen) * WIP, PR welcome, please see CONTRIBUTING.md -* [![forthebadge](https://forthebadge.com/images/badges/built-with-love.svg) using CC](https://computecanada.ca) \ No newline at end of file +* [![forthebadge](https://forthebadge.com/images/badges/built-with-love.svg) using CC](https://computecanada.ca) From 31b570d4d28fa2ce5c6272f94dff5db95d9cd2e2 Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Sat, 17 Dec 2022 13:49:33 +0100 Subject: [PATCH 15/31] Contributors (CR of PR) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 95ecb26..e360bfb 100644 --- a/README.md +++ b/README.md @@ -63,5 +63,6 @@ resource.gender ## Contributors * [Bell Eapen](https://nuchange.ca) | [![Twitter Follow](https://img.shields.io/twitter/follow/beapen?style=social)](https://twitter.com/beapen) +* [Markus Mandalka](https://github.com/Mandalka) * WIP, PR welcome, 
please see CONTRIBUTING.md * [![forthebadge](https://forthebadge.com/images/badges/built-with-love.svg) using CC](https://computecanada.ca) From 97f603d39ee47640ccde7c14d9b958afa2dc7d4b Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Sat, 17 Dec 2022 19:53:55 +0100 Subject: [PATCH 16/31] To be able to process FHIR bundles without (maximal and at least one) Patient resource do Patient id handling in fhiry.py like in fhirndjson.py --- src/fhiry/fhirndjson.py | 2 +- src/fhiry/fhiry.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/fhiry/fhirndjson.py b/src/fhiry/fhirndjson.py index 1eed86e..e29c6ec 100644 --- a/src/fhiry/fhirndjson.py +++ b/src/fhiry/fhirndjson.py @@ -75,7 +75,7 @@ def convert_object_to_list(self): del self._df[col] def add_patient_id(self): - """Create a patientId column with the resource.id of the first Patient resource + """Create a patientId column with the id if a Patient resource or with the subject.reference if other resource type """ self._df['patientId'] = self._df.apply(lambda x: x['id'] if x['resourceType'] == 'Patient' else self.check_subject_reference(x), axis=1) diff --git a/src/fhiry/fhiry.py b/src/fhiry/fhiry.py index 7501555..3ba9e62 100644 --- a/src/fhiry/fhiry.py +++ b/src/fhiry/fhiry.py @@ -119,10 +119,16 @@ def convert_object_to_list(self): del self._df[col] def add_patient_id(self): - """Create a patientId column with the resource.id of the first Patient resource + """Create a patientId column with the resource.id if a Patient resource or with the resource.subject.reference if other resource type """ - self._df['patientId'] = self._df[( - self._df['resource.resourceType'] == "Patient")].iloc[0]['resource.id'] + self._df['patientId'] = self._df.apply(lambda x: x['resource.id'] if x['resource.resourceType'] + == 'Patient' else self.check_subject_reference(x), axis=1) + + def check_subject_reference(self, row): + try: + return row['resource.subject.reference'].replace('Patient/', '') + 
except: + return "" def get_info(self): if self._df is None: From d78048111d45fc6084ce74d472419e956182e2d0 Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Sun, 18 Dec 2022 10:20:44 +0100 Subject: [PATCH 17/31] Import from FHIR Search API --- README.md | 39 ++++++++++++++++++++++++++ src/fhiry/fhirsearch.py | 61 +++++++++++++++++++++++++++++++++++++++++ src/fhiry/fhiry.py | 10 +++++++ 3 files changed, 110 insertions(+) create mode 100644 src/fhiry/fhirsearch.py diff --git a/README.md b/README.md index e360bfb..06d91ba 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,45 @@ Example source data set: [SMART Bulk Data Server](https://bulk-data.smarthealthi Jupyter notebook example: [`notebooks/ndjson.ipynb`](notebooks/ndjson.ipynb) +### Import FHIR Search results to pandas dataframe + +*Warning: Import from FHIR Search API is under development and not well tested yet!* + +Import resources from [FHIR Search API](https://www.hl7.org/fhir/search.html) results to pandas dataframe: + +For using filter options by `search_parameters` see [FHIR search common parameters for all resource types](https://www.hl7.org/fhir/search.html#standard) and additional FHIR search parameters for certain resource types like [Patient](https://www.hl7.org/fhir/patient.html#search), [Condition](https://www.hl7.org/fhir/condition.html#search), [Observation](https://www.hl7.org/fhir/observation.html#search), ... 
+ +#### Example: Import all Observations + +Import all resources (since empty search parameters / no filter) of type Observation +``` +from fhiry.fhirsearch import Fhirsearch + +fs = Fhirsearch() +fs.fhir_base_url = "http://fhir-server:8080/fhir" +df = fs.search(type = "Observation", search_parameters = {}) + +print(df.info()) +``` +#### Example: Import all conditions with a certain code + +Import all condition resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element Condition.code: + +``` +from fhiry.fhirsearch import Fhirsearch +fs = Fhirsearch() + +fs.fhir_base_url = "http://fhir-server:8080/fhir" + +my_fhir_search_parameters = { + "code": "http://snomed.info/sct|39065001", +} + +df = fs.search(type = "Condition", search_parameters = my_fhir_search_parameters) + +print(df.info()) +``` + ## Columns * see df.columns diff --git a/src/fhiry/fhirsearch.py b/src/fhiry/fhirsearch.py new file mode 100644 index 0000000..28d9f1a --- /dev/null +++ b/src/fhiry/fhirsearch.py @@ -0,0 +1,61 @@ +import pandas as pd +import requests + +from . import Fhiry + + +class Fhirsearch(object): + + def __init__(self): + self.fhir_base_url = "http://fhir-server/fhir" + + # Batch size (entries per page) + self.page_size = 500 + + # todo: Parameters for HTTP(s) requests (f.e. 
for auth) + self.requests_kwargs = None + + def search(self, type="Patient", search_parameters={}): + + headers = {"Content-Type": "application/fhir+json"} + + if '_count' not in search_parameters: + search_parameters['_count'] = self.page_size + + search_url = f'{self.fhir_base_url}/{type}' + r = requests.get(search_url, params=search_parameters, headers=headers) + bundle_dict = r.json() + + if 'entry' in bundle_dict: + df = process_bundle(bundle_dict) + + next_page_url = get_next_page_url(bundle_dict) + + while next_page_url: + r = requests.get(next_page_url, headers=headers) + bundle_dict = r.json() + df_page = process_bundle(bundle_dict) + df = pd.concat([df, df_page]) + + next_page_url = get_next_page_url(bundle_dict) + else: + df = pd.DataFrame(columns=[]) + + return df + + +def process_bundle(bundle_dict): + f = Fhiry() + f.process_bundle_dict(bundle_dict) + return f.df + + +def get_next_page_url(bundle_dict): + links = bundle_dict.get('link') + + for link in links: + relation = link.get('relation') + if relation == 'next': + return link.get('url') + + return None diff --git a/src/fhiry/fhiry.py b/src/fhiry/fhiry.py index 3ba9e62..0f57b34 100644 --- a/src/fhiry/fhiry.py +++ b/src/fhiry/fhiry.py @@ -65,6 +65,9 @@ def read_bundle_from_file(self, filename): json_in = json.loads(json_in) return pd.json_normalize(json_in['entry']) + def read_bundle_from_bundle_dict(self, bundle_dict): + return pd.json_normalize(bundle_dict['entry']) + def delete_unwanted_cols(self): if 'resource.text.div' in self._df.columns: del self._df['resource.text.div'] @@ -100,6 +103,13 @@ def process_file(self, filename): self.add_patient_id() return self._df + def process_bundle_dict(self, bundle_dict): + self._df = self.read_bundle_from_bundle_dict(bundle_dict) + self.delete_unwanted_cols() + self.convert_object_to_list() + self.add_patient_id() + return self._df + def convert_object_to_list(self): """Convert object to a list of codes """ From 6d8568194b3f0f51916c0d8b5ca190e51a9e9151 
Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Sun, 18 Dec 2022 12:02:33 +0100 Subject: [PATCH 18/31] No KeyError if missing code and display --- src/fhiry/fhiry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fhiry/fhiry.py b/src/fhiry/fhiry.py index 3ba9e62..4236cbe 100644 --- a/src/fhiry/fhiry.py +++ b/src/fhiry/fhiry.py @@ -149,6 +149,6 @@ def process_list(self, myList): for entry in myList: if 'code' in entry: myCodes.append(entry['code']) - else: + elif 'display' in entry: myCodes.append(entry['display']) return myCodes From 05450198e874e30588f0a0df2d9f00593a1d5ba8 Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Mon, 19 Dec 2022 10:34:21 +0100 Subject: [PATCH 19/31] FHIR Search: Support basic auth, proxies and other connection settings --- README.md | 33 +++++++++++++++++++++++++++++++++ src/fhiry/fhirsearch.py | 12 ++++++++---- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 06d91ba..156ced2 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,39 @@ df = fs.search(type = "Condition", search_parameters = my_fhir_search_parameters print(df.info()) ``` +#### Connection settings + +To set connection parameters like authentication, SSL certificates, proxies and so on, set or add standard [Python requests](https://requests.readthedocs.io/en/latest/) keyword arguments to the property `requests_kwargs`. + +##### Authentication + +Authentication is set by [requests parameter `auth`](https://requests.readthedocs.io/en/latest/user/authentication/). 
+ +Example using [HTTP Basic Auth](https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication): +``` +from fhiry.fhirsearch import Fhirsearch + +fs = Fhirsearch() +fs.fhir_base_url = "http://fhir-server:8080/fhir" + +# Set basic auth credentials (https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication) +fs.requests_kwargs["auth"] = ('myUser', 'myPassword') +``` + +##### Proxy settings + +You can set a HTTP(S)-Proxies by [requests parameter `proxies`](https://requests.readthedocs.io/en/latest/user/advanced/#proxies). + +Example: + +``` +fs.requests_kwargs["proxies"] = { + 'http': 'http://10.10.1.10:3128', + 'https': 'http://10.10.1.10:1080', +} +``` + + ## Columns * see df.columns diff --git a/src/fhiry/fhirsearch.py b/src/fhiry/fhirsearch.py index 28d9f1a..71fc62d 100644 --- a/src/fhiry/fhirsearch.py +++ b/src/fhiry/fhirsearch.py @@ -12,8 +12,12 @@ def __init__(self): # Batch size (entries per page) self.page_size = 500 - # todo: Parameters for HTTP(s) requests (f.e. for auth) - self.requests_kwargs = None + # Keyword arguments for HTTP(s) requests (f.e. 
for auth) + # Example parameters: + # Authentication: https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication + # Proxies: https://requests.readthedocs.io/en/latest/user/advanced/#proxies + # SSL Certificates: https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification + self.requests_kwargs = {} def search(self, type="Patient", search_parameters={}): @@ -23,7 +27,7 @@ def search(self, type="Patient", search_parameters={}): search_parameters['_count'] = self.page_size search_url = f'{self.fhir_base_url}/{type}' - r = requests.get(search_url, params=search_parameters, headers=headers) + r = requests.get(search_url, params=search_parameters, headers=headers, **self.requests_kwargs) bundle_dict = r.json() if 'entry' in bundle_dict: @@ -32,7 +36,7 @@ def search(self, type="Patient", search_parameters={}): next_page_url = get_next_page_url(bundle_dict) while next_page_url: - r = requests.get(next_page_url, headers=headers) + r = requests.get(next_page_url, headers=headers, **self.requests_kwargs) bundle_dict = r.json() df_page = process_bundle(bundle_dict) df = pd.concat([df, df_page]) From 37a7a8442ef145f2583b7ed784e829246910c805 Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Mon, 19 Dec 2022 10:51:29 +0100 Subject: [PATCH 20/31] For easier debugging of settings check HTTP status before trying to read (not existent) JSON result. 
So raise more specific HTTP exceptions on HTTP error codes like "requests.exceptions.HTTPError: 401 Client Error: Unauthorized" instead of a following error "simplejson.errors.JSONDecodeError" --- src/fhiry/fhirsearch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fhiry/fhirsearch.py b/src/fhiry/fhirsearch.py index 71fc62d..de6d791 100644 --- a/src/fhiry/fhirsearch.py +++ b/src/fhiry/fhirsearch.py @@ -28,6 +28,7 @@ def search(self, type="Patient", search_parameters={}): search_url = f'{self.fhir_base_url}/{type}' r = requests.get(search_url, params=search_parameters, headers=headers, **self.requests_kwargs) + r.raise_for_status() bundle_dict = r.json() if 'entry' in bundle_dict: @@ -37,6 +38,7 @@ def search(self, type="Patient", search_parameters={}): while next_page_url: r = requests.get(next_page_url, headers=headers, **self.requests_kwargs) + r.raise_for_status() bundle_dict = r.json() df_page = process_bundle(bundle_dict) df = pd.concat([df, df_page]) From f0e8defd58580113847769a15e6ba89252058f2d Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Mon, 19 Dec 2022 11:50:00 +0100 Subject: [PATCH 21/31] Move (grown) documentation of fhir search options to separate doc for better overview in README.md for newbies (so f.e. 
important section "Columns" better visible again) --- README.md | 51 ++------------------------------- fhir-search.md | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 48 deletions(-) create mode 100644 fhir-search.md diff --git a/README.md b/README.md index 156ced2..27f5a01 100644 --- a/README.md +++ b/README.md @@ -46,25 +46,13 @@ Jupyter notebook example: [`notebooks/ndjson.ipynb`](notebooks/ndjson.ipynb) *Warning: Import from FHIR Search API is under development and not well tested yet!* -Import resources from [FHIR Search API](https://www.hl7.org/fhir/search.html) results to pandas dataframe: +Import resources from [FHIR Search API](https://www.hl7.org/fhir/search.html) results to pandas dataframe. -For using filter options by `search_parameters` see [FHIR search common parameters for all resource types](https://www.hl7.org/fhir/search.html#standard) and additional FHIR search parameters for certain resource types like [Patient](https://www.hl7.org/fhir/patient.html#search), [Condition](https://www.hl7.org/fhir/condition.html#search), [Observation](https://www.hl7.org/fhir/observation.html#search), ... 
+Documentation: [`fhir-search.md`](fhir-search.md) -#### Example: Import all Observations - -Import all resources (since empty search parameters / no filter) of type Observation -``` -from fhiry.fhirsearch import Fhirsearch - -fs = Fhirsearch() -fs.fhir_base_url = "http://fhir-server:8080/fhir" -df = fs.search(type = "Observation", search_parameters = {}) - -print(df.info()) -``` #### Example: Import all conditions with a certain code -Import all condition resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element Condition.code: +Import all [Condition](https://www.hl7.org/fhir/condition.html#search) resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element Condition.code: ``` from fhiry.fhirsearch import Fhirsearch @@ -81,39 +69,6 @@ df = fs.search(type = "Condition", search_parameters = my_fhir_search_parameters print(df.info()) ``` -#### Connection settings - -To set connection parameters like authentication, SSL certificates, proxies and so on, set or add standard [Python requests](https://requests.readthedocs.io/en/latest/) keyword arguments to the property `requests_kwargs`. - -##### Authentication - -Authentication is set by [requests parameter `auth`](https://requests.readthedocs.io/en/latest/user/authentication/). - -Example using [HTTP Basic Auth](https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication): -``` -from fhiry.fhirsearch import Fhirsearch - -fs = Fhirsearch() -fs.fhir_base_url = "http://fhir-server:8080/fhir" - -# Set basic auth credentials (https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication) -fs.requests_kwargs["auth"] = ('myUser', 'myPassword') -``` - -##### Proxy settings - -You can set a HTTP(S)-Proxies by [requests parameter `proxies`](https://requests.readthedocs.io/en/latest/user/advanced/#proxies). 
- -Example: - -``` -fs.requests_kwargs["proxies"] = { - 'http': 'http://10.10.1.10:3128', - 'https': 'http://10.10.1.10:1080', -} -``` - - ## Columns * see df.columns diff --git a/fhir-search.md b/fhir-search.md new file mode 100644 index 0000000..6ef17cd --- /dev/null +++ b/fhir-search.md @@ -0,0 +1,76 @@ +# Import FHIR search results to pandas dataframe + +Import resources from [FHIR Search API](https://www.hl7.org/fhir/search.html) results to [pandas](https://pandas.pydata.org/docs/user_guide/index.html) dataframe by [fhiry](README.md): + +## FHIR search parameters + +For filter options you can set by `search_parameters` see [FHIR search common parameters for all resource types](https://www.hl7.org/fhir/search.html#standard) and additional FHIR search parameters for certain resource types like [Patient](https://www.hl7.org/fhir/patient.html#search), [Condition](https://www.hl7.org/fhir/condition.html#search), [Observation](https://www.hl7.org/fhir/observation.html#search), ... + +## Example: Import all Observations + +Import all resources (since empty search parameters / no filter) of type Observation +``` +from fhiry.fhirsearch import Fhirsearch + +fs = Fhirsearch() +fs.fhir_base_url = "http://fhir-server:8080/fhir" +df = fs.search(type = "Observation", search_parameters = {}) + +print(df.info()) +``` + +## Example: Import all conditions with a certain code + +Import all condition resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element Condition.code: + +``` +from fhiry.fhirsearch import Fhirsearch +fs = Fhirsearch() + +fs.fhir_base_url = "http://fhir-server:8080/fhir" + +my_fhir_search_parameters = { + "code": "http://snomed.info/sct|39065001", +} + +df = fs.search(type = "Condition", search_parameters = my_fhir_search_parameters) + +print(df.info()) +``` + +## Connection settings + +To set connection parameters like authentication, SSL certificates, proxies and so on, set or add standard [Python 
requests](https://requests.readthedocs.io/en/latest/) keyword arguments to the property `requests_kwargs`. + +### Authentication + +Authentication is set by [requests parameter `auth`](https://requests.readthedocs.io/en/latest/user/authentication/). + +Example using [HTTP Basic Auth](https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication): + +``` +from fhiry.fhirsearch import Fhirsearch + +fs = Fhirsearch() +fs.fhir_base_url = "http://fhir-server:8080/fhir" + +# Set basic auth credentials (https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication) +fs.requests_kwargs["auth"] = ('myUser', 'myPassword') +``` + +### Proxy settings + +You can set a HTTP(S)-Proxies by [requests parameter `proxies`](https://requests.readthedocs.io/en/latest/user/advanced/#proxies). + +Example: + +``` +fs.requests_kwargs["proxies"] = { + 'http': 'http://10.10.1.10:3128', + 'https': 'http://10.10.1.10:1080', +} +``` + + +## Columns +* see [`df.columns`](README.md#columns) From 3ebcd0dd40838455e15b03c6c5bb625b9d5d581a Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Mon, 19 Dec 2022 12:45:19 +0100 Subject: [PATCH 22/31] FHIR search documentation: Decrease RAM usage by FHIR search option _elements --- fhir-search.md | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/fhir-search.md b/fhir-search.md index 6ef17cd..455cd83 100644 --- a/fhir-search.md +++ b/fhir-search.md @@ -38,6 +38,39 @@ df = fs.search(type = "Condition", search_parameters = my_fhir_search_parameters print(df.info()) ``` +## Columns +* see [`df.columns`](README.md#columns) + + +## Decrease RAM usage + +If you want to analyze only certain elements, you can decrease RAM usage and network overhead by defining the elements you need for your data analysis by the [FHIR search option `_elements`](https://www.hl7.org/fhir/search.html#elements). 
+ +Example: + +``` +from fhiry.fhirsearch import Fhirsearch +fs = Fhirsearch() + +fs.fhir_base_url = "http://fhir-server:8080/fhir" + +my_fhir_search_parameters = { +``` +... Other FHIR search parameters / filters ... + +``` + + "_elements": "code,verification-status,recorded-date", +} + +df = fs.search(type = "Condition", search_parameters = my_fhir_search_parameters) + +print(df.info()) +``` + + + + ## Connection settings To set connection parameters like authentication, SSL certificates, proxies and so on, set or add standard [Python requests](https://requests.readthedocs.io/en/latest/) keyword arguments to the property `requests_kwargs`. @@ -70,7 +103,3 @@ fs.requests_kwargs["proxies"] = { 'https': 'http://10.10.1.10:1080', } ``` - - -## Columns -* see [`df.columns`](README.md#columns) From e01ec92620a7e9a4e9a09dd1f279bf2876a7afcb Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Tue, 20 Dec 2022 14:39:45 +0100 Subject: [PATCH 23/31] Tested fetching hundreds of thousands of resources/fhir search paging with Blaze and HAPI, with basic auth and without auth, with HTTP and with HTTPS --- README.md | 2 -- fhir-search.md | 60 ++++++++++++++++++++++++++------------------------ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 27f5a01..d5a3b8a 100644 --- a/README.md +++ b/README.md @@ -44,8 +44,6 @@ Jupyter notebook example: [`notebooks/ndjson.ipynb`](notebooks/ndjson.ipynb) ### Import FHIR Search results to pandas dataframe -*Warning: Import from FHIR Search API is under development and not well tested yet!* - Import resources from [FHIR Search API](https://www.hl7.org/fhir/search.html) results to pandas dataframe. 
Documentation: [`fhir-search.md`](fhir-search.md) diff --git a/fhir-search.md b/fhir-search.md index 455cd83..84840aa 100644 --- a/fhir-search.md +++ b/fhir-search.md @@ -41,65 +41,67 @@ print(df.info()) ## Columns * see [`df.columns`](README.md#columns) +## Connection settings -## Decrease RAM usage +To set connection parameters like authentication, SSL certificates, proxies and so on, set or add standard [Python requests](https://requests.readthedocs.io/en/latest/) keyword arguments to the property `requests_kwargs`. -If you want to analyze only certain elements, you can decrease RAM usage and network overhead by defining the elements you need for your data analysis by the [FHIR search option `_elements`](https://www.hl7.org/fhir/search.html#elements). +### Authentication -Example: +Authentication is set by [requests parameter `auth`](https://requests.readthedocs.io/en/latest/user/authentication/). + +Example using [HTTP Basic Auth](https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication): ``` from fhiry.fhirsearch import Fhirsearch -fs = Fhirsearch() +fs = Fhirsearch() fs.fhir_base_url = "http://fhir-server:8080/fhir" -my_fhir_search_parameters = { +# Set basic auth credentials (https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication) +fs.requests_kwargs["auth"] = ('myUser', 'myPassword') ``` -... Other FHIR search parameters / filters ... -``` +### Proxy settings - "_elements": "code,verification-status,recorded-date", -} +You can set a HTTP(S)-Proxies by [requests parameter `proxies`](https://requests.readthedocs.io/en/latest/user/advanced/#proxies). 
-df = fs.search(type = "Condition", search_parameters = my_fhir_search_parameters) +Example: -print(df.info()) +``` +fs.requests_kwargs["proxies"] = { + 'http': 'http://10.10.1.10:3128', + 'https': 'http://10.10.1.10:1080', +} ``` +## Performance +### Fetching all found resources from FHIR server +Since such search calls are fetching all found resources of the searched resource type matching the fhir search parameters (if none, fetching all resources of the resource type) from the FHIR server, dependent on the performance of the FHIR Server for example fetching one million resources by FHIR search (page thorough all the search results pages) can take an hour to load the resources into the resulting pandas dataframe which for this example has a RAM usage of few hundred MB RAM. -## Connection settings - -To set connection parameters like authentication, SSL certificates, proxies and so on, set or add standard [Python requests](https://requests.readthedocs.io/en/latest/) keyword arguments to the property `requests_kwargs`. - -### Authentication +### Decrease RAM usage -Authentication is set by [requests parameter `auth`](https://requests.readthedocs.io/en/latest/user/authentication/). +If you want to analyze only certain elements, you can decrease RAM usage and network overhead by defining the elements you need for your data analysis by the [FHIR search option `_elements`](https://www.hl7.org/fhir/search.html#elements). -Example using [HTTP Basic Auth](https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication): +Example: ``` from fhiry.fhirsearch import Fhirsearch - fs = Fhirsearch() + fs.fhir_base_url = "http://fhir-server:8080/fhir" -# Set basic auth credentials (https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication) -fs.requests_kwargs["auth"] = ('myUser', 'myPassword') +my_fhir_search_parameters = { ``` +... Other FHIR search parameters / filters ... 
-### Proxy settings +``` -You can set a HTTP(S)-Proxies by [requests parameter `proxies`](https://requests.readthedocs.io/en/latest/user/advanced/#proxies). + "_elements": "code,verification-status,recorded-date", +} -Example: +df = fs.search(type = "Condition", search_parameters = my_fhir_search_parameters) -``` -fs.requests_kwargs["proxies"] = { - 'http': 'http://10.10.1.10:3128', - 'https': 'http://10.10.1.10:1080', -} +print(df.info()) ``` From 7cbf40741ae9e43b82b0ef687da14dac1d86904c Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Thu, 29 Dec 2022 15:53:26 +0100 Subject: [PATCH 24/31] Test import FHIR search results --- dev-requirements.in | 1 + src/fhiry/fhirsearch.py | 10 +- tests/resources/fhirsearch/conditions.json | 110 ++++++++++++++++++ .../fhirsearch/search-conditions-page1.json | 65 +++++++++++ .../fhirsearch/search-conditions-page2.json | 68 +++++++++++ .../fhirsearch/search-conditions-page3.json | 41 +++++++ tests/test_fhirsearch.py | 59 ++++++++++ 7 files changed, 349 insertions(+), 5 deletions(-) create mode 100644 tests/resources/fhirsearch/conditions.json create mode 100644 tests/resources/fhirsearch/search-conditions-page1.json create mode 100644 tests/resources/fhirsearch/search-conditions-page2.json create mode 100644 tests/resources/fhirsearch/search-conditions-page3.json create mode 100644 tests/test_fhirsearch.py diff --git a/dev-requirements.in b/dev-requirements.in index 5a73416..e883a57 100644 --- a/dev-requirements.in +++ b/dev-requirements.in @@ -3,6 +3,7 @@ pytest-cov pytest recommonmark +responses sphinx>=3.2.1 setuptools setuptools_scm diff --git a/src/fhiry/fhirsearch.py b/src/fhiry/fhirsearch.py index de6d791..35c1636 100644 --- a/src/fhiry/fhirsearch.py +++ b/src/fhiry/fhirsearch.py @@ -58,10 +58,10 @@ def process_bundle(bundle_dict): def get_next_page_url(bundle_dict): links = bundle_dict.get('link') - - for link in links: - relation = link.get('relation') - if relation == 'next': - return link.get('url') + if links: + for 
link in links: + relation = link.get('relation') + if relation == 'next': + return link.get('url') return None diff --git a/tests/resources/fhirsearch/conditions.json b/tests/resources/fhirsearch/conditions.json new file mode 100644 index 0000000..d313fea --- /dev/null +++ b/tests/resources/fhirsearch/conditions.json @@ -0,0 +1,110 @@ +{ + "resourceType": "Bundle", + "type": "transaction", + "entry": [ { + "fullUrl": "Condition/cond-id-1", + "resource": { + "resourceType": "Condition", + "id": "cond-id-1", + "code": { + "coding": [ + { + "system": "http://fhir.de/CodeSystem/bfarm/icd-10-gm", + "version": "2010", + "code": "A00.0" + } ] + }, + "subject": { + "reference": "Patient/pat-id-1" + } + }, + "request": { + "method": "PUT", + "url": "Condition/cond-id-1" + } + }, { + "fullUrl": "Condition/cond-id-2", + "resource": { + "resourceType": "Condition", + "id": "cond-id-2", + "code": { + "coding": [ + { + "system": "http://fhir.de/CodeSystem/bfarm/icd-10-gm", + "version": "2010", + "code": "A01.0" + } ] + }, + "subject": { + "reference": "Patient/pat-id-2" + } + }, + "request": { + "method": "PUT", + "url": "Condition/cond-id-2" + } + }, { + "fullUrl": "Condition/cond-id-3", + "resource": { + "resourceType": "Condition", + "id": "cond-id-3", + "code": { + "coding": [ + { + "system": "http://fhir.de/CodeSystem/bfarm/icd-10-gm", + "version": "2010", + "code": "A02.0" + } ] + }, + "subject": { + "reference": "Patient/pat-id-2" + } + }, + "request": { + "method": "PUT", + "url": "Condition/cond-id-3" + } + }, { + "fullUrl": "Condition/cond-id-4", + "resource": { + "resourceType": "Condition", + "id": "cond-id-4", + "code": { + "coding": [ + { + "system": "http://fhir.de/CodeSystem/bfarm/icd-10-gm", + "version": "2010", + "code": "A03.0" + } ] + }, + "subject": { + "reference": "Patient/pat-id-2" + } + }, + "request": { + "method": "PUT", + "url": "Condition/cond-id-4" + } + }, { + "fullUrl": "Condition/cond-id-5", + "resource": { + "resourceType": "Condition", + "id": 
"cond-id-5", + "code": { + "coding": [ + { + "system": "http://fhir.de/CodeSystem/bfarm/icd-10-gm", + "version": "2010", + "code": "A04.0" + } ] + }, + "subject": { + "reference": "Patient/pat-id-2" + } + }, + "request": { + "method": "PUT", + "url": "Condition/cond-id-5" + } + } ] +} diff --git a/tests/resources/fhirsearch/search-conditions-page1.json b/tests/resources/fhirsearch/search-conditions-page1.json new file mode 100644 index 0000000..9c9973e --- /dev/null +++ b/tests/resources/fhirsearch/search-conditions-page1.json @@ -0,0 +1,65 @@ +{ + "resourceType": "Bundle", + "id": "b5f2b2b3-6372-4159-969a-49cbd243e154", + "meta": { + "lastUpdated": "2022-12-29T12:39:38.158+00:00" + }, + "type": "searchset", + "total": 5, + "link": [ { + "relation": "self", + "url": "http://fhir-server/fhir/Condition?_count=2" + }, { + "relation": "next", + "url": "http://fhir-server/fhir?_getpages=b5f2b2b3-6372-4159-969a-49cbd243e154&_getpagesoffset=2&_count=2&_bundletype=searchset" + } ], + "entry": [ { + "fullUrl": "http://fhir-server/fhir/Condition/cond-id-1", + "resource": { + "resourceType": "Condition", + "id": "cond-id-1", + "meta": { + "versionId": "1", + "lastUpdated": "2022-12-29T12:39:14.443+00:00", + "source": "#VJFR5ZXtH6s2PLWu" + }, + "code": { + "coding": [ { + "system": "http://fhir.de/CodeSystem/bfarm/icd-10-gm", + "version": "2010", + "code": "A00.0" + } ] + }, + "subject": { + "reference": "Patient/pat-id-1" + } + }, + "search": { + "mode": "match" + } + }, { + "fullUrl": "http://fhir-server/fhir/Condition/cond-id-2", + "resource": { + "resourceType": "Condition", + "id": "cond-id-2", + "meta": { + "versionId": "1", + "lastUpdated": "2022-12-29T12:39:14.443+00:00", + "source": "#VJFR5ZXtH6s2PLWu" + }, + "code": { + "coding": [ { + "system": "http://fhir.de/CodeSystem/bfarm/icd-10-gm", + "version": "2010", + "code": "A01.0" + } ] + }, + "subject": { + "reference": "Patient/pat-id-2" + } + }, + "search": { + "mode": "match" + } + } ] +} \ No newline at end of file 
diff --git a/tests/resources/fhirsearch/search-conditions-page2.json b/tests/resources/fhirsearch/search-conditions-page2.json new file mode 100644 index 0000000..c1acf30 --- /dev/null +++ b/tests/resources/fhirsearch/search-conditions-page2.json @@ -0,0 +1,68 @@ +{ + "resourceType": "Bundle", + "id": "b5f2b2b3-6372-4159-969a-49cbd243e154", + "meta": { + "lastUpdated": "2022-12-29T12:39:38.158+00:00" + }, + "type": "searchset", + "total": 5, + "link": [ { + "relation": "self", + "url": "http://fhir-server/fhir?_getpages=b5f2b2b3-6372-4159-969a-49cbd243e154&_getpagesoffset=2&_count=2&_bundletype=searchset" + }, { + "relation": "next", + "url": "http://fhir-server/fhir?_getpages=b5f2b2b3-6372-4159-969a-49cbd243e154&_getpagesoffset=4&_count=2&_bundletype=searchset" + }, { + "relation": "previous", + "url": "http://fhir-server/fhir?_getpages=b5f2b2b3-6372-4159-969a-49cbd243e154&_getpagesoffset=0&_count=2&_bundletype=searchset" + } ], + "entry": [ { + "fullUrl": "http://fhir-server/fhir/Condition/cond-id-3", + "resource": { + "resourceType": "Condition", + "id": "cond-id-3", + "meta": { + "versionId": "1", + "lastUpdated": "2022-12-29T12:39:14.443+00:00", + "source": "#VJFR5ZXtH6s2PLWu" + }, + "code": { + "coding": [ { + "system": "http://fhir.de/CodeSystem/bfarm/icd-10-gm", + "version": "2010", + "code": "A02.0" + } ] + }, + "subject": { + "reference": "Patient/pat-id-2" + } + }, + "search": { + "mode": "match" + } + }, { + "fullUrl": "http://fhir-server/fhir/Condition/cond-id-4", + "resource": { + "resourceType": "Condition", + "id": "cond-id-4", + "meta": { + "versionId": "1", + "lastUpdated": "2022-12-29T12:39:14.443+00:00", + "source": "#VJFR5ZXtH6s2PLWu" + }, + "code": { + "coding": [ { + "system": "http://fhir.de/CodeSystem/bfarm/icd-10-gm", + "version": "2010", + "code": "A03.0" + } ] + }, + "subject": { + "reference": "Patient/pat-id-2" + } + }, + "search": { + "mode": "match" + } + } ] +} \ No newline at end of file diff --git 
a/tests/resources/fhirsearch/search-conditions-page3.json b/tests/resources/fhirsearch/search-conditions-page3.json new file mode 100644 index 0000000..8ba1d71 --- /dev/null +++ b/tests/resources/fhirsearch/search-conditions-page3.json @@ -0,0 +1,41 @@ +{ + "resourceType": "Bundle", + "id": "b5f2b2b3-6372-4159-969a-49cbd243e154", + "meta": { + "lastUpdated": "2022-12-29T12:39:38.158+00:00" + }, + "type": "searchset", + "total": 5, + "link": [ { + "relation": "self", + "url": "http://fhir-server/fhir?_getpages=b5f2b2b3-6372-4159-969a-49cbd243e154&_getpagesoffset=4&_count=2&_bundletype=searchset" + }, { + "relation": "previous", + "url": "http://fhir-server/fhir?_getpages=b5f2b2b3-6372-4159-969a-49cbd243e154&_getpagesoffset=2&_count=2&_bundletype=searchset" + } ], + "entry": [ { + "fullUrl": "http://fhir-server/fhir/Condition/cond-id-5", + "resource": { + "resourceType": "Condition", + "id": "cond-id-5", + "meta": { + "versionId": "1", + "lastUpdated": "2022-12-29T12:39:14.443+00:00", + "source": "#VJFR5ZXtH6s2PLWu" + }, + "code": { + "coding": [ { + "system": "http://fhir.de/CodeSystem/bfarm/icd-10-gm", + "version": "2010", + "code": "A04.0" + } ] + }, + "subject": { + "reference": "Patient/pat-id-2" + } + }, + "search": { + "mode": "match" + } + } ] +} \ No newline at end of file diff --git a/tests/test_fhirsearch.py b/tests/test_fhirsearch.py new file mode 100644 index 0000000..1172f7c --- /dev/null +++ b/tests/test_fhirsearch.py @@ -0,0 +1,59 @@ +import json +import responses +from pkg_resources import resource_filename + +from src.fhiry.fhirsearch import Fhirsearch + +# Test import FHIR search results (five resources with different codes) +# separated by three mocked FHIR search results pages +# to one Pandas dataframe +@responses.activate +def test_fhirsearch(): + + # Mock FHIR search URL of page 1 + jsonfile = open(resource_filename(__name__, 'resources') + '/fhirsearch/search-conditions-page1.json') + responses.add( + responses.GET, + 
"http://fhir-server/fhir/Condition?_count=2", + json=json.load(jsonfile), + status=200, + ) + + # Mock FHIR search URL of page 2 + jsonfile = open(resource_filename(__name__, 'resources') + '/fhirsearch/search-conditions-page2.json') + responses.add( + responses.GET, + "http://fhir-server/fhir?_getpages=b5f2b2b3-6372-4159-969a-49cbd243e154&_getpagesoffset=2&_count=2&_bundletype=searchset", + json=json.load(jsonfile), + status=200, + ) + + # Mock FHIR search URL of page 3 + jsonfile = open(resource_filename(__name__, 'resources') + '/fhirsearch/search-conditions-page3.json') + responses.add( + responses.GET, + "http://fhir-server/fhir?_getpages=b5f2b2b3-6372-4159-969a-49cbd243e154&_getpagesoffset=4&_count=2&_bundletype=searchset", + json=json.load(jsonfile), + status=200, + ) + + # Start a FHIR search on/with the mocked FHIR Server URLs + # which should process all 5 Condition resources (separated on three FHIR search results pages) + fs = Fhirsearch() + fs.fhir_base_url = "http://fhir-server/fhir" + fs.page_size = 2 + + df = fs.search(type="Condition", search_parameters={}) + + # resulting df must include all 5 condition resources (processed from all three mocked search results pages) + assert len(df) == 5 + + # Are all the different Condition codes there (exactly once)? 
+ assert len(df[df['resource.code.codingcodes'].astype('string') == "[\'A00.0\']"]) == 1 + assert len(df[df['resource.code.codingcodes'].astype('string') == "[\'A01.0\']"]) == 1 + assert len(df[df['resource.code.codingcodes'].astype('string') == "[\'A02.0\']"]) == 1 + assert len(df[df['resource.code.codingcodes'].astype('string') == "[\'A03.0\']"]) == 1 + assert len(df[df['resource.code.codingcodes'].astype('string') == "[\'A04.0\']"]) == 1 + + # There is no resource with code A05.0 in the FHIR search results + assert len(df[df['resource.code.codingcodes'].astype('string') == "[\'A05.0\']"]) == 0 From e7b9329a8842833a18b59570402033bd4910ecd7 Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Thu, 29 Dec 2022 16:11:23 +0100 Subject: [PATCH 25/31] Add dependencies for tests of FHIR search to compiled dev-requirements.txt (by pip-compile dev-requirements.in), too --- dev-requirements.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index d6efd0b..0b19461 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -75,7 +75,11 @@ pytz==2022.6 recommonmark==0.7.1 # via -r dev-requirements.in requests==2.26.0 - # via sphinx + # via + # responses + # sphinx +responses==0.22.0 + # via -r dev-requirements.in setuptools-scm==6.4.2 # via -r dev-requirements.in six==1.16.0 @@ -104,6 +108,7 @@ sphinxcontrib-serializinghtml==1.1.5 toml==0.10.2 # via # coverage + # responses # tox tomli==1.2.1 # via @@ -111,8 +116,12 @@ tomli==1.2.1 # setuptools-scm tox==3.25.0 # via -r dev-requirements.in +types-toml==0.10.8.1 + # via responses urllib3==1.26.6 - # via requests + # via + # requests + # responses virtualenv==20.8.0 # via tox wheel==0.37.1 From 1e4dc3615d38e5aab00f0781757994a7afc11992 Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Fri, 30 Dec 2022 14:08:35 +0100 Subject: [PATCH 26/31] Documentation: Syntax highlighting for source code examples --- README.md | 10 +++++----- fhir-search.md | 
19 ++++++++++--------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index d5a3b8a..ddfd076 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ into ML packages such as Tensorflow and PyTorch. Test it with the [synthea sampl ## Installation -``` +```shell pip install fhiry ``` @@ -20,7 +20,7 @@ pip install fhiry ### Import FHIR bundles (JSON) from folder to pandas dataframe -``` +```python import fhiry.parallel as fp df = fp.process('/path/to/fhir/resources') print(df.info()) @@ -32,7 +32,7 @@ Jupyter notebook example: [`notebooks/synthea.ipynb`](notebooks/synthea.ipynb) ### Import NDJSON from folder to pandas dataframe -``` +```python import fhiry.parallel as fp df = fp.ndjson('/path/to/fhir/ndjson/files') print(df.info()) @@ -50,9 +50,9 @@ Documentation: [`fhir-search.md`](fhir-search.md) #### Example: Import all conditions with a certain code -Import all [Condition](https://www.hl7.org/fhir/condition.html#search) resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element Condition.code: +Import all condition resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element `Condition.code` ([resource type specific FHIR search parameter `code`](https://www.hl7.org/fhir/condition.html#search)): -``` +```python from fhiry.fhirsearch import Fhirsearch fs = Fhirsearch() diff --git a/fhir-search.md b/fhir-search.md index 84840aa..1ec0836 100644 --- a/fhir-search.md +++ b/fhir-search.md @@ -2,14 +2,15 @@ Import resources from [FHIR Search API](https://www.hl7.org/fhir/search.html) results to [pandas](https://pandas.pydata.org/docs/user_guide/index.html) dataframe by [fhiry](README.md): -## FHIR search parameters +## FHIR search query parameters For filter options you can set by `search_parameters` see [FHIR search common parameters for all resource types](https://www.hl7.org/fhir/search.html#standard) and additional FHIR search parameters for certain resource 
types like [Patient](https://www.hl7.org/fhir/patient.html#search), [Condition](https://www.hl7.org/fhir/condition.html#search), [Observation](https://www.hl7.org/fhir/observation.html#search), ... -## Example: Import all Observations +## Example: Import all observations Import all resources (since empty search parameters / no filter) of type Observation -``` + +```python from fhiry.fhirsearch import Fhirsearch fs = Fhirsearch() @@ -21,9 +22,9 @@ print(df.info()) ## Example: Import all conditions with a certain code -Import all condition resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element Condition.code: +Import all condition resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element `Condition.code` ([resource type specific FHIR search parameter `code`](https://www.hl7.org/fhir/condition.html#search)): -``` +```python from fhiry.fhirsearch import Fhirsearch fs = Fhirsearch() @@ -51,7 +52,7 @@ Authentication is set by [requests parameter `auth`](https://requests.readthedoc Example using [HTTP Basic Auth](https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication): -``` +```python from fhiry.fhirsearch import Fhirsearch fs = Fhirsearch() @@ -67,7 +68,7 @@ You can set a HTTP(S)-Proxies by [requests parameter `proxies`](https://requests Example: -``` +```python fs.requests_kwargs["proxies"] = { 'http': 'http://10.10.1.10:3128', 'https': 'http://10.10.1.10:1080', @@ -86,7 +87,7 @@ If you want to analyze only certain elements, you can decrease RAM usage and net Example: -``` +```python from fhiry.fhirsearch import Fhirsearch fs = Fhirsearch() @@ -96,7 +97,7 @@ my_fhir_search_parameters = { ``` ... Other FHIR search parameters / filters ... 
-``` +```python "_elements": "code,verification-status,recorded-date", } From 71d6e364217a00dda8815e770d92ffe4f06595b1 Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Fri, 30 Dec 2022 19:58:29 +0100 Subject: [PATCH 27/31] Fixed JSONDecodeError (occurred in the test environment while processing tests/resources/__init__.py, which is not a JSON resource). --- src/fhiry/parallel.py | 52 ++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/src/fhiry/parallel.py b/src/fhiry/parallel.py index 5eeae4b..3e3179e 100644 --- a/src/fhiry/parallel.py +++ b/src/fhiry/parallel.py @@ -3,7 +3,8 @@ import multiprocessing as mp import pandas as pd -def process_files(file): + +def process_file(file): f = Fhiry() return f.process_file(file) @@ -12,34 +13,29 @@ def process_ndjson(file): f = Fhirndjson() return f.process_file(file) + def process(folder): - # TODO: Fix the below error when ? folder has few files - # Currently falls back when it fails - # json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) - try: - pool = mp.Pool(mp.cpu_count()) - list_of_dataframes = pool.map(process_files, [folder + '/' + row for row in os.listdir(folder)]) - pool.close() - return pd.concat(list_of_dataframes) - except: - f = Fhiry() - f.folder = folder - f.process_df() - return f.df + + pool = mp.Pool(mp.cpu_count()) + + filenames = [] + for filename in os.listdir(folder): + if filename.endswith(".json"): + filenames.append(folder + '/' + filename) + + list_of_dataframes = pool.map(process_file, filenames) + pool.close() + return pd.concat(list_of_dataframes) def ndjson(folder): - # TODO: Fix the below error when ? 
folder has few files - # Currently falls back when it fails - # json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) - try: - pool = mp.Pool(mp.cpu_count()) - list_of_dataframes = pool.map( - process_ndjson, [folder + '/' + row for row in os.listdir(folder)]) - pool.close() - return pd.concat(list_of_dataframes) - except: - f = Fhirndjson() - f.folder = folder - f.process_df() - return f.df + pool = mp.Pool(mp.cpu_count()) + + filenames = [] + for filename in os.listdir(folder): + if filename.endswith(".ndjson"): + filenames.append(folder + '/' + filename) + + list_of_dataframes = pool.map(process_ndjson, filenames) + pool.close() + return pd.concat(list_of_dataframes) From 6690b7872ad18ac19260f538ec5598b4f3f97b87 Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Sat, 31 Dec 2022 01:17:58 +0100 Subject: [PATCH 28/31] FHIR Search: Make fhir_base_url mandatory init parameter --- README.md | 7 +++---- fhir-search.md | 23 +++++++++++------------ src/fhiry/fhirsearch.py | 5 +++-- tests/test_fhirsearch.py | 3 +-- 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index ddfd076..14f6d81 100644 --- a/README.md +++ b/README.md @@ -44,19 +44,18 @@ Jupyter notebook example: [`notebooks/ndjson.ipynb`](notebooks/ndjson.ipynb) ### Import FHIR Search results to pandas dataframe -Import resources from [FHIR Search API](https://www.hl7.org/fhir/search.html) results to pandas dataframe. +Fetch and import resources from [FHIR Search API](https://www.hl7.org/fhir/search.html) results to pandas dataframe. 
Documentation: [`fhir-search.md`](fhir-search.md) #### Example: Import all conditions with a certain code -Import all condition resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element `Condition.code` ([resource type specific FHIR search parameter `code`](https://www.hl7.org/fhir/condition.html#search)): +Fetch and import all condition resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element `Condition.code` ([resource type specific FHIR search parameter `code`](https://www.hl7.org/fhir/condition.html#search)) to a pandas dataframe: ```python from fhiry.fhirsearch import Fhirsearch -fs = Fhirsearch() -fs.fhir_base_url = "http://fhir-server:8080/fhir" +fs = Fhirsearch(fhir_base_url = "http://fhir-server:8080/fhir") my_fhir_search_parameters = { "code": "http://snomed.info/sct|39065001", diff --git a/fhir-search.md b/fhir-search.md index 1ec0836..9f0778b 100644 --- a/fhir-search.md +++ b/fhir-search.md @@ -1,6 +1,6 @@ # Import FHIR search results to pandas dataframe -Import resources from [FHIR Search API](https://www.hl7.org/fhir/search.html) results to [pandas](https://pandas.pydata.org/docs/user_guide/index.html) dataframe by [fhiry](README.md): +Import resources from [FHIR Search API](https://www.hl7.org/fhir/search.html) results to [pandas](https://pandas.pydata.org/docs/user_guide/index.html) dataframe by [fhiry](README.md): ## FHIR search query parameters @@ -8,13 +8,13 @@ For filter options you can set by `search_parameters` see [FHIR search common pa ## Example: Import all observations -Import all resources (since empty search parameters / no filter) of type Observation +Fetch and import all resources (since empty search parameters / no filter) of type Observation to a pandas dataframe: ```python from fhiry.fhirsearch import Fhirsearch -fs = Fhirsearch() -fs.fhir_base_url = "http://fhir-server:8080/fhir" +fs = Fhirsearch(fhir_base_url = "http://fhir-server:8080/fhir") + df = 
fs.search(type = "Observation", search_parameters = {}) print(df.info()) @@ -22,13 +22,12 @@ print(df.info()) ## Example: Import all conditions with a certain code -Import all condition resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element `Condition.code` ([resource type specific FHIR search parameter `code`](https://www.hl7.org/fhir/condition.html#search)): +Fetch and import all condition resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element `Condition.code` ([resource type specific FHIR search parameter `code`](https://www.hl7.org/fhir/condition.html#search)) to a pandas dataframe: ```python from fhiry.fhirsearch import Fhirsearch -fs = Fhirsearch() -fs.fhir_base_url = "http://fhir-server:8080/fhir" +fs = Fhirsearch(fhir_base_url = "http://fhir-server:8080/fhir") my_fhir_search_parameters = { "code": "http://snomed.info/sct|39065001", @@ -46,6 +45,8 @@ print(df.info()) To set connection parameters like authentication, SSL certificates, proxies and so on, set or add standard [Python requests](https://requests.readthedocs.io/en/latest/) keyword arguments to the property `requests_kwargs`. +Examples: + ### Authentication Authentication is set by [requests parameter `auth`](https://requests.readthedocs.io/en/latest/user/authentication/). 
@@ -55,8 +56,7 @@ Example using [HTTP Basic Auth](https://requests.readthedocs.io/en/latest/user/a ```python from fhiry.fhirsearch import Fhirsearch -fs = Fhirsearch() -fs.fhir_base_url = "http://fhir-server:8080/fhir" +fs = Fhirsearch(fhir_base_url = "http://fhir-server:8080/fhir") # Set basic auth credentials (https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication) fs.requests_kwargs["auth"] = ('myUser', 'myPassword') @@ -64,7 +64,7 @@ fs.requests_kwargs["auth"] = ('myUser', 'myPassword') ### Proxy settings -You can set a HTTP(S)-Proxies by [requests parameter `proxies`](https://requests.readthedocs.io/en/latest/user/advanced/#proxies). +You can set HTTP(S)-Proxies by [requests parameter `proxies`](https://requests.readthedocs.io/en/latest/user/advanced/#proxies). Example: @@ -89,9 +89,8 @@ Example: ```python from fhiry.fhirsearch import Fhirsearch -fs = Fhirsearch() -fs.fhir_base_url = "http://fhir-server:8080/fhir" +fs = Fhirsearch(fhir_base_url = "http://fhir-server:8080/fhir") my_fhir_search_parameters = { ``` diff --git a/src/fhiry/fhirsearch.py b/src/fhiry/fhirsearch.py index 35c1636..f05150f 100644 --- a/src/fhiry/fhirsearch.py +++ b/src/fhiry/fhirsearch.py @@ -6,8 +6,9 @@ class Fhirsearch(object): - def __init__(self): - self.fhir_base_url = "http://fhir-server/fhir" + def __init__(self, fhir_base_url): + + self.fhir_base_url = fhir_base_url # Batch size (entries per page) self.page_size = 500 diff --git a/tests/test_fhirsearch.py b/tests/test_fhirsearch.py index 1172f7c..29150cf 100644 --- a/tests/test_fhirsearch.py +++ b/tests/test_fhirsearch.py @@ -39,8 +39,7 @@ def test_fhirsearch(): # Start a FHIR search on/with the mocked FHIR Server URLs # which should process all 5 Condition resources (separated on three FHIR search results pages) - fs = Fhirsearch() - fs.fhir_base_url = "http://fhir-server/fhir" + fs = Fhirsearch(fhir_base_url="http://fhir-server/fhir") fs.page_size = 2 df = fs.search(type="Condition", 
search_parameters={}) From 7ef5c7bcbeb9784140be914c80b2467898278725 Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Sat, 31 Dec 2022 11:14:27 +0100 Subject: [PATCH 29/31] FHIR Search: Change search() parameter name "type" (which is a reserved Python function name, too) to more specific/less ambigous "resource_type" --- README.md | 4 ++-- fhir-search.md | 10 +++++----- src/fhiry/fhirsearch.py | 4 ++-- tests/test_fhirsearch.py | 14 +++++++------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 14f6d81..b0fb16b 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Fetch and import resources from [FHIR Search API](https://www.hl7.org/fhir/searc Documentation: [`fhir-search.md`](fhir-search.md) -#### Example: Import all conditions with a certain code +#### Example: Import all conditions with a certain code from FHIR Server Fetch and import all condition resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element `Condition.code` ([resource type specific FHIR search parameter `code`](https://www.hl7.org/fhir/condition.html#search)) to a pandas dataframe: @@ -61,7 +61,7 @@ my_fhir_search_parameters = { "code": "http://snomed.info/sct|39065001", } -df = fs.search(type = "Condition", search_parameters = my_fhir_search_parameters) +df = fs.search(resource_type = "Condition", search_parameters = my_fhir_search_parameters) print(df.info()) ``` diff --git a/fhir-search.md b/fhir-search.md index 9f0778b..101e423 100644 --- a/fhir-search.md +++ b/fhir-search.md @@ -6,7 +6,7 @@ Import resources from [FHIR Search API](https://www.hl7.org/fhir/search.html) re For filter options you can set by `search_parameters` see [FHIR search common parameters for all resource types](https://www.hl7.org/fhir/search.html#standard) and additional FHIR search parameters for certain resource types like [Patient](https://www.hl7.org/fhir/patient.html#search), 
[Condition](https://www.hl7.org/fhir/condition.html#search), [Observation](https://www.hl7.org/fhir/observation.html#search), ... -## Example: Import all observations +## Example: Import all observations from FHIR server Fetch and import all resources (since empty search parameters / no filter) of type Observation to a pandas dataframe: @@ -15,12 +15,12 @@ from fhiry.fhirsearch import Fhirsearch fs = Fhirsearch(fhir_base_url = "http://fhir-server:8080/fhir") -df = fs.search(type = "Observation", search_parameters = {}) +df = fs.search(resource_type = "Observation", search_parameters = {}) print(df.info()) ``` -## Example: Import all conditions with a certain code +## Example: Import all conditions with a certain code from FHIR server Fetch and import all condition resources with Snomed (Codesystem `http://snomed.info/sct`) Code `39065001` in the FHIR element `Condition.code` ([resource type specific FHIR search parameter `code`](https://www.hl7.org/fhir/condition.html#search)) to a pandas dataframe: @@ -33,7 +33,7 @@ my_fhir_search_parameters = { "code": "http://snomed.info/sct|39065001", } -df = fs.search(type = "Condition", search_parameters = my_fhir_search_parameters) +df = fs.search(resource_type = "Condition", search_parameters = my_fhir_search_parameters) print(df.info()) ``` @@ -101,7 +101,7 @@ my_fhir_search_parameters = { "_elements": "code,verification-status,recorded-date", } -df = fs.search(type = "Condition", search_parameters = my_fhir_search_parameters) +df = fs.search(resource_type = "Condition", search_parameters = my_fhir_search_parameters) print(df.info()) ``` diff --git a/src/fhiry/fhirsearch.py b/src/fhiry/fhirsearch.py index f05150f..98bbf75 100644 --- a/src/fhiry/fhirsearch.py +++ b/src/fhiry/fhirsearch.py @@ -20,14 +20,14 @@ def __init__(self, fhir_base_url): # SSL Certificates: https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification self.requests_kwargs = {} - def search(self, type="Patient", search_parameters={}): + 
def search(self, resource_type="Patient", search_parameters={}): headers = {"Content-Type": "application/fhir+json"} if '_count' not in search_parameters: search_parameters['_count'] = self.page_size - search_url = f'{self.fhir_base_url}/{type}' + search_url = f'{self.fhir_base_url}/{resource_type}' r = requests.get(search_url, params=search_parameters, headers=headers, **self.requests_kwargs) r.raise_for_status() bundle_dict = r.json() diff --git a/tests/test_fhirsearch.py b/tests/test_fhirsearch.py index 29150cf..16d351c 100644 --- a/tests/test_fhirsearch.py +++ b/tests/test_fhirsearch.py @@ -42,17 +42,17 @@ def test_fhirsearch(): fs = Fhirsearch(fhir_base_url="http://fhir-server/fhir") fs.page_size = 2 - df = fs.search(type="Condition", search_parameters={}) + df = fs.search(resource_type="Condition", search_parameters={}) # resulting df must include all 5 condition resources (processed from all three mocked search results pages) assert len(df) == 5 # Are all the different Condition codes there (exactly once)? 
- assert len(df[df['resource.code.codingcodes'].astype('string') == "[\'A00.0\']"]) == 1 - assert len(df[df['resource.code.codingcodes'].astype('string') == "[\'A01.0\']"]) == 1 - assert len(df[df['resource.code.codingcodes'].astype('string') == "[\'A02.0\']"]) == 1 - assert len(df[df['resource.code.codingcodes'].astype('string') == "[\'A03.0\']"]) == 1 - assert len(df[df['resource.code.codingcodes'].astype('string') == "[\'A04.0\']"]) == 1 + assert len(df[df['resource.code.codingcodes'].astype('string') == "['A00.0']"]) == 1 + assert len(df[df['resource.code.codingcodes'].astype('string') == "['A01.0']"]) == 1 + assert len(df[df['resource.code.codingcodes'].astype('string') == "['A02.0']"]) == 1 + assert len(df[df['resource.code.codingcodes'].astype('string') == "['A03.0']"]) == 1 + assert len(df[df['resource.code.codingcodes'].astype('string') == "['A04.0']"]) == 1 # There is no resource with code A05.0 in the FHIR search results - assert len(df[df['resource.code.codingcodes'].astype('string') == "[\'A05.0\']"]) == 0 + assert len(df[df['resource.code.codingcodes'].astype('string') == "['A05.0']"]) == 0 From 6ce5a27b0726268d9d77d36c03a175dac09b3bd7 Mon Sep 17 00:00:00 2001 From: Markus Mandalka Date: Sat, 31 Dec 2022 12:32:23 +0100 Subject: [PATCH 30/31] Findability: Add important keywords to headings --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e360bfb..72cdc33 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ -# :fire: fhiry - FHIR for AI and ML +# :fire: fhiry - FHIR to pandas dataframe for data analysis, AI and ML ![Libraries.io SourceRank](https://img.shields.io/librariesio/sourcerank/pypi/fhiry) [![PyPI download total](https://img.shields.io/pypi/dm/fhiry.svg)](https://pypi.python.org/pypi/fhiry/) ![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/dermatologist/fhiry) -## About +## Open Source Python library for import of FHIR resources to pandas dataframe [Bulk data 
export using FHIR](https://hl7.org/fhir/uv/bulkdata/export/index.html) may be important if you want to export a cohort for analysis or machine learning. -:fire: **Fhiry** is a python package to facilitate this by converting a folder of [FHIR bundles](https://www.hl7.org/fhir/bundle.html)/ndjson into a [pandas](https://pandas.pydata.org/docs/user_guide/index.html) data frame for analysis and importing +:fire: **Fhiry** is a [python](https://www.python.org/) package to facilitate this by converting a folder of [FHIR bundles](https://www.hl7.org/fhir/bundle.html)/ndjson into a [pandas](https://pandas.pydata.org/docs/user_guide/index.html) data frame for analysis and importing into ML packages such as Tensorflow and PyTorch. Test it with the [synthea sample](https://synthea.mitre.org/downloads) or the downloaded ndjson from the [SMART Bulk data server](https://bulk-data.smarthealthit.org/). Use the 'Discussions' tab above for feature requests. ## Installation From da2b138ffd04e900ed468dcfe95d617296a32bd9 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 6 Feb 2023 21:42:45 +0000 Subject: [PATCH 31/31] Commit from GitHub Actions (Generate changelog) --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f278c40..f1c6cbb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ # Changelog -## [Unreleased](https://github.com/dermatologist/fhiry/tree/HEAD) +## [2.0.0](https://github.com/dermatologist/fhiry/tree/2.0.0) (2022-03-17) -[Full Changelog](https://github.com/dermatologist/fhiry/compare/1.0.0...HEAD) +[Full Changelog](https://github.com/dermatologist/fhiry/compare/1.0.0...2.0.0) **Closed issues:**