From bd472bf375e719ffbb737890fa8118c963d5adef Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Thu, 6 May 2021 13:01:39 +1200
Subject: [PATCH 01/48] Prevent the app from crashing if SPARC_PORTAL_AWS_KEY
 and/or SPARC_PORTAL_AWS_SECRET cannot be found.

---
 app/main.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/app/main.py b/app/main.py
index 37d3dec..ddf9936 100644
--- a/app/main.py
+++ b/app/main.py
@@ -40,8 +40,11 @@
     region_name="us-east-1",
 )
 
-os.environ["AWS_ACCESS_KEY_ID"] = Config.SPARC_PORTAL_AWS_KEY
-os.environ["AWS_SECRET_ACCESS_KEY"] = Config.SPARC_PORTAL_AWS_SECRET
+try:
+    os.environ["AWS_ACCESS_KEY_ID"] = Config.SPARC_PORTAL_AWS_KEY
+    os.environ["AWS_SECRET_ACCESS_KEY"] = Config.SPARC_PORTAL_AWS_SECRET
+except:
+    pass
 
 biolucida_lock = Lock()
 

From 1727449f2d4e7d586795fb587708ec08c44c7a02 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Thu, 6 May 2021 15:31:26 +1200
Subject: [PATCH 02/48] Require the `osparc` Python package.

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 424d281..5d71d10 100644
--- a/README.md
+++ b/README.md
@@ -10,10 +10,13 @@ Make sure you have python 3 installed `python3 --version`
 python3 -m venv ./venv
 . ./venv/bin/activate
 pip install -r requirements.txt
+pip install git+https://github.com/ITISFoundation/osparc-simcore-python-client.git
 pip install -r requirements-dev.txt
 gunicorn main:app
 ```
 
+**Note:** the latest version of the `osparc` package on [PyPI](https://pypi.org/project/osparc/) is version 0.3.10 while we need at least version 0.4.3, hence we currently need to install it off `osparc`'s [GitHub repository](https://github.com/ITISFoundation/osparc-simcore-python-client).
+
 # Testing
 
 If you do not have the NIH SPARC portal user environment variables setup already:

From 5760f3c2a2c62ceb3577e0a4f4c4c6e3042da76a Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Thu, 6 May 2021 17:15:24 +1200
Subject: [PATCH 03/48] Retrieve oSPARC-related configuration.

---
 app/config.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/app/config.py b/app/config.py
index 83616bb..cbbcc3a 100644
--- a/app/config.py
+++ b/app/config.py
@@ -41,4 +41,7 @@ class Config(object):
     CCB_HEAD_WRIKE_ID = os.environ.get("CCB_HEAD_WRIKE_ID")
     MODERATOR_WRIKE_ID = os.environ.get("MODERATOR_WRIKE_ID")
     MAILCHIMP_API_KEY = os.environ.get("MAILCHIMP_API_KEY")
+    OSPARC_API_URL=os.environ.get("OSPARC_API_URL", "https://api.staging.osparc.io")
+    OSPARC_API_KEY=os.environ.get("OSPARC_API_KEY")
+    OSPARC_API_SECRET=os.environ.get("OSPARC_API_SECRET")
 

From 84e380e733fa3bfe6adbaf6bdff02b4f262c60f9 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Thu, 6 May 2021 17:31:50 +1200
Subject: [PATCH 04/48] Updated the required version of various Python
 packages.

Indeed, there were various incompatibilities, especially after installing the `osparc` package.
---
 requirements.txt | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 5f6befa..61c2b6d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,10 @@
 api==0.0.7
 pennsieve==6.1.1
-boto3==1.10.28
-botocore==1.13.34
+boto3==1.17.67
+botocore==1.20.67
 certifi==2019.11.28
 chardet==3.0.4
-Click==7.0
+Click==7.1.2
 docutils==0.15.2
 Flask==1.1.1
 flask-marshmallow==0.10.1
@@ -22,10 +22,10 @@ pymongo==3.8.0
 python-dateutil==2.8.0
 python-dotenv==0.10.3
 query-string==2019.4.13
-requests==2.22.0
-s3transfer==0.2.1
+requests==2.25.1
+s3transfer==0.4.2
 six==1.13.0
 SQLAlchemy==1.3.20
-urllib3==1.25.7
+urllib3==1.26.4
 Werkzeug==0.16.0
 psycopg2-binary==2.8.6

From 42b693dbb2a2668a63323ab299e9deb4ebeb28a3 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Thu, 6 May 2021 18:10:50 +1200
Subject: [PATCH 05/48] Retrieve oSPARC-related configuration.

Although only the staging site is currently usable, we will be using the official site at some point, so default to the official site's URL.
---
 app/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/config.py b/app/config.py
index cbbcc3a..f449009 100644
--- a/app/config.py
+++ b/app/config.py
@@ -41,7 +41,7 @@ class Config(object):
     CCB_HEAD_WRIKE_ID = os.environ.get("CCB_HEAD_WRIKE_ID")
     MODERATOR_WRIKE_ID = os.environ.get("MODERATOR_WRIKE_ID")
     MAILCHIMP_API_KEY = os.environ.get("MAILCHIMP_API_KEY")
-    OSPARC_API_URL=os.environ.get("OSPARC_API_URL", "https://api.staging.osparc.io")
+    OSPARC_API_URL=os.environ.get("OSPARC_API_URL", "https://api.osparc.io")
     OSPARC_API_KEY=os.environ.get("OSPARC_API_KEY")
     OSPARC_API_SECRET=os.environ.get("OSPARC_API_SECRET")
 

From 1ac51c520c3c0dad2b0a643a75962b0cadccc1c3 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Fri, 7 May 2021 12:28:27 +1200
Subject: [PATCH 06/48] Added an endpoint to run a simulation on oSPARC.

This requires some configuration information (as JSON), which is currently hard-coded.
---
 app/main.py   |  17 +++++++++
 app/osparc.py | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+)
 create mode 100644 app/osparc.py

diff --git a/app/main.py b/app/main.py
index ddf9936..c524459 100644
--- a/app/main.py
+++ b/app/main.py
@@ -21,6 +21,8 @@
 from requests.auth import HTTPBasicAuth
 import os
 
+import app.osparc as osparc
+
 # from pymongo import MongoClient
 
 app = Flask(__name__)
@@ -501,3 +503,18 @@ def subscribe_to_mailchimp():
             return resp.json()
     else:
         abort(400, description="Missing email_address, first_name or last_name")
+
+
+@app.route("/simulation")
+def simulation():
+    res = osparc.run_simulation({
+        "simulation": {
+            "Ending point": 3,
+            "Point interval": 0.001
+        },
+        "output": [
+            "Membrane/V"
+        ]
+    })
+
+    return json.dumps(res)
diff --git a/app/osparc.py b/app/osparc.py
new file mode 100644
index 0000000..4e86e35
--- /dev/null
+++ b/app/osparc.py
@@ -0,0 +1,104 @@
+from app.config import Config
+import json
+import os
+import osparc
+import tempfile
+from time import sleep
+
+
+def run_simulation(config):
+    temp_config_file = tempfile.NamedTemporaryFile(mode="w+")
+
+    json.dump(config, temp_config_file)
+
+    temp_config_file.seek(0)
+
+    try:
+        api_client = osparc.ApiClient(osparc.Configuration(
+            host=Config.OSPARC_API_URL,
+            username=Config.OSPARC_API_KEY,
+            password=Config.OSPARC_API_SECRET,
+        ))
+
+        # Upload the configuration file.
+
+        files_api = osparc.FilesApi(api_client)
+
+        try:
+            config_file = files_api.upload_file(temp_config_file.name)
+        except:
+            raise Exception(
+                "the simulation configuration file could not be uploaded")
+
+        # Create the simulation.
+
+        solvers_api = osparc.SolversApi(api_client)
+
+        solver = solvers_api.get_solver_release(
+            "simcore/services/comp/opencor", "1.0.3")
+
+        job = solvers_api.create_job(
+            solver.id,
+            solver.version,
+            osparc.JobInputs({
+                "model_url": "https://models.physiomeproject.org/e/611/HumanSAN_Fabbri_Fantini_Wilders_Severi_2017.cellml",
+                "config_file": config_file
+            }),
+        )
+
+        # Start the simulation job.
+
+        status = solvers_api.start_job(solver.id, solver.version, job.id)
+
+        if status.state != "PUBLISHED":
+            raise Exception("the simulation job could not be submitted")
+
+        # Wait for the simulation job to be complete (or to fail).
+
+        while True:
+            status = solvers_api.inspect_job(solver.id, solver.version, job.id)
+
+            if status.progress == 100:
+                break
+
+            sleep(1)
+
+        status = solvers_api.inspect_job(solver.id, solver.version, job.id)
+
+        if status.state != "SUCCESS":
+            raise Exception("the simulation failed")
+
+        # Retrieve the simulation job outputs.
+
+        try:
+            outputs = solvers_api.get_job_outputs(
+                solver.id, solver.version, job.id)
+        except:
+            raise Exception(
+                "the simulation job outputs could not be retrieved")
+
+        # Download the simulation results.
+
+        try:
+            results_filename = files_api.download_file(
+                outputs.results["output_1"].id)
+        except:
+            raise Exception("the simulation results could not be retrieved")
+
+        results_file = open(results_filename, "r")
+
+        res = {
+            "status": "ok",
+            "results": json.load(results_file)
+        }
+
+        results_file.close()
+    except Exception as e:
+        res = {
+            "status": "nok",
+            "description": e.args[0],
+        }
+
+    temp_config_file.close()
+
+    return res

From ebe54e3c96b1d2496fea48fc22b826da69a5fa9d Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Fri, 7 May 2021 12:43:14 +1200
Subject: [PATCH 07/48] Pass a model URL to run_simulation().

---
 app/main.py   | 19 ++++++++++---------
 app/osparc.py |  6 +++---
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/app/main.py b/app/main.py
index c524459..b4151eb 100644
--- a/app/main.py
+++ b/app/main.py
@@ -507,14 +507,15 @@ def subscribe_to_mailchimp():
 
 @app.route("/simulation")
 def simulation():
-    res = osparc.run_simulation({
-        "simulation": {
-            "Ending point": 3,
-            "Point interval": 0.001
-        },
-        "output": [
-            "Membrane/V"
-        ]
-    })
+    res = osparc.run_simulation("https://models.physiomeproject.org/e/611/HumanSAN_Fabbri_Fantini_Wilders_Severi_2017.cellml",
+                                {
+                                    "simulation": {
+                                        "Ending point": 3,
+                                        "Point interval": 0.001
+                                    },
+                                    "output": [
+                                        "Membrane/V"
+                                    ]
+                                })
 
     return json.dumps(res)
diff --git a/app/osparc.py b/app/osparc.py
index 4e86e35..62ab93f 100644
--- a/app/osparc.py
+++ b/app/osparc.py
@@ -6,10 +6,10 @@
 from time import sleep
 
 
-def run_simulation(config):
+def run_simulation(model_url, config_json):
     temp_config_file = tempfile.NamedTemporaryFile(mode="w+")
 
-    json.dump(config, temp_config_file)
+    json.dump(config_json, temp_config_file)
 
     temp_config_file.seek(0)
 
@@ -41,7 +41,7 @@ def run_simulation(config):
             solver.id,
             solver.version,
             osparc.JobInputs({
-                "model_url": "https://models.physiomeproject.org/e/611/HumanSAN_Fabbri_Fantini_Wilders_Severi_2017.cellml",
+                "model_url": model_url,
                 "config_file": config_file
             }),
         )

From 9c8326e0e57eed5629b4b9cfca54b554768503d0 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Fri, 7 May 2021 16:14:37 +1200
Subject: [PATCH 08/48] Some minor cleaning up.

---
 app/osparc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/osparc.py b/app/osparc.py
index 62ab93f..9055da7 100644
--- a/app/osparc.py
+++ b/app/osparc.py
@@ -6,10 +6,10 @@
 from time import sleep
 
 
-def run_simulation(model_url, config_json):
+def run_simulation(model_url, json_config):
     temp_config_file = tempfile.NamedTemporaryFile(mode="w+")
 
-    json.dump(config_json, temp_config_file)
+    json.dump(json_config, temp_config_file)
 
     temp_config_file.seek(0)
 

From 79c97d9ade21ff94eee4a32a4241d1f8de596d4e Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Fri, 7 May 2021 16:16:01 +1200
Subject: [PATCH 09/48] Add support for arguments to the run_simulation
 endpoint.

---
 app/main.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/app/main.py b/app/main.py
index b4151eb..18444c5 100644
--- a/app/main.py
+++ b/app/main.py
@@ -507,15 +507,10 @@ def subscribe_to_mailchimp():
 
 @app.route("/simulation")
 def simulation():
-    res = osparc.run_simulation("https://models.physiomeproject.org/e/611/HumanSAN_Fabbri_Fantini_Wilders_Severi_2017.cellml",
-                                {
-                                    "simulation": {
-                                        "Ending point": 3,
-                                        "Point interval": 0.001
-                                    },
-                                    "output": [
-                                        "Membrane/V"
-                                    ]
-                                })
-
-    return json.dumps(res)
+    model_url = request.args.get("model_url")
+    json_config = request.args.get("json_config")
+
+    if (model_url != None) and (json_config != None):
+        return json.dumps(osparc.run_simulation(model_url, json.loads(json_config)))
+    else:
+        abort(400, description="Missing model URL and/or JSON configuration")

From 1bc81cbffa194da36e6ed39b43364fb16aabec4f Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Mon, 10 May 2021 21:09:59 +1200
Subject: [PATCH 10/48] Can now pip install the version we need of the oSPARC
 API.

---
 README.md        | 3 ---
 requirements.txt | 1 +
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 5d71d10..424d281 100644
--- a/README.md
+++ b/README.md
@@ -10,13 +10,10 @@ Make sure you have python 3 installed `python3 --version`
 python3 -m venv ./venv
 . ./venv/bin/activate
 pip install -r requirements.txt
-pip install git+https://github.com/ITISFoundation/osparc-simcore-python-client.git
 pip install -r requirements-dev.txt
 gunicorn main:app
 ```
 
-**Note:** the latest version of the `osparc` package on [PyPI](https://pypi.org/project/osparc/) is version 0.3.10 while we need at least version 0.4.3, hence we currently need to install it off `osparc`'s [GitHub repository](https://github.com/ITISFoundation/osparc-simcore-python-client).
-
 # Testing
 
 If you do not have the NIH SPARC portal user environment variables setup already:
diff --git a/requirements.txt b/requirements.txt
index 61c2b6d..b1010ab 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,6 +17,7 @@ jmespath==0.9.4
 MarkupSafe==1.1.1
 marshmallow==3.2.2
 nose==1.3.7
+osparc==0.4.3
 public==2019.4.13
 pymongo==3.8.0
 python-dateutil==2.8.0

From ebbeb82dc8b512b0836a34553f48f167acc8bca8 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Tue, 11 May 2021 16:18:57 +1200
Subject: [PATCH 11/48] Some minor cleaning up.

---
 app/osparc.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/app/osparc.py b/app/osparc.py
index 9055da7..fd58e23 100644
--- a/app/osparc.py
+++ b/app/osparc.py
@@ -1,6 +1,5 @@
 from app.config import Config
 import json
-import os
 import osparc
 import tempfile
 from time import sleep
@@ -17,7 +16,7 @@ def run_simulation(model_url, json_config):
         api_client = osparc.ApiClient(osparc.Configuration(
             host=Config.OSPARC_API_URL,
             username=Config.OSPARC_API_KEY,
-            password=Config.OSPARC_API_SECRET,
+            password=Config.OSPARC_API_SECRET
         ))
 
         # Upload the configuration file.
@@ -43,7 +42,7 @@ def run_simulation(model_url, json_config):
             osparc.JobInputs({
                 "model_url": model_url,
                 "config_file": config_file
-            }),
+            })
         )
 
         # Start the simulation job.
@@ -96,7 +95,7 @@ def run_simulation(model_url, json_config):
     except Exception as e:
         res = {
             "status": "nok",
-            "description": e.args[0],
+            "description": e.args[0]
         }
 
     temp_config_file.close()

From 0591f99326695186c527cd2100b1736af094a2aa Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Tue, 11 May 2021 16:42:27 +1200
Subject: [PATCH 12/48] Use the POST method for our simulation endpoint.

---
 app/main.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/app/main.py b/app/main.py
index 18444c5..46313cb 100644
--- a/app/main.py
+++ b/app/main.py
@@ -505,12 +505,11 @@ def subscribe_to_mailchimp():
         abort(400, description="Missing email_address, first_name or last_name")
 
 
-@app.route("/simulation")
+@app.route("/simulation", methods=["POST"])
 def simulation():
-    model_url = request.args.get("model_url")
-    json_config = request.args.get("json_config")
+    data = request.get_json()
 
-    if (model_url != None) and (json_config != None):
-        return json.dumps(osparc.run_simulation(model_url, json.loads(json_config)))
+    if data and "model_url" in data and "json_config" in data:
+        return json.dumps(osparc.run_simulation(data["model_url"], data["json_config"]))
     else:
         abort(400, description="Missing model URL and/or JSON configuration")

From 8c149f86a464e03884839c95cb2e4fbce10ceba1 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Wed, 12 May 2021 10:54:43 +1200
Subject: [PATCH 13/48] Slight improvements to README.md.

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 424d281..b306f00 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,6 @@ Make sure you have python 3 installed `python3 --version`
 python3 -m venv ./venv
 . ./venv/bin/activate
 pip install -r requirements.txt
-pip install -r requirements-dev.txt
 gunicorn main:app
 ```
 
@@ -24,6 +23,7 @@ If you do not have the NIH SPARC portal user environment variables setup already
 After the previous steps or if you already have those environment variables setup, run:
 
 ```
+export PYTHONPATH=`pwd`
 pip install -r requirements-dev.txt
 pytest
 ```

From 943461b13f90fa450557597d7a398bfe851ea92d Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Wed, 12 May 2021 16:18:12 +1200
Subject: [PATCH 14/48] oSPARC: check whether the exception has at least one
 argument.

During our testing (`tests/test_osparc.py`), there have been cases where an exception was raised, not by us, and with no arguments, in which case `e.args[0]` cannot be accessed; report "unknown" as the description instead.
---
 app/osparc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/osparc.py b/app/osparc.py
index fd58e23..9658ae1 100644
--- a/app/osparc.py
+++ b/app/osparc.py
@@ -95,7 +95,7 @@ def run_simulation(model_url, json_config):
     except Exception as e:
         res = {
             "status": "nok",
-            "description": e.args[0]
+            "description": e.args[0] if len(e.args) > 0 else "unknown"
         }
 
     temp_config_file.close()

From fa5fbcadb03845d90eb1845488c5727aaad1042b Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Wed, 12 May 2021 16:24:34 +1200
Subject: [PATCH 15/48] Some tests for the `/simulation` endpoint.

---
 tests/test_osparc.py | 84 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 tests/test_osparc.py

diff --git a/tests/test_osparc.py b/tests/test_osparc.py
new file mode 100644
index 0000000..99138b7
--- /dev/null
+++ b/tests/test_osparc.py
@@ -0,0 +1,84 @@
+import json
+import pytest
+from app import app
+
+
+@pytest.fixture
+def client():
+    app.config['TESTING'] = True
+    return app.test_client()
+
+
+def test_osparc_no_post(client):
+    r = client.get('/simulation')
+    assert r.status_code == 405
+
+
+def test_osparc_empty_post(client):
+    r = client.post("/simulation", json={})
+    assert r.status_code == 400
+
+
+def test_osparc_no_json_config(client):
+    data = {
+        "model_url": "https://models.physiomeproject.org/e/611/HumanSAN_Fabbri_Fantini_Wilders_Severi_2017.cellml"
+    }
+    r = client.post("/simulation", json=data)
+    assert r.status_code == 400
+
+
+def test_osparc_no_model_url(client):
+    data = {
+        "json_config": {
+            "simulation": {
+                "Ending point": 0.003,
+                "Point interval": 0.001,
+            },
+            "output": ["Membrane/V"]
+        }
+    }
+    r = client.post("/simulation", json=data)
+    assert r.status_code == 400
+
+
+def test_osparc_valid_data(client):
+    data = {
+        "model_url": "https://models.physiomeproject.org/e/611/HumanSAN_Fabbri_Fantini_Wilders_Severi_2017.cellml",
+        "json_config": {
+            "simulation": {
+                "Ending point": 0.003,
+                "Point interval": 0.001,
+            },
+            "output": ["Membrane/V"]
+        }
+    }
+    res = {
+        "status": "ok",
+        "results": {
+            "environment/time": [0.0, 0.001, 0.002, 0.003],
+            "Membrane/V": [-47.787168, -47.74547155339473, -47.72515226841376, -47.71370033208329]
+        }
+    }
+    r = client.post("/simulation", json=data)
+    assert r.status_code == 200
+    assert json.dumps(json.loads(r.data), sort_keys=True) == json.dumps(res, sort_keys=True)
+
+
+def test_osparc_failing_simulation(client):
+    data = {
+        "model_url": "https://models.physiomeproject.org/e/611/HumanSAN_Fabbri_Fantini_Wilders_Severi_2017.cellml",
+        "json_config": {
+            "simulation": {
+                "Ending point": 3.0,
+                "Point interval": 1.0,
+            },
+            "output": ["Membrane/V"]
+        }
+    }
+    res = {
+        "status": "nok",
+        "description": "the simulation failed"
+    }
+    r = client.post("/simulation", json=data)
+    assert r.status_code == 200
+    assert json.dumps(json.loads(r.data), sort_keys=True) == json.dumps(res, sort_keys=True)

From 74badcffac911279a9e49d9b85813675877deea1 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Thu, 20 May 2021 16:09:10 +1200
Subject: [PATCH 16/48] Working implementation of scaffold search

---
 app/config.py             | 2 +-
 app/process_kb_results.py | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/app/config.py b/app/config.py
index 83616bb..17e59e1 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_Datasets_pr")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
     SIM_CORE_TECH_LEAD_WRIKE_ID = os.environ.get("SIM_CORE_TECH_LEAD_WRIKE_ID")
diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 8632030..29a7026 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -137,6 +137,7 @@ def process_kb_results(results):
         attr = get_attributes(attributes, hit)
         attr['doi'] = convert_doi_to_url(attr['doi'])
         attr['csvFiles'] = find_csv_files(attr['csvFiles'])
+        attr['scaffolds'] = find_scaffold_json_files(hit['_source']['objects'])
         output.append(attr)
     return json.dumps({'numberOfHits': results['hits']['total'], 'results': output})
 
@@ -153,6 +154,12 @@ def find_csv_files(obj_list):
     return [obj for obj in obj_list if obj.get('mimetype', 'none') == 'text/csv']
 
 
+def find_scaffold_json_files(obj_list):
+    if not obj_list:
+        return obj_list
+    return [obj for obj in obj_list if obj.get('mimetype', 'none') == 'application/json' and "metadata.json" in obj.get('dataset', 'none')['path']]
+
+
 # get_attributes: Use 'attributes' (defined at top of this document) to step through the large scicrunch result dict
 #  and cherrypick the attributes of interest
 def get_attributes(attributes, dataset):

From 5e3509607cd9421d1969a9156944f67d3f073e6c Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 1 Jun 2021 23:52:42 +1200
Subject: [PATCH 17/48] Add endpoint to retrieve doi from scicrunch

---
 app/config.py             |  2 +-
 app/main.py               | 14 ++++++++++++++
 app/process_kb_results.py | 22 ++++++++++++++++++++++
 main.py                   |  2 +-
 tests/test_scicrunch.py   |  5 +++++
 5 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/app/config.py b/app/config.py
index 4b2fa5c..0684428 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_Datasets_pr")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
diff --git a/app/main.py b/app/main.py
index 178ffd6..f3f1f17 100644
--- a/app/main.py
+++ b/app/main.py
@@ -210,6 +210,20 @@ def direct_download_url(path):
     resource = response["Body"].read()
     return resource
 
+# /scicrunch/: Returns scicrunch results for a given <search> query
+@app.route("/scicrunch-dataset/<doi1>/<doi2>")
+def sci_doi(doi1,doi2):
+    doi = doi1 + '/' + doi2
+    print(doi)
+    data = create_doi_request(doi)
+    try:
+        response = requests.post(
+            f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',
+            json=data)
+        return response.json()
+    except requests.exceptions.HTTPError as err:
+        logging.error(err)
+        return json.dumps({'error': err})
 
 # /search/: Returns scicrunch results for a given <search> query
 @app.route("/search/", defaults={'query': ''})
diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 8632030..76e9612 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -17,6 +17,23 @@
     'csvFiles': ['objects']
 }
 
+def create_doi_request(doi):
+
+    query = {
+        "query": {
+            "bool": {
+                "must": [{"match_all": {}}],
+                "should": [],
+                "filter": {
+                    "term": {
+                        "_id": doi
+                    }
+                }
+            }
+        }
+    }
+
+    return query
 
 # create_facet_query(type): Generates facet search request data for scicrunch  given a 'type'; where
 # 'type' is either 'species', 'gender', or 'genotype' at this stage.
@@ -146,6 +163,11 @@ def convert_doi_to_url(doi):
         return doi
     return doi.replace('DOI:', 'https://doi.org/')
 
+def convert_url_to_doi(doi):
+    if not doi:
+        return doi
+    return doi.replace('https://doi.org/', 'DOI:')
+
 
 def find_csv_files(obj_list):
     if not obj_list:
diff --git a/main.py b/main.py
index aa5c9d1..475dcbd 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,4 @@
 from app.main import app
 
 if __name__ == "__main__":
-    app.run(host="0.0.0.0")
+    app.run(host="0.0.0.0", port=8000)
diff --git a/tests/test_scicrunch.py b/tests/test_scicrunch.py
index bdc650e..bd68c25 100644
--- a/tests/test_scicrunch.py
+++ b/tests/test_scicrunch.py
@@ -13,6 +13,11 @@ def test_scicrunch_keys(client):
     assert r.status_code == 200
     assert 'numberOfHits' in json.loads(r.data).keys()
 
+def test_scicrunch_dataset_doi(client):
+    r = client.get('/scicrunch-dataset/DOI%3A10.26275%2Fpzek-91wx')
+    assert json.loads(r.data)['hits']['hits'][0]['_id'] == "DOI:10.26275/pzek-91wx"
+
+
 def test_scicrunch_search(client):
     r = client.get('/search/heart')
     assert r.status_code == 200

From 9b37fc6f9e2a3a62871be6dad7fe06c069f0750b Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Wed, 2 Jun 2021 00:06:31 +1200
Subject: [PATCH 18/48] Switch to a more up-to-date index

---
 app/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/config.py b/app/config.py
index 0684428..bb89b04 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_dev")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")

From 1e39cff59001617b3e5a0770ae595641ff8e5ccc Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Wed, 2 Jun 2021 13:30:43 +1200
Subject: [PATCH 19/48] Use the latest SciCrunch endpoint we were given.

---
 app/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/config.py b/app/config.py
index 9701910..adf2a50 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_Datasets_pr")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")

From dc2ab72a714aa29b9bcc6aa445254039e1ba5d35 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Wed, 2 Jun 2021 14:13:29 +1200
Subject: [PATCH 20/48] process_kb_results: prevent some warnings.

---
 app/process_kb_results.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 8632030..fd94883 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -62,7 +62,7 @@ def create_filter_request(query, terms, facets, size, start):
     if start is None:
         start = 0
 
-    if query is "" and len(terms) is 0 and len(facets) is 0:
+    if query == "" and len(terms) is 0 and len(facets) is 0:
         return {"size": size, "from": start}
 
     # Type map is used to map scicrunch paths to given facet
@@ -108,10 +108,10 @@ def facet_query_string(query, terms, facets, type_map):
 
     # Add search query if it exists
     qt = ""
-    if query is not "":
+    if query != "":
         qt = f'({query})'
 
-    if query is not "" and len(t) > 0:
+    if query != "" and len(t) > 0:
         qt += " AND "
 
     # Add the brackets and OR and AND parameters

From c206f58ff2085ef83c151da4e5fd7ed8c9539756 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Wed, 2 Jun 2021 14:20:40 +1200
Subject: [PATCH 21/48] Revert "process_kb_results: prevent some warnings."

This reverts commit dc2ab72a714aa29b9bcc6aa445254039e1ba5d35.
---
 app/process_kb_results.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index fd94883..8632030 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -62,7 +62,7 @@ def create_filter_request(query, terms, facets, size, start):
     if start is None:
         start = 0
 
-    if query == "" and len(terms) is 0 and len(facets) is 0:
+    if query is "" and len(terms) is 0 and len(facets) is 0:
         return {"size": size, "from": start}
 
     # Type map is used to map scicrunch paths to given facet
@@ -108,10 +108,10 @@ def facet_query_string(query, terms, facets, type_map):
 
     # Add search query if it exists
     qt = ""
-    if query != "":
+    if query is not "":
         qt = f'({query})'
 
-    if query != "" and len(t) > 0:
+    if query is not "" and len(t) > 0:
         qt += " AND "
 
     # Add the brackets and OR and AND parameters

From a15095a6a8b34ccb9c6a5639e68177d9f2812566 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Wed, 2 Jun 2021 14:22:10 +1200
Subject: [PATCH 22/48] process_kb_results: prevent some warnings.

---
 app/process_kb_results.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 8632030..168e51d 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -62,7 +62,7 @@ def create_filter_request(query, terms, facets, size, start):
     if start is None:
         start = 0
 
-    if query is "" and len(terms) is 0 and len(facets) is 0:
+    if not query and not terms and not facets:
         return {"size": size, "from": start}
 
     # Type map is used to map scicrunch paths to given facet
@@ -108,10 +108,10 @@ def facet_query_string(query, terms, facets, type_map):
 
     # Add search query if it exists
     qt = ""
-    if query is not "":
+    if query:
         qt = f'({query})'
 
-    if query is not "" and len(t) > 0:
+    if query and t:
         qt += " AND "
 
     # Add the brackets and OR and AND parameters

From 646975950038ed8363392883d28e8c147536ba4b Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Wed, 2 Jun 2021 14:46:14 +1200
Subject: [PATCH 23/48] Use the latest development version of the SciCrunch
 endpoint.

---
 app/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/config.py b/app/config.py
index adf2a50..656e01c 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_dev")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")

From 20de86e5d7a2b4ef85a26330535c452cefe98d16 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Wed, 2 Jun 2021 15:25:41 +1200
Subject: [PATCH 24/48] process_kb_results: retrieve additional links.

---
 app/process_kb_results.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 168e51d..030d727 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -5,6 +5,7 @@
 #  samples: ['attributes','sample','subject'] will find and enter dict keys in the following order:
 #  attributes > sample > subject
 attributes = {
+    'additionalLinks': ['xrefs', 'additionalLinks'],
     'scaffolds': ['scaffolds'],
     'samples': ['attributes','sample','subject'],
     'name': ['item','name'],

From 35df8f3a20ab1ad9153eea7df02450dd24bb53a5 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Thu, 3 Jun 2021 11:13:55 +1200
Subject: [PATCH 25/48] Use SciCrunch rather than Scicrunch in error messages.

---
 app/main.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/main.py b/app/main.py
index 2f43b02..f231b5b 100644
--- a/app/main.py
+++ b/app/main.py
@@ -245,9 +245,9 @@ def filter_search(query):
         results = process_kb_results(response.json())
     except requests.exceptions.HTTPError as err:
         logging.error(err)
-        return jsonify({'error': str(err), 'message': 'Scicrunch is not currently reachable, please try again later'}), 502
+        return jsonify({'error': str(err), 'message': 'SciCrunch is not currently reachable, please try again later'}), 502
     except json.JSONDecodeError as e:
-        return jsonify({'message': 'Could not parse Scicrunch output, please try again later',
+        return jsonify({'message': 'Could not parse SciCrunch output, please try again later',
                         'error': 'JSONDecodeError'}), 502
     return results
 
@@ -270,7 +270,7 @@ def get_facets(type):
             json_result = response.json()
             results.append(json_result)
         except BaseException as e:
-            return jsonify({'message': 'Could not parse Scicrunch output, please try again later',
+            return jsonify({'message': 'Could not parse SciCrunch output, please try again later',
                             'error': 'JSONDecodeError'}), 502
 
     # Select terms from the results

From 80372298066fb5f150d356db3edb99c874a681ac Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Thu, 3 Jun 2021 18:42:06 +1200
Subject: [PATCH 26/48] Can now filter results for scaffold and/or simulation
 datasets.

---
 app/process_kb_results.py | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 030d727..45f48dc 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -117,16 +117,31 @@ def facet_query_string(query, terms, facets, type_map):
 
     # Add the brackets and OR and AND parameters
     for k in t:
-        qt += type_map[k][0] + ":("  # facet term path and opening bracket
-        for l in t[k]:
-            qt += f"({l})"  # bracket around terms incase there are spaces
-            if l is not t[k][-1]:
-                qt += " OR "  # 'OR' if more terms in this facet are coming
-            else:
-                qt += ") "
+        if k == "datasets":
+            needParentheses = (qt or len(t) > 1) and (len(t[k]) > 1)
+            if needParentheses:
+                qt += "("
+            for l in t[k]:
+                if l == "scaffolds":
+                    qt += "item.name:((scaffold))"
+                elif l == "simulations":
+                    qt += "xrefs.additionalLinks.description:((CellML))"
+                if l is not t[k][-1]:
+                    qt += " OR "  # 'OR' if more terms in this facet are coming
+            if needParentheses:
+                qt += ")"
+        else:
+            qt += type_map[k][0] + ":("  # facet term path and opening bracket
+            for l in t[k]:
+                qt += f"({l})"  # bracket around terms incase there are spaces
+                if l is not t[k][-1]:
+                    qt += " OR "  # 'OR' if more terms in this facet are coming
+                else:
+                    qt += ")"
 
         if k is not list(t.keys())[-1]:  # Add 'AND' if we are not at the last item
-                qt += " AND "
+            qt += " AND "
+    print(f'[qt] {qt}')
     return qt
 
 

From 2a14415613b6b0d5f1a444b8229e35152630a3e8 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Thu, 3 Jun 2021 18:42:44 +1200
Subject: [PATCH 27/48] Some minor cleaning up.

---
 app/process_kb_results.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 45f48dc..27a2b8b 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -141,7 +141,6 @@ def facet_query_string(query, terms, facets, type_map):
 
         if k is not list(t.keys())[-1]:  # Add 'AND' if we are not at the last item
             qt += " AND "
-    print(f'[qt] {qt}')
     return qt
 
 

From 070169c753c2da7e35ca1068dc454202ab10c8c6 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Thu, 3 Jun 2021 18:50:15 +1200
Subject: [PATCH 28/48] Filtering: can now filter for "All xxx".

---
 app/process_kb_results.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 27a2b8b..b622720 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -74,20 +74,23 @@ def create_filter_request(query, terms, facets, size, start):
     }
 
     # Data structure of a scicrunch search
-    data = {
-      "size": size,
-      "from": start,
-      "query": {
-          "query_string": {
-              "query": ""
-          }
-      }
-    }
-
     qs = facet_query_string(query, terms, facets, type_map)
-    data["query"]["query_string"]["query"] = qs
 
-    return data
+    if qs:
+        return {
+            "size": size,
+            "from": start,
+            "query": {
+                "query_string": {
+                    "query": qs
+                }
+            }
+        }
+
+    return {
+        "size": size,
+        "from": start,
+    }
 
 
 def facet_query_string(query, terms, facets, type_map):

From 56d4d14b8d1da4d078f390b8c373641fbbe322ba Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Fri, 4 Jun 2021 10:37:52 +1200
Subject: [PATCH 29/48] Simulation filtering: look for either "CellML" or
 "SED-ML".

---
 app/process_kb_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index b622720..b8b93ed 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -128,7 +128,7 @@ def facet_query_string(query, terms, facets, type_map):
                 if l == "scaffolds":
                     qt += "item.name:((scaffold))"
                 elif l == "simulations":
-                    qt += "xrefs.additionalLinks.description:((CellML))"
+                    qt += "xrefs.additionalLinks.description:((CellML) OR (SedML))"
                 if l is not t[k][-1]:
                     qt += " OR "  # 'OR' if more terms in this facet are coming
             if needParentheses:

From 60c7ce52dc87259da800c99c43b0509e755bcb37 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Fri, 4 Jun 2021 10:43:17 +1200
Subject: [PATCH 30/48] Simulation filtering: look for either "CellML" or
 "SED-ML".

Not sure why I originally wrote this "SedML"!
---
 app/process_kb_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index b8b93ed..155e885 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -128,7 +128,7 @@ def facet_query_string(query, terms, facets, type_map):
                 if l == "scaffolds":
                     qt += "item.name:((scaffold))"
                 elif l == "simulations":
-                    qt += "xrefs.additionalLinks.description:((CellML) OR (SedML))"
+                    qt += "xrefs.additionalLinks.description:((CellML) OR (SED-ML))"
                 if l is not t[k][-1]:
                     qt += " OR "  # 'OR' if more terms in this facet are coming
             if needParentheses:

From 3ddc1c6667e749eaf177911e9a5b52ef9594d566 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Fri, 4 Jun 2021 11:16:02 +1200
Subject: [PATCH 31/48] Added a couple of tests for the scaffolds and
 simulations filtering.

---
 tests/test_scicrunch.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/test_scicrunch.py b/tests/test_scicrunch.py
index bdc650e..5085ce7 100644
--- a/tests/test_scicrunch.py
+++ b/tests/test_scicrunch.py
@@ -26,6 +26,14 @@ def test_scicrunch_filter(client):
     r = client.get('/filter-search/', query_string={'term': 'genotype', 'facet': 'heart'})
     assert json.loads(r.data)['numberOfHits'] > 4
 
+def test_scicrunch_filter_scaffolds(client):
+    r = client.get('/filter-search/Heart/?facet=scaffolds&term=datasets')
+    assert json.loads(r.data)['numberOfHits'] > 10
+
+def test_scicrunch_filter_simulations(client):
+    r = client.get('/filter-search/Heart/?facet=simulations&term=datasets')
+    assert json.loads(r.data)['numberOfHits'] > 0
+
 def test_scicrunch_basic_search(client):
     r = client.get('/filter-search/Heart/?facet=All+Species&term=species')
     assert json.loads(r.data)['numberOfHits'] > 10

From 0c6df77f6610921a5046fd562523e273c6aa527e Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Fri, 4 Jun 2021 11:25:35 +1200
Subject: [PATCH 32/48] Added a couple of tests for the scaffolds and
 simulations filtering.

We were searching for "heart" hence our filtering for scaffolds was only returning 3 hits while it should return 14 hits if we want all the scaffolds.
---
 tests/test_scicrunch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_scicrunch.py b/tests/test_scicrunch.py
index 5085ce7..392e9cc 100644
--- a/tests/test_scicrunch.py
+++ b/tests/test_scicrunch.py
@@ -27,11 +27,11 @@ def test_scicrunch_filter(client):
     assert json.loads(r.data)['numberOfHits'] > 4
 
 def test_scicrunch_filter_scaffolds(client):
-    r = client.get('/filter-search/Heart/?facet=scaffolds&term=datasets')
+    r = client.get('/filter-search/?facet=scaffolds&term=datasets')
     assert json.loads(r.data)['numberOfHits'] > 10
 
 def test_scicrunch_filter_simulations(client):
-    r = client.get('/filter-search/Heart/?facet=simulations&term=datasets')
+    r = client.get('/filter-search/?facet=simulations&term=datasets')
     assert json.loads(r.data)['numberOfHits'] > 0
 
 def test_scicrunch_basic_search(client):

From 7698bc7363f89d36d1f7aff04656991ecec57c4d Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Fri, 4 Jun 2021 11:44:50 +1200
Subject: [PATCH 33/48] Revert the default value of SCI_CRUNCH_HOST.

If anything, one should create an environment variable for SCI_CRUNCH_HOST on their system.
---
 app/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/config.py b/app/config.py
index 656e01c..9701910 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_dev")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_Datasets_pr")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")

From ec7d7f0e4276665dc0fff5aa9b0eb10976ced724 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Fri, 4 Jun 2021 14:10:21 +1200
Subject: [PATCH 34/48] Filtering: use the scaffold MIME type rather than an
 item's name.

---
 app/process_kb_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 155e885..7b2f37a 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -126,7 +126,7 @@ def facet_query_string(query, terms, facets, type_map):
                 qt += "("
             for l in t[k]:
                 if l == "scaffolds":
-                    qt += "item.name:((scaffold))"
+                    qt += "scaffolds.object.mimetype.name:((inode%2fvnd.abi.scaffold+directory))"
                 elif l == "simulations":
                     qt += "xrefs.additionalLinks.description:((CellML) OR (SED-ML))"
                 if l is not t[k][-1]:

From 6efca8eb71112ea89289d3138af1c835c2f428ab Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 15 Jun 2021 15:53:38 +1200
Subject: [PATCH 35/48] Switch back to stable endpoint

---
 app/config.py | 2 +-
 main.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/config.py b/app/config.py
index bb89b04..0684428 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_dev")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
diff --git a/main.py b/main.py
index 475dcbd..aa5c9d1 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,4 @@
 from app.main import app
 
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=8000)
+    app.run(host="0.0.0.0")

From d8547f770c607df7aecc869c88ababd3b69909b1 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Wed, 16 Jun 2021 14:39:55 +1200
Subject: [PATCH 36/48] Added "pmr_latest_exposure" to retrieve the latest
 exposure, if any, of a PMR workspace.

---
 app/main.py       | 27 +++++++++++++++++++++++++++
 tests/test_pmr.py | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 tests/test_pmr.py

diff --git a/app/main.py b/app/main.py
index f231b5b..bd9f72b 100644
--- a/app/main.py
+++ b/app/main.py
@@ -25,6 +25,7 @@
 import os
 
 import app.osparc as osparc
+import requests
 
 # from pymongo import MongoClient
 
@@ -577,3 +578,29 @@ def simulation():
         return json.dumps(osparc.run_simulation(data["model_url"], data["json_config"]))
     else:
         abort(400, description="Missing model URL and/or JSON configuration")
+
+
+@app.route("/pmr_latest_exposure", methods=["POST"])
+def pmr_latest_exposure():
+    data = request.get_json()
+
+    if data and "workspace_url" in data:
+        try:
+            resp = requests.get(data["workspace_url"],
+                                headers={"Accept": "application/vnd.physiome.pmr2.json.1"})
+            if resp.status_code == 200:
+                try:
+                    # Return the latest exposure for the given workspace.
+                    url = resp.json()["collection"]["items"][0]["links"][0]["href"]
+                except:
+                    # There is no latest exposure for the given workspace.
+                    url = ""
+                return jsonify(
+                    url=url
+                )
+            else:
+                return resp.json()
+        except:
+            abort(400, description="Invalid workspace URL")
+    else:
+        abort(400, description="Missing workspace URL")
diff --git a/tests/test_pmr.py b/tests/test_pmr.py
new file mode 100644
index 0000000..081bac8
--- /dev/null
+++ b/tests/test_pmr.py
@@ -0,0 +1,39 @@
+import json
+import pytest
+from app import app
+
+
+@pytest.fixture
+def client():
+    app.config['TESTING'] = True
+    return app.test_client()
+
+
+def test_pmr_latest_exposure_no_post(client):
+    r = client.get('/pmr_latest_exposure')
+    assert r.status_code == 405
+
+
+def test_pmr_latest_exposure_empty_post(client):
+    r = client.post("/pmr_latest_exposure", json={})
+    assert r.status_code == 400
+
+
+def test_pmr_latest_exposure_workspace_with_latest_exposure(client):
+    r = client.post("/pmr_latest_exposure", json={"workspace_url": "https://models.physiomeproject.org/workspace/486"})
+    assert r.status_code == 200
+    data = r.get_json()
+    assert data["url"] == "https://models.physiomeproject.org/e/611"
+
+
+def test_pmr_latest_exposure_workspace_without_latest_exposure(client):
+    r = client.post("/pmr_latest_exposure", json={"workspace_url": "https://models.physiomeproject.org/workspace/698"})
+    assert r.status_code == 200
+    data = r.get_json()
+    assert data["url"] == ""
+
+
+def test_pmr_latest_exposure_workspace_with_invalid_workspace_url(client):
+    r = client.post("/pmr_latest_exposure", json={"workspace_url": "https://some.url.com/"})
+    print(r.get_json())
+    assert r.status_code == 400

From 0a5872588fe995922bc507bd38fce8b52b52f08b Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Fri, 18 Jun 2021 16:21:07 +1200
Subject: [PATCH 37/48] Add endpoint for neuron queries

---
 app/main.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/app/main.py b/app/main.py
index f3f1f17..31a3abe 100644
--- a/app/main.py
+++ b/app/main.py
@@ -225,6 +225,34 @@ def sci_doi(doi1,doi2):
         logging.error(err)
         return json.dumps({'error': err})
 
+
+# /scicrunch-organ-query/: Returns results for given organ curie. These can be processed by the sidebar
+@app.route("/scicrunch-organ-query/<curie>")
+def sci_organ(curie):
+    data = {
+        "size": 20,
+        "from": 0,
+        "query": {
+            "query_string": {
+                "fields": [
+                    "*organ.curie"
+                ],
+                "query": curie
+            }
+        }
+    }
+
+    try:
+        response = requests.post(
+            f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',
+            json=data)
+        return process_kb_results(response.json())
+    except requests.exceptions.HTTPError as err:
+        logging.error(err)
+        return json.dumps({'error': err})
+
+
+
 # /search/: Returns scicrunch results for a given <search> query
 @app.route("/search/", defaults={'query': ''})
 @app.route("/search/<query>")

From 61c87718d9d45c34f5a52b3570d63de319c9cd74 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 22 Jun 2021 17:32:38 +1200
Subject: [PATCH 38/48] Switch to dev endpoint

---
 app/config.py             |  2 +-
 app/main.py               | 17 +++++++++++++++++
 app/process_kb_results.py |  4 ++--
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/app/config.py b/app/config.py
index 0684428..bb89b04 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_dev")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
diff --git a/app/main.py b/app/main.py
index 31a3abe..f6843b4 100644
--- a/app/main.py
+++ b/app/main.py
@@ -242,6 +242,23 @@ def sci_organ(curie):
         }
     }
 
+    # # /scicrunch-organ-query/: Returns results for given organ curie. These can be processed by the sidebar
+    # @app.route("/scicrunch-query-string/<field>/<curie>")
+    # def sci_organ(field, curie):
+    #     # field example: "*organ.curie"
+    #     data = {
+    #         "size": 20,
+    #         "from": 0,
+    #         "query": {
+    #             "query_string": {
+    #                 "fields": [
+    #                     field
+    #                 ],
+    #                 "query": curie
+    #             }
+    #         }
+    #     }
+
     try:
         response = requests.post(
             f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',
diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 44241a8..4f7bb7c 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -173,13 +173,13 @@ def convert_url_to_doi(doi):
 def find_csv_files(obj_list):
     if not obj_list:
         return obj_list
-    return [obj for obj in obj_list if obj.get('mimetype', 'none') == 'text/csv']
+    return [obj for obj in obj_list if obj.get('mimetype', {}).get('name', 'none') == 'text/csv']
 
 
 def find_scaffold_json_files(obj_list):
     if not obj_list:
         return obj_list
-    return [obj for obj in obj_list if obj.get('mimetype', 'none') == 'application/json' and "metadata.json" in obj.get('dataset', 'none')['path']]
+    return [obj for obj in obj_list if obj.get('mimetype', {}).get('name', 'none') == 'application/json' and "metadata.json" in obj.get('dataset', 'none')['path']]
 
 
 # get_attributes: Use 'attributes' (defined at top of this document) to step through the large scicrunch result dict

From 17836d48c75e7dbad779c0a20b37a77aa3925c53 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 22 Jun 2021 17:39:26 +1200
Subject: [PATCH 39/48] Adjust query endpoint to be more flexible

---
 app/main.py | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/app/main.py b/app/main.py
index f6843b4..b3381a5 100644
--- a/app/main.py
+++ b/app/main.py
@@ -226,39 +226,23 @@ def sci_doi(doi1,doi2):
         return json.dumps({'error': err})
 
 
-# /scicrunch-organ-query/: Returns results for given organ curie. These can be processed by the sidebar
-@app.route("/scicrunch-organ-query/<curie>")
-def sci_organ(curie):
+# /scicrunch-query-string/: Returns results for given organ curie. These can be processed by the sidebar
+@app.route("/scicrunch-query-string/<field>/<curie>")
+def sci_organ(field, curie):
+    # field example: "*organ.curie"
     data = {
         "size": 20,
         "from": 0,
         "query": {
             "query_string": {
                 "fields": [
-                    "*organ.curie"
+                    field
                 ],
                 "query": curie
             }
         }
     }
 
-    # # /scicrunch-organ-query/: Returns results for given organ curie. These can be processed by the sidebar
-    # @app.route("/scicrunch-query-string/<field>/<curie>")
-    # def sci_organ(field, curie):
-    #     # field example: "*organ.curie"
-    #     data = {
-    #         "size": 20,
-    #         "from": 0,
-    #         "query": {
-    #             "query_string": {
-    #                 "fields": [
-    #                     field
-    #                 ],
-    #                 "query": curie
-    #             }
-    #         }
-    #     }
-
     try:
         response = requests.post(
             f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',

From 10ce60f49fd4db85e74265f7476027c4ce5e2cb6 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 22 Jun 2021 18:58:05 +1200
Subject: [PATCH 40/48] Bug fix and query change

 - Bug fix for some datasets not returning _any_ objects
 - Switch scicrunch-query-string from using the route to using URL
   params (this is now implemented in tehsurfer/map-side-bar)
---
 app/main.py               | 10 +++++-----
 app/process_kb_results.py |  5 +++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/app/main.py b/app/main.py
index b3381a5..b73ff64 100644
--- a/app/main.py
+++ b/app/main.py
@@ -227,17 +227,17 @@ def sci_doi(doi1,doi2):
 
 
 # /scicrunch-query-string/: Returns results for given organ curie. These can be processed by the sidebar
-@app.route("/scicrunch-query-string/<field>/<curie>")
-def sci_organ(field, curie):
+@app.route("/scicrunch-query-string/")
+def sci_organ():
+    fields = request.args.getlist('field')
+    curie = request.args.get('curie')
     # field example: "*organ.curie"
     data = {
         "size": 20,
         "from": 0,
         "query": {
             "query_string": {
-                "fields": [
-                    field
-                ],
+                "fields": fields,
                 "query": curie
             }
         }
diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 4f7bb7c..12bdb14 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -153,8 +153,9 @@ def process_kb_results(results):
     for i, hit in enumerate(hits):
         attr = get_attributes(attributes, hit)
         attr['doi'] = convert_doi_to_url(attr['doi'])
-        attr['csvFiles'] = find_csv_files(attr['csvFiles'])
-        attr['scaffolds'] = find_scaffold_json_files(hit['_source']['objects'])
+        objects = find_csv_files(attr['csvFiles']) # Have to do this as not all datsets return objects
+        attr['csvFiles'] = objects
+        attr['scaffolds'] = find_scaffold_json_files(objects)
         output.append(attr)
     return json.dumps({'numberOfHits': results['hits']['total'], 'results': output})
 

From 300a85af2f9eb0be87a447a14dc157ac7c604b08 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 22 Jun 2021 19:12:57 +1200
Subject: [PATCH 41/48] Fix mistake in order of data processing

---
 app/process_kb_results.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 12bdb14..7031d7f 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -153,8 +153,8 @@ def process_kb_results(results):
     for i, hit in enumerate(hits):
         attr = get_attributes(attributes, hit)
         attr['doi'] = convert_doi_to_url(attr['doi'])
-        objects = find_csv_files(attr['csvFiles']) # Have to do this as not all datsets return objects
-        attr['csvFiles'] = objects
+        objects = attr['csvFiles']  # Have to do this as not all datsets return objects
+        attr['csvFiles'] = find_csv_files(objects)
         attr['scaffolds'] = find_scaffold_json_files(objects)
         output.append(attr)
     return json.dumps({'numberOfHits': results['hits']['total'], 'results': output})

From 2bc2d6a08c6b7a8a9af7aec4f9f638e3fbb01c3a Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Wed, 23 Jun 2021 17:47:16 +1200
Subject: [PATCH 42/48] Switch to using additional mimetypes as intended

---
 app/process_kb_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 7031d7f..4a28f10 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -180,7 +180,7 @@ def find_csv_files(obj_list):
 def find_scaffold_json_files(obj_list):
     if not obj_list:
         return obj_list
-    return [obj for obj in obj_list if obj.get('mimetype', {}).get('name', 'none') == 'application/json' and "metadata.json" in obj.get('dataset', 'none')['path']]
+    return [obj for obj in obj_list if obj.get('additional_mimetype', {}).get('name', 'none') == 'inode/vnd.abi.scaffold+file']
 
 
 # get_attributes: Use 'attributes' (defined at top of this document) to step through the large scicrunch result dict

From 7f4865e338848711836df72d43d6d71a7c0074e0 Mon Sep 17 00:00:00 2001
From: alan-wu <alan.wu@auckland.ac.nz>
Date: Thu, 24 Jun 2021 13:42:59 +1200
Subject: [PATCH 43/48] Rename genotype to organ.

---
 app/process_kb_results.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 4a28f10..0b29d6b 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -36,13 +36,13 @@ def create_doi_request(doi):
     return query
 
 # create_facet_query(type): Generates facet search request data for scicrunch  given a 'type'; where
-# 'type' is either 'species', 'gender', or 'genotype' at this stage.
+# 'type' is either 'species', 'gender', or 'organ' at this stage.
 #  Returns a tuple of the typemap and request data ( type_map, data )
 def create_facet_query(type):
     type_map = {
         'species': ['organisms.primary.species.name.aggregate', 'organisms.sample.species.name.aggregate'],
         'gender': ['attributes.subject.sex.value'],
-        'genotype': ['anatomy.organ.name.aggregate']
+        'organ': ['anatomy.organ.name.aggregate']
     }
 
     data = {
@@ -86,7 +86,7 @@ def create_filter_request(query, terms, facets, size, start):
     type_map = {
         'species': ['organisms.primary.species.name.aggregate', 'organisms.sample.species.name'],
         'gender': ['attributes.subject.sex.value', 'attributes.sample.sex.value'],
-        'genotype': ['anatomy.organ.name.aggregate']
+        'organ': ['anatomy.organ.name.aggregate']
     }
 
     # Data structure of a scicrunch search
@@ -180,6 +180,8 @@ def find_csv_files(obj_list):
 def find_scaffold_json_files(obj_list):
     if not obj_list:
         return obj_list
+    for obj in obj_list:
+        print(obj.get('name'), obj.get('additional_mimetype', {}).get('name', 'none'))
     return [obj for obj in obj_list if obj.get('additional_mimetype', {}).get('name', 'none') == 'inode/vnd.abi.scaffold+file']
 
 

From 6fab9e09b4a5f86c85583043ebfdde7091450764 Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Thu, 24 Jun 2021 14:10:54 +1200
Subject: [PATCH 44/48] Fix to hit processing

Previously there was no check to make sure we made it to the end of our
desired attributes. This has now been added along with some comments to
explain what is going on
---
 app/process_kb_results.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 4a28f10..abe7a1f 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -190,11 +190,12 @@ def get_attributes(attributes, dataset):
     for k, attr in attributes.items():
         subset = dataset['_source'] # set our subest to the full dataset result
         key_attr = False
-        for key in attr:
+        for n, key in enumerate(attr): # step through attributes
             if isinstance(subset, dict):
-                if key in subset.keys():
+                if key in subset.keys(): # continue if keys are found
                     subset = subset[key]
-                    key_attr = subset
+                    if n+1 == len(attr): # if we made it to the end, save this subset
+                        key_attr = subset
         found_attr[k] = key_attr
     return found_attr
 

From c72739e6871946fd17dd78c9a8c3ede5bee96289 Mon Sep 17 00:00:00 2001
From: alan-wu <alan.wu@auckland.ac.nz>
Date: Thu, 24 Jun 2021 14:35:51 +1200
Subject: [PATCH 45/48] Improve filters for dataset with scaffold.

---
 app/process_kb_results.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index a7e7faa..2e84944 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -143,7 +143,7 @@ def facet_query_string(query, terms, facets, type_map):
                 qt += "("
             for l in t[k]:
                 if l == "scaffolds":
-                    qt += "scaffolds.object.mimetype.name:((inode%2fvnd.abi.scaffold+directory))"
+                    qt += "objects.additional_mimetype.name:((inode%2fvnd.abi.scaffold) AND (file))"
                 elif l == "simulations":
                     qt += "xrefs.additionalLinks.description:((CellML) OR (SED-ML))"
                 if l is not t[k][-1]:
@@ -198,8 +198,6 @@ def find_csv_files(obj_list):
 def find_scaffold_json_files(obj_list):
     if not obj_list:
         return obj_list
-    for obj in obj_list:
-        print(obj.get('name'), obj.get('additional_mimetype', {}).get('name', 'none'))
     return [obj for obj in obj_list if obj.get('additional_mimetype', {}).get('name', 'none') == 'inode/vnd.abi.scaffold+file']
 
 

From 39f43f29ac143415fa62182f160bd015546e9ddd Mon Sep 17 00:00:00 2001
From: alan-wu <alan.wu@auckland.ac.nz>
Date: Tue, 3 Aug 2021 10:45:12 +1200
Subject: [PATCH 46/48] Add test for available scaffolds.

---
 app/process_kb_results.py |  3 ++-
 tests/test_scicrunch.py   | 15 +++++++++++++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/app/process_kb_results.py b/app/process_kb_results.py
index 2e84944..d2b39be 100644
--- a/app/process_kb_results.py
+++ b/app/process_kb_results.py
@@ -15,7 +15,8 @@
     'organs': ['anatomy', 'organ'],
     'contributors': ['contributors'],
     'doi': ['item', 'curie'],
-    'csvFiles': ['objects']
+    'csvFiles': ['objects'],
+    'pennsieve': ['pennsieve']
 }
 
 def create_doi_request(doi):
diff --git a/tests/test_scicrunch.py b/tests/test_scicrunch.py
index d8e2e1f..1f2b902 100644
--- a/tests/test_scicrunch.py
+++ b/tests/test_scicrunch.py
@@ -28,7 +28,7 @@ def test_scicrunch_all_data(client):
     assert json.loads(r.data)['numberOfHits'] > 40
 
 def test_scicrunch_filter(client):
-    r = client.get('/filter-search/', query_string={'term': 'genotype', 'facet': 'heart'})
+    r = client.get('/filter-search/', query_string={'term': 'organ', 'facet': 'heart'})
     assert json.loads(r.data)['numberOfHits'] > 4
 
 def test_scicrunch_filter_scaffolds(client):
@@ -52,7 +52,18 @@ def test_scicrunch_combined_facet_text(client):
     assert json.loads(r.data)['numberOfHits'] > 1
 
 def test_getting_facets(client):
-    r = client.get('/get-facets/genotype')
+    r = client.get('/get-facets/organ')
     facet_results = json.loads(r.data)
     facets = [facet_result['key'] for facet_result in facet_results]
     assert 'heart' in facets
+
+def test_scaffold_files(client):  # each result of the scaffolds filter search must expose a scaffold file served by /s3-resource/
+    r = client.get('/filter-search/?facet=scaffolds&term=datasets&size=40')
+    results = json.loads(r.data)
+    assert results['numberOfHits'] > 0
+    for item in results['results']:
+        uri = item['pennsieve']['uri']
+        path = item['scaffolds'][0]['dataset']['path']  # only the first scaffold of each result is checked
+        key = f"{uri}files/{path}".replace('s3://pennsieve-prod-discover-publish-use1/', '')  # drop the bucket prefix from the uri
+        r = client.get(f"/s3-resource/{key}")
+        assert r.status_code == 200

From 2f59cf3796e32e1c47ddf319a3843c5ac6f92acd Mon Sep 17 00:00:00 2001
From: alan-wu <alan.wu@auckland.ac.nz>
Date: Tue, 3 Aug 2021 13:00:46 +1200
Subject: [PATCH 47/48] Change to use the production endpoint.

---
 app/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/config.py b/app/config.py
index 656e01c..adf2a50 100644
--- a/app/config.py
+++ b/app/config.py
@@ -31,7 +31,7 @@ class Config(object):
     KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
     DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
     SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_dev")
+    SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_pr")
     MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
     SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
     WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")

From fe65ca7e55b4f1e3e2fb8e9c6e8d4f921a6489dc Mon Sep 17 00:00:00 2001
From: Jesse Khorasanee <jessekhorasanee@gmail.com>
Date: Tue, 10 Aug 2021 13:39:37 +1200
Subject: [PATCH 48/48] Add pubmed proxy

---
 app/main.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/app/main.py b/app/main.py
index 048e219..c09518d 100644
--- a/app/main.py
+++ b/app/main.py
@@ -228,6 +228,16 @@ def sci_doi(doi1,doi2):
         logging.error(err)
         return json.dumps({'error': err})
 
+# /pubmed/<id>: Proxy for pubmed; returns the body of https://pubmed.ncbi.nlm.nih.gov/<id>/ or a JSON 'error'
+@app.route("/pubmed/<id>")
+@app.route("/pubmed/<id>/")
+def pubmed(id):
+    try:
+        response = requests.get(f'https://pubmed.ncbi.nlm.nih.gov/{id}/')
+        return response.text
+    except requests.exceptions.RequestException as err:  # HTTPError alone is only raised by raise_for_status()
+        logging.error(err)
+        return json.dumps({'error': str(err)})  # exception objects are not JSON serializable
 
 # /scicrunch-query-string/: Returns results for given organ curie. These can be processed by the sidebar
 @app.route("/scicrunch-query-string/")