diff --git a/.env b/.env new file mode 100644 index 0000000000..222989c353 --- /dev/null +++ b/.env @@ -0,0 +1,4 @@ +REDIS_HOST=redis +REDIS_PORT=6379 +REDIS_DATABASE=0 +REDIS_PASSWORD= diff --git a/.gitignore b/.gitignore index 9c0ccc6751..56245fde97 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,5 @@ target/ renku-*.bottle.json renku-*.bottle.tar.gz renku.rb + +.env diff --git a/.travis.yml b/.travis.yml index db5a1653d8..39b26c9563 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,200 +18,175 @@ notifications: email: false - git: depth: false - language: python - matrix: fast_finish: true - cache: - - pip - +- pip env: + matrix: - REQUIREMENTS=lowest - REQUIREMENTS=release - + global: + secure: qLF8toEnN3JECO8sZlpE7Q5zPZhVt0y+kZtB8Vd/9kICdQkWh+/NKD6Mw0W+BW9hf+BkaCK2LEvCbeuSNg5lFr1aGZDvveMf8t3KkI1Aem+L4ewSSEbxekVtPiXsRf+Am6WOxZftntCo415aPxUYD8Ny+kZsb0DoJ4sxodAGmitUeFEo9f8bYGurDMAH7OC7AukYnRA33x8OVImU4G5uUML9z8q1pGUXZIldEucyDb0207zxn7UBwQCfhQm+HHTovOmZO3gvZvT5AJU3IQaiu7ePoBiK+M3Vb4cyHl4FlWE+5NZMpk/c9aoFBpkO5aC9QrCDCfiq7lSULL7Gkte+uWnjBm7jJH74fLe4Ryclfodb8vKHpC7fYCwfOJHXjHRr5KDPSG/1KMCTv7r4sQ6GJCnN01bDuW64IV7VK+QJwoZZOJx4J3dFMeCJdB/tOtevmDE5bAFGoV0Ycr03g9N0aHbdr0me6vWPksqR7RVEfRsX5rXPARUZ+7kWkt2MFqgG/L8orPCzyxZtqrRPtbsltK1ZmJUR69v9Tb9y+EJxB2MGUkUs9DUOr99pAlJvdx29AOzHEK45IySTdYfxjeCMCw6J/1UlZRaYjW4mj5ag0R5cnS0907w864dp7FLV9wJ2Cg4iG8WLrt5GKUMMSpac9Y/Gwaf0vExciwK60cUewMU= dist: xenial - python: - - "3.5" - - "3.6" - - "3.7" - +- '3.5' +- '3.6' +- '3.7' stages: - - name: docs - - name: test - - name: integration - if: branch = master AND (type != pull_request) - - name: test OSX - if: branch = master AND (type != pull_request) - - name: publish 🐍 - if: type = push AND (branch = master OR tag IS present) - - name: brew 🍺 - if: type = push AND tag IS present AND tag =~ /^v\d\.\d\.\d$/ - +- name: docs +- name: test +- name: integration + if: branch = master AND (type != pull_request) +- name: test OSX + if: branch = master AND (type != pull_request) +- name: "publish \U0001F40D" + if: type = push AND (branch = master OR tag IS present) +- name: "brew \U0001F37A" + if: type = push AND tag IS present AND tag =~ /^v\d\.\d\.\d$/ before_install: - - git fetch --tags - - git config --global --add user.name "Renku @ SDSC" - - git config --global --add user.email "renku@datascience.ch" - - if [[ $TRAVIS_OS_NAME == 'linux' ]]; then - sudo apt-get update; - sudo apt-get -y install shellcheck; - travis_retry python -m pip install --upgrade pip setuptools py; - travis_retry python -m pip install twine wheel coveralls requirements-builder; - requirements-builder -e all --level=min setup.py > .travis-lowest-requirements.txt; - requirements-builder -e all --level=pypi setup.py > .travis-release-requirements.txt; - requirements-builder -e all --level=dev --req requirements-devel.txt setup.py > .travis-devel-requirements.txt; - requirements-builder -e nodocs --level=min setup.py > .travis-lowest-requirements-nodocs.txt; - requirements-builder -e nodocs --level=pypi setup.py > .travis-release-requirements-nodocs.txt; - requirements-builder -e nodocs --level=dev --req requirements-devel.txt setup.py > .travis-devel-requirements-nodocs.txt; - elif [[ $TRAVIS_OS_NAME == 'osx' ]]; then - ulimit -n 1024; - brew update; - brew upgrade -v python; - brew unlink python; - brew link python; - brew install -v git-lfs jq node pipenv shellcheck; - travis_wait brew upgrade node; - fi - +- git fetch --tags +- git config --global --add user.name "Renku @ SDSC" +- git config --global --add user.email "renku@datascience.ch" 
+- if [[ $TRAVIS_OS_NAME == 'linux' ]]; then sudo apt-get update; sudo apt-get -y install + shellcheck; travis_retry python -m pip install --upgrade six pip setuptools py; + travis_retry python -m pip install twine wheel coveralls requirements-builder; requirements-builder + -e all --level=min setup.py > .travis-lowest-requirements.txt; requirements-builder + -e all --level=pypi setup.py > .travis-release-requirements.txt; requirements-builder + -e all --level=dev --req requirements-devel.txt setup.py > .travis-devel-requirements.txt; + requirements-builder -e nodocs --level=min setup.py > .travis-lowest-requirements-nodocs.txt; + requirements-builder -e nodocs --level=pypi setup.py > .travis-release-requirements-nodocs.txt; + requirements-builder -e nodocs --level=dev --req requirements-devel.txt setup.py + > .travis-devel-requirements-nodocs.txt; elif [[ $TRAVIS_OS_NAME == 'osx' ]]; then + ulimit -n 1024; brew update; brew upgrade -v python; brew unlink python; brew link + python; brew install -v git-lfs jq node pipenv shellcheck; travis_wait brew upgrade + node; fi install: - - if [[ $TRAVIS_OS_NAME == 'linux' ]]; then - travis_retry python -m pip install -r .travis-${REQUIREMENTS}-requirements-nodocs.txt; - travis_retry python -m pip install -e .[nodocs]; - elif [[ $TRAVIS_OS_NAME == 'osx' ]]; then - travis_retry pipenv install --deploy; - source "$(pipenv --venv)/bin/activate"; - travis_retry pip install -e .[nodocs]; - fi - +- if [[ $TRAVIS_OS_NAME == 'linux' ]]; then travis_retry python -m pip install -r + .travis-${REQUIREMENTS}-requirements-nodocs.txt; travis_retry python -m pip install + -e .[nodocs]; elif [[ $TRAVIS_OS_NAME == 'osx' ]]; then travis_retry pipenv install + --deploy; source "$(pipenv --venv)/bin/activate"; travis_retry pip install -e .[nodocs]; + fi script: - - "./run-tests.sh -t -s" - +- "./run-tests.sh -t -s" after_success: - - coveralls - +- coveralls jobs: include: - - stage: docs - os: linux - dist: xenial - language: python - env: - - REQUIREMENTS=lowest - - REQUIREMENTS=release - install: - - travis_retry python -m pip install -r .travis-${REQUIREMENTS}-requirements-all.txt; - travis_retry python -m pip install -e .[all]; - script: ./run-tests.sh -d - - stage: integration - os: linux - dist: xenial - language: python - env: - - REQUIREMENTS=release - script: pytest -m integration -v - - stage: integration - python: "3.6" - os: linux - dist: xenial - language: python - env: - - REQUIREMENTS=release - script: pytest -m integration -v - - stage: integration - python: "3.7" - os: linux - dist: xenial - language: python - env: - - REQUIREMENTS=release - script: pytest -m integration -v - - stage: integration - os: linux - dist: xenial - language: python - env: - - REQUIREMENTS=lowest - script: pytest -m integration -v - - stage: integration - python: "3.6" - os: linux - dist: xenial - language: python - env: - - REQUIREMENTS=lowest - script: pytest -m integration -v - - stage: integration - python: "3.7" - os: linux - dist: xenial - language: python - env: - - REQUIREMENTS=lowest - script: pytest -m integration -v - - stage: test OSX - language: generic - sudo: true - os: osx - osx_image: xcode11.2 - - stage: test OSX - language: generic - sudo: true - os: osx - osx_image: xcode10.1 - - - stage: publish 🐍 - python: 3.6 - script: echo "Publishing on PyPI.io ..." 
- before_deploy: - if [[ -z $TRAVIS_TAG ]]; then - export TRAVIS_TAG=$(renku --version) && - git tag $TRAVIS_TAG; - fi - deploy: - - provider: pypi - user: - secure: "RPxGYNL+N6LQy1/TbXCFy9IDgZ05u/Qj6my/p1hSoBWG304se28njZ0zpTv5AGZF8G3dBeVjYbf4s+ytwUPoxK+1fBWlnnSuw4QUXWf339rzcMU4dDO2QX8/SvMDZFbJdE/1/6arlbXa8dTZobm1V0Q3AUTkl2AwXIg9jCaRcYb6n9lASRIiR1M2BK/+FTJG2qywB9kSyQ3giuobwwhQa4CAwDg5MoqL5KLFm2CxejvS0vpleaWqA/LJxeHTQqqd0CIk6V2NBETZ6V78MqdISai88nRafijT0iQ5LSWsy7R6CCpK7OFjHnvA7hGSYzs/BRpdABAk5a2yFbKKZErXoLvatjflMlj2OhHy/0Hlv6xEt1db1pwnjQQIiS62R/Gpx4DZAO8hGp6pT9g9xiifzlj4km9iOD4GY1g+A5A+ssEneBTvExJja4yAqJzAVu+XVDVqxVj+MOmpIcQkzT983+cVceoeczJ61sDuftQaAgcVqQACRE02fLszEtSJVFaq3vKu8dX2eMdiCk7GLdqNF9kfygagNC8eja6Yvr+Ft8kTwrjTBMC/D3xC584I8OTzmpNE/tfZHppfhiKXoU+FySdIGCPcSTGKUgljiz3sFk1JjjEBkGqBLAMaD8l5FsgQqR4zO/2IiwSst1Wx8owF+tiLlerDAC9b/ZFcqDpUab4=" - password: - secure: "f8f2175dg1FUt4mZCQ87tgwwUrFoxQ5CTcZkRnlCXJqagSMk9VmjR8/XXDE5cW48JPG0qdKQdvBtC81NCq+4MqC20HI9VtOdZWeT6Jej90XOZ742osC3pdIGMF4wrsd7+fs1AZkbzzjgB7FsJ42qW6rMa3vP4mXB4GJEel453Fs3Fw8tnR4PZ2ikOJ9fcmtMensjxh9oNMyAIMkYVGim5bWtpkpI1cddeThDBEKurj1IWEMonQw4lR2yLwZTEP6F/b43Cy4aNy+YkdJzJbM0KMJASLeeu8SxNTE7JCqrYc4irU3AzHzzy/FNVGdiw0F10kbK+yI7cPUqWfeoVjwqBJe8Hr0CXNNCsEYkjXkY9PE2m2x10z2UrTy+O1dNo/8sDoKvZBChuAPPtxp2I7/KujECWjzFvMunHEk3K940ZeSMo90xHKQllmA7umquNcPTXiM2l4nNHtolh5W2HdanvsEFhkY2Y2o9sRIytOa5bM+iS9RCL5GsZwLgGKdjfuqk0GF6KK4AIgi7WKtVM73vM7HQaAVRpyUWZ/h8Vu5IRdkORC20WwHZ/Jg6pUy3pkN1VlcKE5uanaPik/npq/uCGe9YC2bh5IoclmqdJUHrkzFPb+f8wRBvbof0zU1B6UMSdiug5oDk3a0Q8kk2AppfjTs7x8NHi3KYXdUphi9HrWQ=" - distributions: "sdist bdist_wheel" - on: - all_branches: true - # push the dev tag to github - - provider: releases - api_key: ${GITHUB_TOKEN} - on: - all_branches: true - tags: false - - - stage: brew 🍺 - language: generic - sudo: true - os: osx - osx_image: xcode11.2 - before_install: brew -v install jq - install: python -m pip install requests - script: - - echo PY_BREW_VERSION=$(python setup.py --version) - - make -e PY_BREW_VERSION=$(python setup.py --version) brew-commit-formula brew-build-bottle brew-commit-bottle - deploy: - - provider: releases - api_key: ${GITHUB_TOKEN} - file_glob: true - file: "*.bottle.*" - skip_cleanup: true - on: - all_branches: true - - provider: pages - skip-cleanup: true - github-token: ${GITHUB_TOKEN} - repo: swissdatasciencecenter/homebrew-renku - target-branch: master - keep-history: true - local-dir: /usr/local/Homebrew/Library/Taps/swissdatasciencecenter/homebrew-renku/ - on: - all_branches: true + - stage: docs + os: linux + dist: xenial + language: python + env: + - REQUIREMENTS=lowest + - REQUIREMENTS=release + install: + - travis_retry python -m pip install -r .travis-${REQUIREMENTS}-requirements-all.txt; + travis_retry python -m pip install -e .[all]; + script: "./run-tests.sh -d" + - stage: integration + os: linux + dist: xenial + language: python + env: + - REQUIREMENTS=release + script: pytest -m integration -v + - stage: integration + python: '3.6' + os: linux + dist: xenial + language: python + env: + - REQUIREMENTS=release + script: pytest -m integration -v + - stage: integration + python: '3.7' + os: linux + dist: xenial + language: python + env: + - REQUIREMENTS=release + script: pytest -m integration -v + - stage: integration + os: linux + dist: xenial + language: python + env: + - REQUIREMENTS=lowest + script: pytest -m integration -v + - stage: integration + python: '3.6' + os: linux + dist: xenial + language: python + env: + - REQUIREMENTS=lowest + script: pytest -m integration -v + - stage: 
integration + python: '3.7' + os: linux + dist: xenial + language: python + env: + - REQUIREMENTS=lowest + script: pytest -m integration -v + - stage: test OSX + language: generic + sudo: true + os: osx + osx_image: xcode11.2 + - stage: test OSX + language: generic + sudo: true + os: osx + osx_image: xcode10.1 + - stage: "publish \U0001F40D" + python: 3.6 + script: echo "Publishing on PyPI.io ..." + before_deploy: if [[ -z $TRAVIS_TAG ]]; then export TRAVIS_TAG=$(renku --version) + && git tag $TRAVIS_TAG; fi + deploy: + - provider: pypi + user: + secure: RPxGYNL+N6LQy1/TbXCFy9IDgZ05u/Qj6my/p1hSoBWG304se28njZ0zpTv5AGZF8G3dBeVjYbf4s+ytwUPoxK+1fBWlnnSuw4QUXWf339rzcMU4dDO2QX8/SvMDZFbJdE/1/6arlbXa8dTZobm1V0Q3AUTkl2AwXIg9jCaRcYb6n9lASRIiR1M2BK/+FTJG2qywB9kSyQ3giuobwwhQa4CAwDg5MoqL5KLFm2CxejvS0vpleaWqA/LJxeHTQqqd0CIk6V2NBETZ6V78MqdISai88nRafijT0iQ5LSWsy7R6CCpK7OFjHnvA7hGSYzs/BRpdABAk5a2yFbKKZErXoLvatjflMlj2OhHy/0Hlv6xEt1db1pwnjQQIiS62R/Gpx4DZAO8hGp6pT9g9xiifzlj4km9iOD4GY1g+A5A+ssEneBTvExJja4yAqJzAVu+XVDVqxVj+MOmpIcQkzT983+cVceoeczJ61sDuftQaAgcVqQACRE02fLszEtSJVFaq3vKu8dX2eMdiCk7GLdqNF9kfygagNC8eja6Yvr+Ft8kTwrjTBMC/D3xC584I8OTzmpNE/tfZHppfhiKXoU+FySdIGCPcSTGKUgljiz3sFk1JjjEBkGqBLAMaD8l5FsgQqR4zO/2IiwSst1Wx8owF+tiLlerDAC9b/ZFcqDpUab4= + password: + secure: f8f2175dg1FUt4mZCQ87tgwwUrFoxQ5CTcZkRnlCXJqagSMk9VmjR8/XXDE5cW48JPG0qdKQdvBtC81NCq+4MqC20HI9VtOdZWeT6Jej90XOZ742osC3pdIGMF4wrsd7+fs1AZkbzzjgB7FsJ42qW6rMa3vP4mXB4GJEel453Fs3Fw8tnR4PZ2ikOJ9fcmtMensjxh9oNMyAIMkYVGim5bWtpkpI1cddeThDBEKurj1IWEMonQw4lR2yLwZTEP6F/b43Cy4aNy+YkdJzJbM0KMJASLeeu8SxNTE7JCqrYc4irU3AzHzzy/FNVGdiw0F10kbK+yI7cPUqWfeoVjwqBJe8Hr0CXNNCsEYkjXkY9PE2m2x10z2UrTy+O1dNo/8sDoKvZBChuAPPtxp2I7/KujECWjzFvMunHEk3K940ZeSMo90xHKQllmA7umquNcPTXiM2l4nNHtolh5W2HdanvsEFhkY2Y2o9sRIytOa5bM+iS9RCL5GsZwLgGKdjfuqk0GF6KK4AIgi7WKtVM73vM7HQaAVRpyUWZ/h8Vu5IRdkORC20WwHZ/Jg6pUy3pkN1VlcKE5uanaPik/npq/uCGe9YC2bh5IoclmqdJUHrkzFPb+f8wRBvbof0zU1B6UMSdiug5oDk3a0Q8kk2AppfjTs7x8NHi3KYXdUphi9HrWQ= + distributions: sdist bdist_wheel + on: + all_branches: true + - provider: releases + api_key: "${GITHUB_TOKEN}" + on: + all_branches: true + tags: false + - stage: "brew \U0001F37A" + language: generic + sudo: true + os: osx + osx_image: xcode11.2 + before_install: brew -v install jq + install: python -m pip install requests + script: + - echo PY_BREW_VERSION=$(python setup.py --version) + - make -e PY_BREW_VERSION=$(python setup.py --version) brew-commit-formula brew-build-bottle + brew-commit-bottle + deploy: + - provider: releases + api_key: "${GITHUB_TOKEN}" + file_glob: true + file: "*.bottle.*" + skip_cleanup: true + on: + all_branches: true + - provider: pages + skip-cleanup: true + github-token: "${GITHUB_TOKEN}" + repo: swissdatasciencecenter/homebrew-renku + target-branch: master + keep-history: true + local-dir: "/usr/local/Homebrew/Library/Taps/swissdatasciencecenter/homebrew-renku/" + on: + all_branches: true diff --git a/Dockerfile b/Dockerfile.cli similarity index 96% rename from Dockerfile rename to Dockerfile.cli index 770a6e39ee..6e63b77185 100644 --- a/Dockerfile +++ b/Dockerfile.cli @@ -1,4 +1,4 @@ -FROM python:3.6-alpine as base +FROM python:3.7-alpine as base RUN apk add --no-cache git && \ pip install --no-cache --upgrade pip diff --git a/Dockerfile.svc b/Dockerfile.svc new file mode 100644 index 0000000000..dc7283df4d --- /dev/null +++ b/Dockerfile.svc @@ -0,0 +1,18 @@ +FROM python:3.7-alpine + +RUN apk add --update --no-cache alpine-sdk g++ gcc linux-headers libxslt-dev python3-dev build-base openssl-dev libffi-dev git bash && \ + pip install 
--no-cache --upgrade pip setuptools pipenv requirements-builder + +RUN apk add --no-cache --allow-untrusted \ + --repository http://dl-cdn.alpinelinux.org/alpine/latest-stable/community \ + --repository http://dl-cdn.alpinelinux.org/alpine/latest-stable/main \ + --repository http://nl.alpinelinux.org/alpine/edge/community \ + git-lfs && \ + git lfs install + +COPY . /code/renku +WORKDIR /code/renku +RUN requirements-builder -e all --level=pypi setup.py > requirements.txt && pip install -r requirements.txt && pip install -e . && pip install gunicorn + + +ENTRYPOINT ["gunicorn", "renku.service.entrypoint:app", "-b", "0.0.0.0:8080"] diff --git a/MANIFEST.in b/MANIFEST.in index 8c9e058fa7..eb8dad3260 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -17,15 +17,17 @@ # limitations under the License. # Check manifest will not automatically add these two files: +include renku/service/.env-example include .dockerignore include .editorconfig include .tx/config include *.md prune docs/_build recursive-include renku *.po *.pot *.mo - +recursive-include renku *.py # added by check_manifest.py include *.py +include *.yml include *.rst include *.sh include *.txt @@ -60,3 +62,4 @@ recursive-include renku *.json recursive-include renku Dockerfile recursive-include tests *.py *.gz *.yml *.json prune .github +prune .env diff --git a/Makefile b/Makefile index 2602ce90e6..9e04a816c8 100644 --- a/Makefile +++ b/Makefile @@ -69,3 +69,6 @@ brew-commit-bottle: *.bottle.json brew-release: open "https://github.com/SwissDataScienceCenter/renku-python/releases/new?tag=v$(shell brew info --json=v1 renku | jq -r '.[0].versions.stable')" + +service-container: + docker build -f Dockerfile.svc -t renku-svc:`git rev-parse --short HEAD` . diff --git a/Pipfile.lock b/Pipfile.lock index e98aa3648d..abb015e8c0 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,11 +1,11 @@ { "_meta": { "hash": { - "sha256": "52441bdeddfbfff03386744d707cac4b4f9105ba71cda8acbee56aa506e9e639" + "sha256": "a3e161caf52b39ed8aa1de4a3306a163ca1043dfa219203a4cc1b11463c9007b" }, "pipfile-spec": 6, "requires": { - "python_version": "3.8" + "python_version": "3.7" }, "sources": [ { @@ -30,6 +30,13 @@ ], "version": "==1.5" }, + "apispec": { + "hashes": [ + "sha256:5fdaa1173b32515cc83f9d413a49a6c37fafc2b87f6b40e95923d3e85f0942c5", + "sha256:9e88c51517a6515612e818459f61c1bc06c00f2313e5187828bdbabaa7461473" + ], + "version": "==3.0.0" + }, "appdirs": { "hashes": [ "sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92", @@ -144,40 +151,39 @@ }, "coverage": { "hashes": [ - "sha256:08907593569fe59baca0bf152c43f3863201efb6113ecb38ce7e97ce339805a6", - "sha256:0be0f1ed45fc0c185cfd4ecc19a1d6532d72f86a2bac9de7e24541febad72650", - "sha256:141f08ed3c4b1847015e2cd62ec06d35e67a3ac185c26f7635f4406b90afa9c5", - "sha256:19e4df788a0581238e9390c85a7a09af39c7b539b29f25c89209e6c3e371270d", - "sha256:23cc09ed395b03424d1ae30dcc292615c1372bfba7141eb85e11e50efaa6b351", - "sha256:245388cda02af78276b479f299bbf3783ef0a6a6273037d7c60dc73b8d8d7755", - "sha256:331cb5115673a20fb131dadd22f5bcaf7677ef758741312bee4937d71a14b2ef", - "sha256:386e2e4090f0bc5df274e720105c342263423e77ee8826002dcffe0c9533dbca", - "sha256:3a794ce50daee01c74a494919d5ebdc23d58873747fa0e288318728533a3e1ca", - "sha256:60851187677b24c6085248f0a0b9b98d49cba7ecc7ec60ba6b9d2e5574ac1ee9", - "sha256:63a9a5fc43b58735f65ed63d2cf43508f462dc49857da70b8980ad78d41d52fc", - "sha256:6b62544bb68106e3f00b21c8930e83e584fdca005d4fffd29bb39fb3ffa03cb5", - 
"sha256:6ba744056423ef8d450cf627289166da65903885272055fb4b5e113137cfa14f", - "sha256:7494b0b0274c5072bddbfd5b4a6c6f18fbbe1ab1d22a41e99cd2d00c8f96ecfe", - "sha256:826f32b9547c8091679ff292a82aca9c7b9650f9fda3e2ca6bf2ac905b7ce888", - "sha256:93715dffbcd0678057f947f496484e906bf9509f5c1c38fc9ba3922893cda5f5", - "sha256:9a334d6c83dfeadae576b4d633a71620d40d1c379129d587faa42ee3e2a85cce", - "sha256:af7ed8a8aa6957aac47b4268631fa1df984643f07ef00acd374e456364b373f5", - "sha256:bf0a7aed7f5521c7ca67febd57db473af4762b9622254291fbcbb8cd0ba5e33e", - "sha256:bf1ef9eb901113a9805287e090452c05547578eaab1b62e4ad456fcc049a9b7e", - "sha256:c0afd27bc0e307a1ffc04ca5ec010a290e49e3afbe841c5cafc5c5a80ecd81c9", - "sha256:dd579709a87092c6dbee09d1b7cfa81831040705ffa12a1b248935274aee0437", - "sha256:df6712284b2e44a065097846488f66840445eb987eb81b3cc6e4149e7b6982e1", - "sha256:e07d9f1a23e9e93ab5c62902833bf3e4b1f65502927379148b6622686223125c", - "sha256:e2ede7c1d45e65e209d6093b762e98e8318ddeff95317d07a27a2140b80cfd24", - "sha256:e4ef9c164eb55123c62411f5936b5c2e521b12356037b6e1c2617cef45523d47", - "sha256:eca2b7343524e7ba246cab8ff00cab47a2d6d54ada3b02772e908a45675722e2", - "sha256:eee64c616adeff7db37cc37da4180a3a5b6177f5c46b187894e633f088fb5b28", - "sha256:ef824cad1f980d27f26166f86856efe11eff9912c4fed97d3804820d43fa550c", - "sha256:efc89291bd5a08855829a3c522df16d856455297cf35ae827a37edac45f466a7", - "sha256:fa964bae817babece5aa2e8c1af841bebb6d0b9add8e637548809d040443fee0", - "sha256:ff37757e068ae606659c28c3bd0d923f9d29a85de79bf25b2b34b148473b5025" - ], - "version": "==4.5.4" + "sha256:0cd13a6e98c37b510a2d34c8281d5e1a226aaf9b65b7d770ef03c63169965351", + "sha256:1a4b6b6a2a3a6612e6361130c2cc3dc4378d8c221752b96167ccbad94b47f3cd", + "sha256:2ee55e6dba516ddf6f484aa83ccabbb0adf45a18892204c23486938d12258cde", + "sha256:3be5338a2eb4ef03c57f20917e1d12a1fd10e3853fed060b6d6b677cb3745898", + "sha256:44b783b02db03c4777d8cf71bae19eadc171a6f2a96777d916b2c30a1eb3d070", + "sha256:475bf7c4252af0a56e1abba9606f1e54127cdf122063095c75ab04f6f99cf45e", + "sha256:47c81ee687eafc2f1db7f03fbe99aab81330565ebc62fb3b61edfc2216a550c8", + "sha256:4a7f8e72b18f2aca288ff02255ce32cc830bc04d993efbc87abf6beddc9e56c0", + "sha256:50197163a22fd17f79086e087a787883b3ec9280a509807daf158dfc2a7ded02", + "sha256:56b13000acf891f700f5067512b804d1ec8c301d627486c678b903859d07f798", + "sha256:79388ae29c896299b3567965dbcd93255f175c17c6c7bca38614d12718c47466", + "sha256:79fd5d3d62238c4f583b75d48d53cdae759fe04d4fb18fe8b371d88ad2b6f8be", + "sha256:7fe3e2fde2bf1d7ce25ebcd2d3de3650b8d60d9a73ce6dcef36e20191291613d", + "sha256:81042a24f67b96e4287774014fa27220d8a4d91af1043389e4d73892efc89ac6", + "sha256:81326f1095c53111f8afc95da281e1414185f4a538609a77ca50bdfa39a6c207", + "sha256:8873dc0d8f42142ea9f20c27bbdc485190fff93823c6795be661703369e5877d", + "sha256:88d2cbcb0a112f47eef71eb95460b6995da18e6f8ca50c264585abc2c473154b", + "sha256:91f2491aeab9599956c45a77c5666d323efdec790bfe23fcceafcd91105d585a", + "sha256:979daa8655ae5a51e8e7a24e7d34e250ae8309fd9719490df92cbb2fe2b0422b", + "sha256:9c871b006c878a890c6e44a5b2f3c6291335324b298c904dc0402ee92ee1f0be", + "sha256:a6d092545e5af53e960465f652e00efbf5357adad177b2630d63978d85e46a72", + "sha256:b5ed7837b923d1d71c4f587ae1539ccd96bfd6be9788f507dbe94dab5febbb5d", + "sha256:ba259f68250f16d2444cbbfaddaa0bb20e1560a4fdaad50bece25c199e6af864", + "sha256:be1d89614c6b6c36d7578496dc8625123bda2ff44f224cf8b1c45b810ee7383f", + "sha256:c1b030a79749aa8d1f1486885040114ee56933b15ccfc90049ba266e4aa2139f", + 
"sha256:c95bb147fab76f2ecde332d972d8f4138b8f2daee6c466af4ff3b4f29bd4c19e", + "sha256:d52c1c2d7e856cecc05aa0526453cb14574f821b7f413cc279b9514750d795c1", + "sha256:d609a6d564ad3d327e9509846c2c47f170456344521462b469e5cb39e48ba31c", + "sha256:e1bad043c12fb58e8c7d92b3d7f2f49977dcb80a08a6d1e7a5114a11bf819fca", + "sha256:e5a675f6829c53c87d79117a8eb656cc4a5f8918185a32fc93ba09778e90f6db", + "sha256:fec32646b98baf4a22fdceb08703965bd16dea09051fbeb31a04b5b6e72b846c" + ], + "version": "==5.0" }, "cryptography": { "hashes": [ @@ -255,6 +261,13 @@ ], "version": "==1.7.1" }, + "fakeredis": { + "hashes": [ + "sha256:169598943dc10aadd62871a34b2867bb5e24f9da7ebc97a2058c3f35c760241e", + "sha256:1db27ec3a5c964b9fb9f36ec1b9770a81204c54e84f83c763f36689eef4a5fd4" + ], + "version": "==1.1.0" + }, "filelock": { "hashes": [ "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59", @@ -269,6 +282,26 @@ ], "version": "==3.7.9" }, + "flask": { + "hashes": [ + "sha256:13f9f196f330c7c2c5d7a5cf91af894110ca0215ac051b5844701f2bfd934d52", + "sha256:45eb5a6fd193d6cf7e0cf5d8a5b31f83d5faae0293695626f539a823e93b13f6" + ], + "version": "==1.1.1" + }, + "flask-apispec": { + "hashes": [ + "sha256:46bb89f8c4be3547d3f48536100f88a2a249ae59b050589cff57a0ec8e25d000", + "sha256:b97a9d7200293021ff11fa393157f51736dc12d6b4fc4502140561fb3cf64a16" + ], + "version": "==0.8.3" + }, + "flask-swagger-ui": { + "hashes": [ + "sha256:3282c770764c8053360f33b2fc120e1d169ecca2138537d0e6e1135b1f9d4ff2" + ], + "version": "==3.20.9" + }, "freezegun": { "hashes": [ "sha256:2a4d9c8cd3c04a201e20c313caf8b6338f1cfa4cda43f46a94cc4a9fd13ea5e7", @@ -317,6 +350,14 @@ ], "version": "==1.1.0" }, + "importlib-metadata": { + "hashes": [ + "sha256:073a852570f92da5f744a3472af1b61e28e9f78ccf0c9117658dc32b15de7b45", + "sha256:d95141fbfa7ef2ec65cfd945e2af7e5a6ddbd7c8d9a25e66ff3be8e3daf9f60f" + ], + "markers": "python_version < '3.8'", + "version": "==1.3.0" + }, "isodate": { "hashes": [ "sha256:2e364a3d5759479cdb2d37cce6b9376ea504db2ff90252a2e5b7cc89cc9ff2d8", @@ -332,6 +373,13 @@ ], "version": "==4.3.4" }, + "itsdangerous": { + "hashes": [ + "sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19", + "sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749" + ], + "version": "==1.1.0" + }, "jinja2": { "hashes": [ "sha256:74320bb91f31270f9551d46522e33af46a80c3d619f4a4bf42b3164d30b5911f", @@ -410,6 +458,14 @@ ], "version": "==1.1.1" }, + "marshmallow": { + "hashes": [ + "sha256:1a358beb89c2b4d5555272065a9533591a3eb02f1b854f3c4002d88d8f2a1ddb", + "sha256:eb97c42c5928b5720812c9268865fe863d4807bc1a8b48ddd7d5c9e1779a6af0" + ], + "markers": "python_version >= '3'", + "version": "==3.2.2" + }, "mccabe": { "hashes": [ "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", @@ -595,10 +651,10 @@ }, "pytest": { "hashes": [ - "sha256:63344a2e3bce2e4d522fd62b4fdebb647c019f1f9e4ca075debbd13219db4418", - "sha256:f67403f33b2b1d25a6756184077394167fe5e2f9d8bdaab30707d19ccec35427" + "sha256:6b571215b5a790f9b41f19f3531c53a45cf6bb8ef2988bc1ff9afb38270b25fa", + "sha256:e41d489ff43948babd0fad7ad5e49b8735d5d55e26628a58673c39ff61d95de4" ], - "version": "==5.3.1" + "version": "==5.3.2" }, "pytest-cache": { "hashes": [ @@ -692,6 +748,13 @@ ], "version": "==0.4.0" }, + "redis": { + "hashes": [ + "sha256:3613daad9ce5951e426f460deddd5caf469e08a3af633e9578fc77d362becf62", + "sha256:8d0fc278d3f5e1249967cba2eb4a5632d19e45ce5c09442b8422d15ee2c22cc2" + ], + "version": "==3.3.11" + }, "renku": { "editable": true, "extras": [ @@ -727,6 
+790,31 @@ ], "version": "==0.16.0" }, + "ruamel.yaml.clib": { + "hashes": [ + "sha256:1e77424825caba5553bbade750cec2277ef130647d685c2b38f68bc03453bac6", + "sha256:392b7c371312abf27fb549ec2d5e0092f7ef6e6c9f767bfb13e83cb903aca0fd", + "sha256:4d55386129291b96483edcb93b381470f7cd69f97585829b048a3d758d31210a", + "sha256:550168c02d8de52ee58c3d8a8193d5a8a9491a5e7b2462d27ac5bf63717574c9", + "sha256:57933a6986a3036257ad7bf283529e7c19c2810ff24c86f4a0cfeb49d2099919", + "sha256:615b0396a7fad02d1f9a0dcf9f01202bf9caefee6265198f252c865f4227fcc6", + "sha256:77556a7aa190be9a2bd83b7ee075d3df5f3c5016d395613671487e79b082d784", + "sha256:7aee724e1ff424757b5bd8f6c5bbdb033a570b2b4683b17ace4dbe61a99a657b", + "sha256:8073c8b92b06b572e4057b583c3d01674ceaf32167801fe545a087d7a1e8bf52", + "sha256:9c6d040d0396c28d3eaaa6cb20152cb3b2f15adf35a0304f4f40a3cf9f1d2448", + "sha256:a0ff786d2a7dbe55f9544b3f6ebbcc495d7e730df92a08434604f6f470b899c5", + "sha256:b1b7fcee6aedcdc7e62c3a73f238b3d080c7ba6650cd808bce8d7761ec484070", + "sha256:b66832ea8077d9b3f6e311c4a53d06273db5dc2db6e8a908550f3c14d67e718c", + "sha256:be018933c2f4ee7de55e7bd7d0d801b3dfb09d21dad0cce8a97995fd3e44be30", + "sha256:d0d3ac228c9bbab08134b4004d748cf9f8743504875b3603b3afbb97e3472947", + "sha256:d10e9dd744cf85c219bf747c75194b624cc7a94f0c80ead624b06bfa9f61d3bc", + "sha256:ea4362548ee0cbc266949d8a441238d9ad3600ca9910c3fe4e82ee3a50706973", + "sha256:ed5b3698a2bb241b7f5cbbe277eaa7fe48b07a58784fba4f75224fd066d253ad", + "sha256:f9dcc1ae73f36e8059589b601e8e4776b9976effd76c21ad6a855a74318efd6e" + ], + "markers": "platform_python_implementation == 'CPython' and python_version < '3.8'", + "version": "==0.2.0" + }, "scandir": { "hashes": [ "sha256:2586c94e907d99617887daed6c1d102b5ca28f1085f90446554abf1faf73123e", @@ -799,12 +887,19 @@ ], "version": "==2.0.0" }, + "sortedcontainers": { + "hashes": [ + "sha256:974e9a32f56b17c1bac2aebd9dcf197f3eb9cd30553c5852a3187ad162e1a03a", + "sha256:d9e96492dd51fae31e60837736b38fe42a187b5404c16606ff7ee7cd582d4c60" + ], + "version": "==2.1.0" + }, "sphinx": { "hashes": [ - "sha256:3b16e48e791a322d584489ab28d8800652123d1fbfdd173e2965a31d40bf22d7", - "sha256:559c1a8ed1365a982f77650720b41114414139a635692a23c2990824d0a84cf2" + "sha256:0a11e2fd31fe5c7e64b4fc53c2c022946512f021d603eb41ac6ae51d5fcbb574", + "sha256:138e39aa10f28d52aa5759fc6d1cba2be6a4b750010974047fa7d0e31addcf63" ], - "version": "==2.2.2" + "version": "==2.3.0" }, "sphinxcontrib-applehelp": { "hashes": [ @@ -902,6 +997,14 @@ ], "version": "==0.1.7" }, + "webargs": { + "hashes": [ + "sha256:3beca296598067cec24a0b6f91c0afcc19b6e3c4d84ab026b931669628bb47b4", + "sha256:3f9dc15de183d356c9a0acc159c100ea0506c0c240c1e6f1d8b308c5fed4dbbd", + "sha256:fa4ad3ad9b38bedd26c619264fdc50d7ae014b49186736bca851e5b5228f2a1b" + ], + "version": "==5.5.2" + }, "werkzeug": { "hashes": [ "sha256:7280924747b5733b246fe23972186c6b348f9ae29724135a6dfc1e53cea433e7", @@ -915,6 +1018,13 @@ "sha256:613deba14233623ff3432d9d5032631b5f600be97b39f66932cbe67648bfa8ea" ], "version": "==0.27.0" + }, + "zipp": { + "hashes": [ + "sha256:3718b1cbcd963c7d4c5511a8240812904164b7f381b647143a89d3b98f9bcd8e", + "sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335" + ], + "version": "==0.6.0" } }, "develop": {} diff --git a/conftest.py b/conftest.py index a0e4fa49bd..5c6d301204 100644 --- a/conftest.py +++ b/conftest.py @@ -25,8 +25,10 @@ import tempfile import time import urllib +import uuid from pathlib import Path +import fakeredis import pytest import responses import yaml @@ -510,3 +512,162 @@ def 
remote_project(data_repository, directory_tree): assert 0 == result.exit_code yield runner, project_path + + +@pytest.fixture(scope='function') +def datapack_zip(directory_tree): + """Returns dummy data folder as a zip archive.""" + from renku.core.utils.contexts import chdir + workspace_dir = tempfile.TemporaryDirectory() + with chdir(workspace_dir.name): + shutil.make_archive('datapack', 'zip', str(directory_tree)) + + yield Path(workspace_dir.name) / 'datapack.zip' + + +@pytest.fixture(scope='function') +def datapack_tar(directory_tree): + """Returns dummy data folder as a tar archive.""" + from renku.core.utils.contexts import chdir + workspace_dir = tempfile.TemporaryDirectory() + with chdir(workspace_dir.name): + shutil.make_archive('datapack', 'tar', str(directory_tree)) + + yield Path(workspace_dir.name) / 'datapack.tar' + + +@pytest.fixture(scope='function') +def mock_redis(monkeypatch): + """Monkey patch service cache with mocked redis.""" + from renku.service.cache import ServiceCache + with monkeypatch.context() as m: + m.setattr(ServiceCache, 'cache', fakeredis.FakeRedis()) + yield + + +@pytest.fixture(scope='function') +def svc_client(mock_redis): + """Renku service client.""" + from renku.service.entrypoint import create_app + + flask_app = create_app() + + testing_client = flask_app.test_client() + testing_client.testing = True + + ctx = flask_app.app_context() + ctx.push() + + yield testing_client + + ctx.pop() + + +@pytest.fixture(scope='function') +def svc_client_with_repo(svc_client, mock_redis): + """Renku service remote repository.""" + remote_url = 'https://dev.renku.ch/gitlab/contact/integration-tests' + headers = { + 'Content-Type': 'application/json', + 'Renku-User-Id': 'b4b4de0eda0f471ab82702bd5c367fa7', + 'Renku-User-FullName': 'Just Sam', + 'Renku-User-Email': 'contact@justsam.io', + 'Authorization': 'Bearer {0}'.format(os.getenv('IT_OAUTH_GIT_TOKEN')), + } + + payload = {'git_url': remote_url} + + response = svc_client.post( + '/cache.project_clone', + data=json.dumps(payload), + headers=headers, + ) + + assert response + assert 'result' in response.json + assert 'error' not in response.json + project_id = response.json['result']['project_id'] + assert isinstance(uuid.UUID(project_id), uuid.UUID) + + yield svc_client, headers, project_id + + +@pytest.fixture( + params=[ + { + 'url': '/cache.files_list', + 'allowed_method': 'GET', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + { + 'url': '/cache.files_upload', + 'allowed_method': 'POST', + 'headers': {} + }, + { + 'url': '/cache.project_clone', + 'allowed_method': 'POST', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + { + 'url': '/cache.project_list', + 'allowed_method': 'GET', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + { + 'url': '/datasets.add', + 'allowed_method': 'POST', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + { + 'url': '/datasets.create', + 'allowed_method': 'POST', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + { + 'url': '/datasets.files_list', + 'allowed_method': 'GET', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + { + 'url': '/datasets.list', + 'allowed_method': 'GET', + 'headers': { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + }, + ] +) +def 
service_allowed_endpoint(request, svc_client, mock_redis): + """Ensure allowed methods and correct headers.""" + methods = { + 'GET': svc_client.get, + 'POST': svc_client.post, + 'HEAD': svc_client.head, + 'PUT': svc_client.put, + 'DELETE': svc_client.delete, + 'OPTIONS': svc_client.options, + 'TRACE': svc_client.trace, + 'PATCH': svc_client.patch, + } + + yield methods, request.param, svc_client diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000..140132e5a8 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,11 @@ +version: '3' + +services: + redis: + image: redis:5.0.3-alpine + + renku-svc: + image: renku-svc:latest + env_file: .env + ports: + - "8080:8080" diff --git a/renku/core/commands/clone.py b/renku/core/commands/clone.py index b7f098416f..fe435b89d6 100644 --- a/renku/core/commands/clone.py +++ b/renku/core/commands/clone.py @@ -29,8 +29,11 @@ def renku_clone( path=None, install_githooks=True, skip_smudge=True, + recursive=True, + depth=None, progress=None, - commit_message=None + config=None, + raise_git_except=False, ): """Clone Renku project repo, install Git hooks and LFS.""" install_lfs = client.use_external_storage @@ -40,5 +43,9 @@ def renku_clone( install_githooks=install_githooks, install_lfs=install_lfs, skip_smudge=skip_smudge, - progress=progress + recursive=recursive, + depth=depth, + progress=progress, + config=config, + raise_git_except=raise_git_except, ) diff --git a/renku/core/management/clone.py b/renku/core/management/clone.py index b3993a6bd7..41120bb04c 100644 --- a/renku/core/management/clone.py +++ b/renku/core/management/clone.py @@ -18,6 +18,7 @@ """Clone a Renku repo along with all Renku-specific initializations.""" import os +from pathlib import Path from git import GitCommandError, Repo @@ -34,23 +35,49 @@ def clone( skip_smudge=True, recursive=True, depth=None, - progress=None + progress=None, + config=None, + raise_git_except=False, ): """Clone Renku project repo, install Git hooks and LFS.""" from renku.core.management.client import LocalClient path = path or GitURL.parse(url).name + + if isinstance(path, Path): + path = str(path) + # Clone the project if skip_smudge: os.environ['GIT_LFS_SKIP_SMUDGE'] = '1' + try: repo = Repo.clone_from( url, path, recursive=recursive, depth=depth, progress=progress ) except GitCommandError as e: - raise errors.GitError( - 'Cannot clone remote Renku project: {}'.format(url) - ) from e + if not raise_git_except: + raise errors.GitError( + 'Cannot clone remote Renku project: {}'.format(url) + ) from e + + raise e + + if config: + config_writer = repo.config_writer() + + for key, value in config.items(): + key_path = key.split('.') + key = key_path.pop() + + if not key_path or not key: + raise errors.GitError( + 'Cannot write to config. Section path or key is invalid.' + ) + + config_writer.set_value('.'.join(key_path), key, value) + + config_writer.release() client = LocalClient(path) diff --git a/renku/core/management/repository.py b/renku/core/management/repository.py index a937390738..114081ed40 100644 --- a/renku/core/management/repository.py +++ b/renku/core/management/repository.py @@ -47,13 +47,18 @@ def default_path(): return '.' 
+def path_converter(path): + """Converter for path in PathMixin.""" + return Path(path).resolve() + + @attr.s class PathMixin: """Define a default path attribute.""" path = attr.ib( default=default_path, - converter=lambda arg: Path(arg).resolve().absolute(), + converter=path_converter, ) @path.validator diff --git a/renku/core/utils/contexts.py b/renku/core/utils/contexts.py index 77de0bc61b..06131ade45 100644 --- a/renku/core/utils/contexts.py +++ b/renku/core/utils/contexts.py @@ -26,6 +26,9 @@ @contextlib.contextmanager def chdir(path): """Change the current working directory.""" + if isinstance(path, Path): + path = str(path) + cwd = os.getcwd() os.chdir(path) try: diff --git a/renku/service/.env-example b/renku/service/.env-example new file mode 100644 index 0000000000..45f635b6c3 --- /dev/null +++ b/renku/service/.env-example @@ -0,0 +1,7 @@ +REDIS_HOST=redis +REDIS_PORT=6379 +REDIS_DATABASE=0 +REDIS_PASSWORD= + +CACHE_DIR= +PROJECT_CLONE_DEPTH_DEFAULT=1 diff --git a/renku/service/__init__.py b/renku/service/__init__.py new file mode 100644 index 0000000000..1928b35350 --- /dev/null +++ b/renku/service/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service.""" diff --git a/renku/service/cache/__init__.py b/renku/service/cache/__init__.py new file mode 100644 index 0000000000..ca18ae78ec --- /dev/null +++ b/renku/service/cache/__init__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Renku service cache management for files.""" +from renku.service.cache.files import FileManagementCache +from renku.service.cache.projects import ProjectManagementCache + + +class ServiceCache(FileManagementCache, ProjectManagementCache): + """Service cache manager.""" + + pass diff --git a/renku/service/cache/base.py b/renku/service/cache/base.py new file mode 100644 index 0000000000..c38f3f8d79 --- /dev/null +++ b/renku/service/cache/base.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service cache management.""" +import json + +import redis +from redis import RedisError + +from renku.service.cache.config import REDIS_DATABASE, REDIS_HOST, \ + REDIS_PASSWORD, REDIS_PORT + + +class BaseCache: + """Cache management.""" + + cache = redis.Redis( + host=REDIS_HOST, + port=REDIS_PORT, + db=REDIS_DATABASE, + password=REDIS_PASSWORD + ) + + def set_record(self, name, key, value): + """Insert a record to hash set.""" + if isinstance(value, dict): + value = json.dumps(value) + + self.cache.hset(name, key, value) + + def invalidate_key(self, name, key): + """Invalidate cache `key` in users hash set.""" + try: + self.cache.hdel(name, key) + except RedisError: + pass + + def get_record(self, name, key): + """Return record values from hash set.""" + result = self.cache.hget(name, key) + if result: + return json.loads(result.decode('utf-8')) + + def get_all_records(self, name): + """Return all record values from hash set.""" + return [ + json.loads(record.decode('utf-8')) + for record in self.cache.hgetall(name).values() + ] diff --git a/renku/service/cache/config.py b/renku/service/cache/config.py new file mode 100644 index 0000000000..7afb2d6b68 --- /dev/null +++ b/renku/service/cache/config.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Renku service cache configuration.""" +import os + +REDIS_HOST = os.getenv('REDIS_HOST', '0.0.0.0') +REDIS_PORT = int(os.getenv('REDIS_PORT', 6379)) +REDIS_DATABASE = int(os.getenv('REDIS_DATABASE', 0)) +REDIS_PASSWORD = os.getenv('REDIS_PASSWORD') diff --git a/renku/service/cache/files.py b/renku/service/cache/files.py new file mode 100644 index 0000000000..cb344e1ed5 --- /dev/null +++ b/renku/service/cache/files.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service files cache management.""" +from renku.service.cache.base import BaseCache + + +class FileManagementCache(BaseCache): + """File management cache.""" + + FILES_SUFFIX = 'files' + + def files_cache_key(self, user): + """Construct cache key based on user and files suffix.""" + return '{0}_{1}'.format(user['user_id'], self.FILES_SUFFIX) + + def set_file(self, user, file_id, metadata): + """Cache file metadata under user hash set.""" + self.set_record(self.files_cache_key(user), file_id, metadata) + + def set_files(self, user, files): + """Cache a list of metadata files under user hash set.""" + for file_ in files: + self.set_file(user, file_['file_id'], file_) + + def get_files(self, user): + """Get all user cached files.""" + return self.get_all_records(self.files_cache_key(user)) + + def get_file(self, user, file_id): + """Get user cached file.""" + result = self.get_record(self.files_cache_key(user), file_id) + return result + + def invalidate_file(self, user, file_id): + """Remove file record from hash set.""" + self.invalidate_key(self.files_cache_key(user), file_id) diff --git a/renku/service/cache/projects.py b/renku/service/cache/projects.py new file mode 100644 index 0000000000..ebd092cc4e --- /dev/null +++ b/renku/service/cache/projects.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Renku service project cache management.""" +from renku.service.cache.base import BaseCache + + +class ProjectManagementCache(BaseCache): + """Project management cache.""" + + PROJECTS_SUFFIX = 'projects' + + def projects_cache_key(self, user): + """Construct cache key based on user and projects suffix.""" + return '{0}_{1}'.format(user['user_id'], self.PROJECTS_SUFFIX) + + def set_project(self, user, project_id, metadata): + """Cache project metadata under user hash set.""" + self.set_record(self.projects_cache_key(user), project_id, metadata) + + def get_projects(self, user): + """Get all user cache projects.""" + return self.get_all_records(self.projects_cache_key(user)) + + def get_project(self, user, project_id): + """Get user cached project.""" + result = self.get_record(self.projects_cache_key(user), project_id) + return result + + def invalidate_project(self, user, project_id): + """Remove project record from hash set.""" + self.invalidate_key(self.projects_cache_key(user), project_id) diff --git a/renku/service/config.py b/renku/service/config.py new file mode 100644 index 0000000000..774f2c775a --- /dev/null +++ b/renku/service/config.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service config.""" +import os +import tempfile +from pathlib import Path + +GIT_ACCESS_DENIED_ERROR_CODE = -32000 +GIT_UNKNOWN_ERROR_CODE = -32001 + +RENKU_EXCEPTION_ERROR_CODE = -32100 +REDIS_EXCEPTION_ERROR_CODE = -32200 + +INVALID_HEADERS_ERROR_CODE = -32601 +INVALID_PARAMS_ERROR_CODE = -32602 +INTERNAL_FAILURE_ERROR_CODE = -32603 + +SERVICE_NAME = 'Renku Service' +OPENAPI_VERSION = '2.0' +API_VERSION = 'v1' + +SWAGGER_URL = '/api/docs' +API_SPEC_URL = os.getenv( + 'RENKU_SVC_SWAGGER_URL', '/api/{0}/spec'.format(API_VERSION) +) + +PROJECT_CLONE_DEPTH_DEFAULT = int(os.getenv('PROJECT_CLONE_DEPTH_DEFAULT', 1)) + +CACHE_DIR = os.getenv('CACHE_DIR', tempfile.TemporaryDirectory().name) +CACHE_UPLOADS_PATH = Path(CACHE_DIR) / Path('uploads') +CACHE_UPLOADS_PATH.mkdir(parents=True, exist_ok=True) + +CACHE_PROJECTS_PATH = Path(CACHE_DIR) / Path('projects') +CACHE_PROJECTS_PATH.mkdir(parents=True, exist_ok=True) + +TAR_ARCHIVE_CONTENT_TYPE = 'application/x-tar' +ZIP_ARCHIVE_CONTENT_TYPE = 'application/zip' + +SUPPORTED_ARCHIVES = [ + TAR_ARCHIVE_CONTENT_TYPE, + ZIP_ARCHIVE_CONTENT_TYPE, +] diff --git a/renku/service/entrypoint.py b/renku/service/entrypoint.py new file mode 100644 index 0000000000..7f645c9e90 --- /dev/null +++ b/renku/service/entrypoint.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service entry point.""" +import os +import uuid + +from apispec import APISpec +from apispec.ext.marshmallow import MarshmallowPlugin +from flask import Flask +from flask_apispec import FlaskApiSpec +from flask_swagger_ui import get_swaggerui_blueprint + +from renku.service.cache import ServiceCache +from renku.service.config import API_SPEC_URL, API_VERSION, CACHE_DIR, \ + CACHE_PROJECTS_PATH, CACHE_UPLOADS_PATH, OPENAPI_VERSION, SERVICE_NAME, \ + SWAGGER_URL +from renku.service.views.cache import CACHE_BLUEPRINT_TAG, cache_blueprint, \ + list_projects_view, list_uploaded_files_view, project_clone, \ + upload_file_view +from renku.service.views.datasets import DATASET_BLUEPRINT_TAG, \ + add_file_to_dataset_view, create_dataset_view, dataset_blueprint, \ + list_dataset_files_view, list_datasets_view + + +def make_cache(): + """Create cache structure.""" + sub_dirs = [CACHE_UPLOADS_PATH, CACHE_PROJECTS_PATH] + + for subdir in sub_dirs: + if not subdir.exists(): + subdir.mkdir() + + return ServiceCache() + + +def create_app(): + """Creates a Flask app with necessary configuration.""" + app = Flask(__name__) + app.secret_key = os.getenv('RENKU_SVC_SERVICE_KEY', uuid.uuid4().hex) + + app.config['UPLOAD_FOLDER'] = CACHE_DIR + app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 + + cache = make_cache() + app.config['cache'] = cache + + build_routes(app) + return app + + +def build_routes(app): + """Register routes to given app instance.""" + app.config.update({ + 'APISPEC_SPEC': + APISpec( + title=SERVICE_NAME, + openapi_version=OPENAPI_VERSION, + version=API_VERSION, + plugins=[MarshmallowPlugin()], + ), + 'APISPEC_SWAGGER_URL': API_SPEC_URL, + }) + app.register_blueprint(cache_blueprint) + app.register_blueprint(dataset_blueprint) + + swaggerui_blueprint = get_swaggerui_blueprint( + SWAGGER_URL, API_SPEC_URL, config={'app_name': 'Renku Service'} + ) + app.register_blueprint(swaggerui_blueprint, url_prefix=SWAGGER_URL) + + docs = FlaskApiSpec(app) + + docs.register(upload_file_view, blueprint=CACHE_BLUEPRINT_TAG) + docs.register(list_uploaded_files_view, blueprint=CACHE_BLUEPRINT_TAG) + docs.register(project_clone, blueprint=CACHE_BLUEPRINT_TAG) + docs.register(list_projects_view, blueprint=CACHE_BLUEPRINT_TAG) + + docs.register(create_dataset_view, blueprint=DATASET_BLUEPRINT_TAG) + docs.register(add_file_to_dataset_view, blueprint=DATASET_BLUEPRINT_TAG) + docs.register(list_datasets_view, blueprint=DATASET_BLUEPRINT_TAG) + docs.register(list_dataset_files_view, blueprint=DATASET_BLUEPRINT_TAG) + + +app = create_app() + +if __name__ == '__main__': + app.run() diff --git a/renku/service/serializers/__init__.py b/renku/service/serializers/__init__.py new file mode 100644 index 0000000000..362f6221d1 --- /dev/null +++ b/renku/service/serializers/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# 
Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service serializers.""" diff --git a/renku/service/serializers/cache.py b/renku/service/serializers/cache.py new file mode 100644 index 0000000000..c42415245e --- /dev/null +++ b/renku/service/serializers/cache.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service cache serializers.""" +import time +import uuid +from urllib.parse import urlparse + +from marshmallow import Schema, ValidationError, fields, post_load, pre_load, \ + validates +from werkzeug.utils import secure_filename + +from renku.core.errors import ConfigurationError +from renku.core.models.git import GitURL +from renku.service.config import PROJECT_CLONE_DEPTH_DEFAULT +from renku.service.serializers.rpc import JsonRPCResponse + + +def extract_file(request): + """Extract file from Flask request. 
+
+    :raises: `ValidationError`
+    """
+    files = request.files
+    if 'file' not in files:
+        raise ValidationError('missing key: file')
+
+    file = files['file']
+    if file and not file.filename:
+        raise ValidationError('wrong filename: {0}'.format(file.filename))
+
+    if file:
+        file.filename = secure_filename(file.filename)
+        return file
+
+
+class FileUploadRequest(Schema):
+    """Request schema for file upload."""
+
+    override_existing = fields.Boolean(missing=False)
+    unpack_archive = fields.Boolean(missing=False)
+
+
+class FileUploadContext(Schema):
+    """Context schema for file upload."""
+
+    file_id = fields.String(missing=lambda: uuid.uuid4().hex)
+
+    # measured in ms; a callable, so it is evaluated per upload
+    # rather than once at import time
+    timestamp = fields.Integer(missing=lambda: int(time.time() * 1e+3))
+
+    content_type = fields.String(missing='unknown')
+    file_name = fields.String(required=True)
+
+    # measured in bytes (comes from stat() - st_size)
+    file_size = fields.Integer(required=True)
+
+    relative_path = fields.String(required=True)
+    is_archive = fields.Boolean(missing=False)
+    unpack_archive = fields.Boolean(missing=False)
+
+
+class FileUploadResponse(Schema):
+    """Response schema for file upload."""
+
+    files = fields.List(fields.Nested(FileUploadContext), required=True)
+
+
+class FileUploadResponseRPC(JsonRPCResponse):
+    """RPC response schema for file upload response."""
+
+    result = fields.Nested(FileUploadResponse)
+
+
+class FileListResponse(Schema):
+    """Response schema for files listing."""
+
+    files = fields.List(fields.Nested(FileUploadContext), required=True)
+
+
+class FileListResponseRPC(JsonRPCResponse):
+    """RPC response schema for files listing."""
+
+    result = fields.Nested(FileListResponse)
+
+
+class ProjectCloneRequest(Schema):
+    """Request schema for project clone."""
+
+    git_url = fields.String(required=True)
+    depth = fields.Integer(missing=PROJECT_CLONE_DEPTH_DEFAULT)
+
+
+class ProjectCloneContext(ProjectCloneRequest):
+    """Context schema for project clone."""
+
+    project_id = fields.String(missing=lambda: uuid.uuid4().hex)
+    name = fields.String(required=True)
+    fullname = fields.String(required=True)
+    email = fields.String(required=True)
+    owner = fields.String(required=True)
+    token = fields.String(required=True)
+
+    @validates('git_url')
+    def validate_git_url(self, value):
+        """Validate git url."""
+        try:
+            GitURL.parse(value)
+        except ConfigurationError as e:
+            raise ValidationError(str(e))
+
+        return value
+
+    @post_load()
+    def format_url(self, data, **kwargs):
+        """Format URL with username and password."""
+        git_url = urlparse(data['git_url'])
+
+        url = 'oauth2:{0}@{1}'.format(data['token'], git_url.netloc)
+        data['url_with_auth'] = git_url._replace(netloc=url).geturl()
+
+        return data
+
+    @pre_load()
+    def set_owner_name(self, data, **kwargs):
+        """Set owner and name fields."""
+        git_url = GitURL.parse(data['git_url'])
+
+        data['owner'] = git_url.owner
+        data['name'] = git_url.name
+
+        return data
+
+
+class ProjectCloneResponse(Schema):
+    """Response schema for project clone."""
+
+    project_id = fields.String(required=True)
+    git_url = fields.String(required=True)
+
+
+class ProjectCloneResponseRPC(JsonRPCResponse):
+    """RPC response schema for project clone response."""
+
+    result = fields.Nested(ProjectCloneResponse)
+
+
+class ProjectListResponse(Schema):
+    """Response schema for project listing."""
+
+    projects = fields.List(fields.Nested(ProjectCloneResponse), required=True)
+
+
+class ProjectListResponseRPC(JsonRPCResponse):
+    """RPC response schema for project listing."""
+
+    result = fields.Nested(ProjectListResponse)
fields.Nested(ProjectListResponse) diff --git a/renku/service/serializers/datasets.py b/renku/service/serializers/datasets.py new file mode 100644 index 0000000000..e752e1c101 --- /dev/null +++ b/renku/service/serializers/datasets.py @@ -0,0 +1,153 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service datasets serializers.""" +from marshmallow import Schema, fields, post_load, pre_load + +from renku.service.serializers.rpc import JsonRPCResponse + + +class DatasetAuthors(Schema): + """Schema for the dataset authors.""" + + name = fields.String(required=True) + affiliation = fields.String() + + +class DatasetCreateRequest(Schema): + """Request schema for dataset create view.""" + + authors = fields.List(fields.Nested(DatasetAuthors)) + commit_message = fields.String() + dataset_name = fields.String(required=True) + description = fields.String() + project_id = fields.String(required=True) + + @pre_load() + def default_commit_message(self, data, **kwargs): + """Set default commit message.""" + if not data.get('commit_message'): + data['commit_message'] = 'service: dataset create {0}'.format( + data['dataset_name'] + ) + + return data + + +class DatasetCreateResponse(Schema): + """Response schema for dataset create view.""" + + dataset_name = fields.String(required=True) + + +class DatasetCreateResponseRPC(JsonRPCResponse): + """RPC response schema for dataset create view.""" + + result = fields.Nested(DatasetCreateResponse) + + +class DatasetAddFile(Schema): + """Schema for dataset add file view.""" + + file_id = fields.String(required=True) + + +class DatasetAddRequest(Schema): + """Request schema for dataset add file view.""" + + commit_message = fields.String() + dataset_name = fields.String(required=True) + create_dataset = fields.Boolean(missing=False) + project_id = fields.String(required=True) + files = fields.List(fields.Nested(DatasetAddFile), required=True) + + @post_load() + def default_commit_message(self, data, **kwargs): + """Set default commit message.""" + if not data.get('commit_message'): + data['commit_message'] = 'service: dataset add {0}'.format( + data['dataset_name'] + ) + + return data + + +class DatasetAddResponse(Schema): + """Response schema for dataset add file view.""" + + dataset_name = fields.String(required=True) + project_id = fields.String(required=True) + files = fields.List(fields.Nested(DatasetAddFile), required=True) + + +class DatasetAddResponseRPC(JsonRPCResponse): + """RPC schema for dataset add.""" + + result = fields.Nested(DatasetAddResponse) + + +class DatasetListRequest(Schema): + """Request schema for dataset list view.""" + + project_id = fields.String(required=True) + + +class DatasetDetails(Schema): + """Serialize dataset to response object.""" + + identifier = fields.String(required=True) + name = 
fields.String(required=True)
+    version = fields.String(allow_none=True)
+    created = fields.String(allow_none=True)
+
+
+class DatasetListResponse(Schema):
+    """Response schema for dataset list view."""
+
+    datasets = fields.List(fields.Nested(DatasetDetails), required=True)
+
+
+class DatasetListResponseRPC(JsonRPCResponse):
+    """RPC response schema for dataset list view."""
+
+    result = fields.Nested(DatasetListResponse)
+
+
+class DatasetFilesListRequest(Schema):
+    """Request schema for dataset files list view."""
+
+    project_id = fields.String(required=True)
+    dataset_name = fields.String(required=True)
+
+
+class DatasetFileDetails(Schema):
+    """Serialize dataset files to response object."""
+
+    name = fields.String(required=True)
+
+
+class DatasetFilesListResponse(Schema):
+    """Response schema for dataset files list view."""
+
+    dataset_name = fields.String(required=True)
+    files = fields.List(fields.Nested(DatasetFileDetails), required=True)
+
+
+class DatasetFilesListResponseRPC(JsonRPCResponse):
+    """RPC schema for dataset files list view."""
+
+    result = fields.Nested(DatasetFilesListResponse)
diff --git a/renku/service/serializers/headers.py b/renku/service/serializers/headers.py
new file mode 100644
index 0000000000..c67abdf179
--- /dev/null
+++ b/renku/service/serializers/headers.py
@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 - Swiss Data Science Center (SDSC)
+# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
+# Eidgenössische Technische Hochschule Zürich (ETHZ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
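
A note on the `missing=` defaults used throughout these schemas: marshmallow evaluates a callable default on every `load()`, while a plain expression is evaluated only once at import time, which is why `file_id` and `timestamp` in `FileUploadContext` above take lambdas. A minimal, self-contained sketch (the schema name here is illustrative, not part of the diff):

```python
import time
import uuid

from marshmallow import Schema, fields


class UploadStamp(Schema):
    """Illustrative schema: callable defaults are re-evaluated per load."""

    file_id = fields.String(missing=lambda: uuid.uuid4().hex)
    timestamp = fields.Integer(missing=lambda: int(time.time() * 1e3))


a = UploadStamp().load({})
b = UploadStamp().load({})
assert a['file_id'] != b['file_id']  # a fresh default for every record
```
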
+"""Renku service headers serializers.""" +from marshmallow import Schema, ValidationError, fields, pre_load +from werkzeug.utils import secure_filename + + +class UserIdentityHeaders(Schema): + """User identity schema.""" + + user_id = fields.String(required=True, data_key='renku-user-id') + fullname = fields.String(data_key='renku-user-fullname') + email = fields.String(data_key='renku-user-email') + token = fields.String(data_key='authorization') + + def extract_token(self, data): + """Extract token.""" + value = data.get('authorization', '') + components = value.split(' ') + + rfc_compliant = value.lower().startswith('bearer') + rfc_compliant &= len(components) == 2 + + if not rfc_compliant: + raise ValidationError('authorization value contains invalid value') + + return components[-1] + + @pre_load() + def set_fields(self, data, **kwargs): + """Set fields for serialization.""" + expected_keys = [field.data_key for field in self.fields.values()] + + data = { + key.lower(): value + for key, value in data.items() if key.lower() in expected_keys + } + + if {'renku-user-id', 'authorization'}.issubset(set(data.keys())): + data['renku-user-id'] = secure_filename(data['renku-user-id']) + data['authorization'] = self.extract_token(data) + + return data diff --git a/renku/service/serializers/rpc.py b/renku/service/serializers/rpc.py new file mode 100644 index 0000000000..6512d26001 --- /dev/null +++ b/renku/service/serializers/rpc.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service JSON-RPC serializers.""" +from marshmallow import Schema, fields + + +class JsonRPCResponse(Schema): + """JsonRPC response schema.""" + + error = fields.Dict() diff --git a/renku/service/utils/__init__.py b/renku/service/utils/__init__.py new file mode 100644 index 0000000000..d731a1ba3e --- /dev/null +++ b/renku/service/utils/__init__.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Renku service utility functions.""" +from git import Repo + +from renku.service.config import CACHE_PROJECTS_PATH, CACHE_UPLOADS_PATH + + +def make_project_path(user, project): + """Construct full path for cached project.""" + valid_user = user and 'user_id' in user + valid_project = project and 'owner' in project and 'name' in project + + if valid_user and valid_project: + return ( + CACHE_PROJECTS_PATH / user['user_id'] / project['owner'] / + project['name'] + ) + + +def make_file_path(user, cached_file): + """Construct full path for cache file.""" + valid_user = user and 'user_id' in user + valid_file = cached_file and 'file_name' in cached_file + + if valid_user and valid_file: + return ( + CACHE_UPLOADS_PATH / user['user_id'] / cached_file['relative_path'] + ) + + +def repo_sync(repo_path, remote_names=('origin', )): + """Sync the repo with the remotes.""" + repo = Repo(repo_path) + is_pushed = False + + for remote in repo.remotes: + if remote.name in remote_names: + remote.push() + is_pushed = True + + return is_pushed diff --git a/renku/service/views/__init__.py b/renku/service/views/__init__.py new file mode 100644 index 0000000000..fbe49ab1d7 --- /dev/null +++ b/renku/service/views/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service views.""" diff --git a/renku/service/views/cache.py b/renku/service/views/cache.py new file mode 100644 index 0000000000..34c2b19074 --- /dev/null +++ b/renku/service/views/cache.py @@ -0,0 +1,247 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Renku service cache views.""" +import os +import shutil +from pathlib import Path + +import patoolib +from flask import Blueprint, jsonify, request +from flask_apispec import marshal_with, use_kwargs +from marshmallow import EXCLUDE +from patoolib.util import PatoolError + +from renku.core.commands.clone import renku_clone +from renku.service.config import CACHE_UPLOADS_PATH, \ + INVALID_PARAMS_ERROR_CODE, SUPPORTED_ARCHIVES +from renku.service.serializers.cache import FileListResponse, \ + FileListResponseRPC, FileUploadContext, FileUploadRequest, \ + FileUploadResponse, FileUploadResponseRPC, ProjectCloneContext, \ + ProjectCloneRequest, ProjectCloneResponse, ProjectCloneResponseRPC, \ + ProjectListResponse, ProjectListResponseRPC, extract_file +from renku.service.utils import make_file_path, make_project_path +from renku.service.views.decorators import accepts_json, handle_base_except, \ + handle_git_except, handle_renku_except, handle_validation_except, \ + header_doc, requires_cache, requires_identity + +CACHE_BLUEPRINT_TAG = 'cache' +cache_blueprint = Blueprint('cache', __name__) + + +@marshal_with(FileListResponseRPC) +@header_doc(description='List uploaded files.', tags=(CACHE_BLUEPRINT_TAG, )) +@cache_blueprint.route( + '/cache.files_list', + methods=['GET'], + provide_automatic_options=False, +) +@handle_base_except +@handle_git_except +@handle_renku_except +@handle_validation_except +@requires_cache +@requires_identity +def list_uploaded_files_view(user, cache): + """List uploaded files ready to be added to projects.""" + files = [ + f for f in cache.get_files(user) if make_file_path(user, f).exists() + ] + + response = FileListResponseRPC().load({ + 'result': FileListResponse().load({'files': files}) + }) + return jsonify(response) + + +@use_kwargs(FileUploadRequest) +@marshal_with(FileUploadResponseRPC) +@header_doc( + description='Upload file or archive of files.', + tags=(CACHE_BLUEPRINT_TAG, ), +) +@cache_blueprint.route( + '/cache.files_upload', + methods=['POST'], + provide_automatic_options=False, +) +@handle_base_except +@handle_git_except +@handle_renku_except +@handle_validation_except +@requires_cache +@requires_identity +def upload_file_view(user, cache): + """Upload file or archive of files.""" + file = extract_file(request) + + response_builder = { + 'file_name': file.filename, + 'content_type': file.content_type, + 'is_archive': file.content_type in SUPPORTED_ARCHIVES + } + response_builder.update(FileUploadRequest().load(request.args)) + + user_cache_dir = CACHE_UPLOADS_PATH / user['user_id'] + user_cache_dir.mkdir(exist_ok=True) + + file_path = user_cache_dir / file.filename + if file_path.exists(): + if response_builder.get('override_existing', False): + file_path.unlink() + else: + return jsonify( + error={ + 'code': INVALID_PARAMS_ERROR_CODE, + 'reason': 'file exists', + } + ) + + file.save(str(file_path)) + + files = [] + if response_builder['unpack_archive'] and response_builder['is_archive']: + unpack_dir = '{0}.unpacked'.format(file_path.name) + temp_dir = file_path.parent / Path(unpack_dir) + if temp_dir.exists(): + shutil.rmtree(str(temp_dir)) + temp_dir.mkdir(exist_ok=True) + + try: + patoolib.extract_archive(str(file_path), outdir=str(temp_dir)) + except PatoolError: + return jsonify( + error={ + 'code': INVALID_PARAMS_ERROR_CODE, + 'reason': 'unable to unpack archive' + } + ) + + for file_ in temp_dir.glob('**/*'): + file_obj = { + 'file_name': file_.name, + 'file_size': os.stat(str(file_path)).st_size, + 'relative_path': + str( + 
+                        file_.relative_to(
+                            CACHE_UPLOADS_PATH / user['user_id']
+                        )
+                    )
+            }
+
+            files.append(FileUploadContext().load(file_obj, unknown=EXCLUDE))
+
+    else:
+        response_builder['file_size'] = os.stat(str(file_path)).st_size
+        response_builder['relative_path'] = str(
+            file_path.relative_to(CACHE_UPLOADS_PATH / user['user_id'])
+        )
+
+        files.append(
+            FileUploadContext().load(response_builder, unknown=EXCLUDE)
+        )
+
+    response = FileUploadResponseRPC().load({
+        'result': FileUploadResponse().load({'files': files})
+    })
+    cache.set_files(user, files)
+
+    return jsonify(response)
+
+
+@use_kwargs(ProjectCloneRequest)
+@marshal_with(ProjectCloneResponseRPC)
+@header_doc(
+    'Clone a remote project. If the project is cached already, '
+    'a new clone operation will override the old cache state.',
+    tags=(CACHE_BLUEPRINT_TAG, )
+)
+@cache_blueprint.route(
+    '/cache.project_clone',
+    methods=['POST'],
+    provide_automatic_options=False,
+)
+@handle_base_except
+@handle_git_except
+@handle_renku_except
+@handle_validation_except
+@requires_cache
+@requires_identity
+@accepts_json
+def project_clone(user, cache):
+    """Clone a remote repository."""
+    # merge the request payload with the user identity; identity keys win
+    ctx = ProjectCloneContext().load(
+        {**request.json, **user},
+        unknown=EXCLUDE,
+    )
+
+    local_path = make_project_path(user, ctx)
+
+    if local_path.exists():
+        shutil.rmtree(str(local_path))
+
+    for project in cache.get_projects(user):
+        if project['git_url'] == ctx['git_url']:
+            cache.invalidate_project(user, project['project_id'])
+
+    local_path.mkdir(parents=True, exist_ok=True)
+    renku_clone(
+        ctx['url_with_auth'],
+        local_path,
+        depth=ctx['depth'],
+        raise_git_except=True,
+        config={
+            'user.name': ctx['fullname'],
+            'user.email': ctx['email'],
+        }
+    )
+    cache.set_project(user, ctx['project_id'], ctx)
+
+    response = ProjectCloneResponseRPC().load({
+        'result': ProjectCloneResponse().load(ctx, unknown=EXCLUDE)
+    })
+    return jsonify(response)
+
+
+@marshal_with(ProjectListResponseRPC)
+@header_doc(
+    'List cached projects.',
+    tags=(CACHE_BLUEPRINT_TAG, ),
+)
+@cache_blueprint.route(
+    '/cache.project_list',
+    methods=['GET'],
+    provide_automatic_options=False,
+)
+@handle_base_except
+@handle_git_except
+@handle_renku_except
+@handle_validation_except
+@requires_cache
+@requires_identity
+def list_projects_view(user, cache):
+    """List cached projects."""
+    projects = cache.get_projects(user)
+    projects = [
+        ProjectCloneResponse().load(p, unknown=EXCLUDE)
+        for p in projects if make_project_path(user, p).exists()
+    ]
+
+    response = ProjectListResponseRPC().load({
+        'result': ProjectListResponse().load({'projects': projects})
+    })
+    return jsonify(response)
diff --git a/renku/service/views/datasets.py b/renku/service/views/datasets.py
new file mode 100644
index 0000000000..38c9e5c3d1
--- /dev/null
+++ b/renku/service/views/datasets.py
@@ -0,0 +1,248 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 - Swiss Data Science Center (SDSC)
+# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
+# Eidgenössische Technische Hochschule Zürich (ETHZ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service datasets view.""" +import json + +from flask import Blueprint, jsonify, request +from flask_apispec import marshal_with, use_kwargs +from marshmallow import EXCLUDE + +from renku.core.commands.dataset import add_file, create_dataset, \ + dataset_parent, list_files +from renku.core.utils.contexts import chdir +from renku.service.config import INTERNAL_FAILURE_ERROR_CODE, \ + INVALID_PARAMS_ERROR_CODE +from renku.service.serializers.datasets import DatasetAddRequest, \ + DatasetAddResponse, DatasetAddResponseRPC, DatasetCreateRequest, \ + DatasetCreateResponse, DatasetCreateResponseRPC, DatasetDetails, \ + DatasetFileDetails, DatasetFilesListRequest, DatasetFilesListResponse, \ + DatasetFilesListResponseRPC, DatasetListRequest, DatasetListResponse, \ + DatasetListResponseRPC +from renku.service.utils import make_file_path, make_project_path, repo_sync +from renku.service.views.decorators import accepts_json, handle_base_except, \ + handle_git_except, handle_renku_except, handle_validation_except, \ + header_doc, requires_cache, requires_identity + +DATASET_BLUEPRINT_TAG = 'datasets' +dataset_blueprint = Blueprint(DATASET_BLUEPRINT_TAG, __name__) + + +@use_kwargs(DatasetListRequest, locations=['query']) +@marshal_with(DatasetListResponseRPC) +@header_doc('List all datasets in project.', tags=(DATASET_BLUEPRINT_TAG, )) +@dataset_blueprint.route( + '/datasets.list', + methods=['GET'], + provide_automatic_options=False, +) +@handle_base_except +@handle_git_except +@handle_renku_except +@handle_validation_except +@requires_cache +@requires_identity +def list_datasets_view(user, cache): + """List all datasets in project.""" + req = DatasetListRequest().load(request.args) + project = cache.get_project(user, req['project_id']) + project_path = make_project_path(user, project) + + if not project_path: + return jsonify( + error={ + 'code': INVALID_PARAMS_ERROR_CODE, + 'reason': 'invalid project_id argument', + } + ) + + with chdir(project_path): + datasets = [ + DatasetDetails().load(ds, unknown=EXCLUDE) + # TODO: fix core interface to address this issue (add ticket ref) + for ds in json.loads(dataset_parent(None, 'data', 'json-ld')) + ] + + response = DatasetListResponse().load({'datasets': datasets}) + return jsonify(DatasetListResponseRPC().load({'result': response})) + + +@use_kwargs(DatasetFilesListRequest, locations=['query']) +@marshal_with(DatasetFilesListResponseRPC) +@header_doc('List files in a dataset.', tags=(DATASET_BLUEPRINT_TAG, )) +@dataset_blueprint.route( + '/datasets.files_list', + methods=['GET'], + provide_automatic_options=False, +) +@handle_base_except +@handle_git_except +@handle_renku_except +@handle_validation_except +@requires_cache +@requires_identity +def list_dataset_files_view(user, cache): + """List files in a dataset.""" + ctx = DatasetFilesListRequest().load(request.args) + project = cache.get_project(user, ctx['project_id']) + project_path = make_project_path(user, project) + + if not project_path: + return jsonify( + error={ + 'code': INVALID_PARAMS_ERROR_CODE, + 'reason': 'invalid project_id argument', + } + ) + + with chdir(project_path): + dataset_files = json.loads( + # TODO: fix core interface to address this issue (add ticket ref) + list_files(ctx['dataset_name'], None, None, None, 'json-ld') + ) + ctx['files'] = [ + DatasetFileDetails().load(ds, unknown=EXCLUDE) + for ds in dataset_files + ] + + response = 
DatasetFilesListResponse().load(ctx, unknown=EXCLUDE) + return jsonify(DatasetFilesListResponseRPC().load({'result': response})) + + +@use_kwargs(DatasetAddRequest) +@marshal_with(DatasetAddResponseRPC) +@header_doc( + 'Add uploaded file to cloned repository.', tags=(DATASET_BLUEPRINT_TAG, ) +) +@dataset_blueprint.route( + '/datasets.add', + methods=['POST'], + provide_automatic_options=False, +) +@handle_base_except +@handle_git_except +@handle_renku_except +@handle_validation_except +@accepts_json +@requires_cache +@requires_identity +def add_file_to_dataset_view(user, cache): + """Add uploaded file to cloned repository.""" + ctx = DatasetAddRequest().load(request.json) + project = cache.get_project(user, ctx['project_id']) + project_path = make_project_path(user, project) + + if not project_path: + return jsonify( + error={ + 'code': INVALID_PARAMS_ERROR_CODE, + 'message': 'invalid project_id: {0}'.format(ctx['project_id']), + } + ) + + if not ctx['commit_message']: + ctx['commit_message'] = 'service: dataset add {0}'.format( + ctx['dataset_name'] + ) + + local_paths = [] + for file_ in ctx['files']: + file = cache.get_file(user, file_['file_id']) + local_path = make_file_path(user, file) + if not local_path or not local_path.exists(): + return jsonify( + error={ + 'code': INVALID_PARAMS_ERROR_CODE, + 'message': 'invalid file_id: {0}'.format(file_['file_id']) + } + ) + + ctx['commit_message'] += ' {0}'.format(local_path.name) + local_paths.append(str(local_path)) + + with chdir(project_path): + add_file( + local_paths, + ctx['dataset_name'], + create=ctx['create_dataset'], + commit_message=ctx['commit_message'] + ) + + if not repo_sync(project_path): + return jsonify( + error={ + 'code': INTERNAL_FAILURE_ERROR_CODE, + 'message': 'repo sync failed' + } + ) + + return jsonify( + DatasetAddResponseRPC().load({ + 'result': DatasetAddResponse().load(ctx, unknown=EXCLUDE) + }) + ) + + +@use_kwargs(DatasetCreateRequest) +@marshal_with(DatasetCreateResponseRPC) +@header_doc( + 'Create a new dataset in a project.', tags=(DATASET_BLUEPRINT_TAG, ) +) +@dataset_blueprint.route( + '/datasets.create', + methods=['POST'], + provide_automatic_options=False, +) +@handle_base_except +@handle_git_except +@handle_renku_except +@handle_validation_except +@accepts_json +@requires_cache +@requires_identity +def create_dataset_view(user, cache): + """Create a new dataset in a project.""" + ctx = DatasetCreateRequest().load(request.json) + project = cache.get_project(user, ctx['project_id']) + + project_path = make_project_path(user, project) + if not project_path: + return jsonify( + error={ + 'code': INVALID_PARAMS_ERROR_CODE, + 'message': 'invalid project_id argument', + } + ) + + with chdir(project_path): + create_dataset( + ctx['dataset_name'], commit_message=ctx['commit_message'] + ) + + if not repo_sync(project_path): + return jsonify( + error={ + 'code': INTERNAL_FAILURE_ERROR_CODE, + 'reason': 'push to remote failed silently - try again' + } + ) + + return jsonify( + DatasetCreateResponseRPC().load({ + 'result': DatasetCreateResponse().load(ctx, unknown=EXCLUDE) + }) + ) diff --git a/renku/service/views/decorators.py b/renku/service/views/decorators.py new file mode 100644 index 0000000000..517574bedc --- /dev/null +++ b/renku/service/views/decorators.py @@ -0,0 +1,250 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Renku service view decorators."""
+from functools import wraps
+
+from flask import current_app, jsonify, request
+from flask_apispec import doc
+from git import GitCommandError
+from marshmallow import ValidationError
+from redis import RedisError
+
+from renku.core.errors import RenkuException
+from renku.service.config import GIT_ACCESS_DENIED_ERROR_CODE, \
+    GIT_UNKNOWN_ERROR_CODE, INTERNAL_FAILURE_ERROR_CODE, \
+    INVALID_HEADERS_ERROR_CODE, INVALID_PARAMS_ERROR_CODE, \
+    REDIS_EXCEPTION_ERROR_CODE, RENKU_EXCEPTION_ERROR_CODE
+from renku.service.serializers.headers import UserIdentityHeaders
+
+
+def requires_identity(f):
+    """Wrapper which indicates that route requires user identification."""
+    # noqa
+    @wraps(f)
+    def decorated_function(*args, **kws):
+        """Represents decorated function."""
+        try:
+            user = UserIdentityHeaders().load(request.headers)
+        except (ValidationError, KeyError):
+            err_message = 'user identification is incorrect or missing'
+            return jsonify(
+                error={
+                    'code': INVALID_HEADERS_ERROR_CODE,
+                    'reason': err_message
+                }
+            )
+
+        return f(user, *args, **kws)
+
+    return decorated_function
+
+
+def handle_redis_except(f):
+    """Wrapper which handles Redis exceptions."""
+    # noqa
+    @wraps(f)
+    def decorated_function(*args, **kwargs):
+        """Represents decorated function."""
+        try:
+            return f(*args, **kwargs)
+        except (RedisError, OSError) as e:
+            error_code = REDIS_EXCEPTION_ERROR_CODE
+
+            # RedisError has no `messages` attribute; report its string form
+            return jsonify(error={
+                'code': error_code,
+                'reason': str(e),
+            })
+
+    return decorated_function
+
+
+@handle_redis_except
+def requires_cache(f):
+    """Wrapper which injects cache object into view."""
+    # noqa
+    @wraps(f)
+    def decorated_function(*args, **kwargs):
+        """Represents decorated function."""
+        return f(current_app.config.get('cache'), *args, **kwargs)
+
+    return decorated_function
+
+
+def handle_validation_except(f):
+    """Wrapper which handles marshmallow `ValidationError`."""
+    # noqa
+    @wraps(f)
+    def decorated_function(*args, **kwargs):
+        """Represents decorated function."""
+        try:
+            return f(*args, **kwargs)
+        except ValidationError as e:
+            return jsonify(
+                error={
+                    'code': INVALID_PARAMS_ERROR_CODE,
+                    'reason': e.messages,
+                }
+            )
+
+    return decorated_function
+
+
+def handle_renku_except(f):
+    """Wrapper which handles `RenkuException`."""
+    # noqa
+    @wraps(f)
+    def decorated_function(*args, **kwargs):
+        """Represents decorated function."""
+        try:
+            return f(*args, **kwargs)
+        except RenkuException as e:
+            return jsonify(
+                error={
+                    'code': RENKU_EXCEPTION_ERROR_CODE,
+                    'reason': str(e),
+                }
+            )
+
+    return decorated_function
+
+
+def handle_git_except(f):
+    """Wrapper which handles `GitCommandError`."""
+    # noqa
+    @wraps(f)
+    def decorated_function(*args, **kwargs):
+        """Represents decorated function."""
+        try:
+            return f(*args, **kwargs)
+        except GitCommandError as e:
+
+            error_code = GIT_ACCESS_DENIED_ERROR_CODE \
+                if 'Access denied' in e.stderr else GIT_UNKNOWN_ERROR_CODE
+
+            return jsonify(
+                error={
+                    'code': error_code,
+                    'reason':
+                        'git error: {0}'.
+                        format(' '.join(e.stderr.strip().split('\n'))),
+                }
+            )
+
+    return decorated_function
+
+
+def accepts_json(f):
+    """Wrapper which ensures only JSON payload can be in request."""
+    # noqa
+    @wraps(f)
+    def decorated_function(*args, **kwargs):
+        """Represents decorated function."""
+        if 'Content-Type' not in request.headers:
+            return jsonify(
+                error={
+                    'code': INVALID_HEADERS_ERROR_CODE,
+                    'reason': 'invalid request headers'
+                }
+            )
+
+        header_check = request.headers['Content-Type'] == 'application/json'
+
+        if not request.is_json or not header_check:
+            return jsonify(
+                error={
+                    'code': INVALID_HEADERS_ERROR_CODE,
+                    'reason': 'invalid request payload'
+                }
+            )
+
+        return f(*args, **kwargs)
+
+    return decorated_function
+
+
+def handle_base_except(f):
+    """Wrapper which handles base exceptions."""
+    # noqa
+    @wraps(f)
+    def decorated_function(*args, **kwargs):
+        """Represents decorated function."""
+        try:
+            return f(*args, **kwargs)
+        except (Exception, BaseException, OSError) as e:
+            error_code = INTERNAL_FAILURE_ERROR_CODE
+
+            # a generic exception has no `stderr`; report its string form
+            return jsonify(
+                error={
+                    'code': error_code,
+                    'reason': 'internal error: {0}'.format(str(e)),
+                }
+            )
+
+    return decorated_function
+
+
+def header_doc(description, tags=()):
+    """Wrap additional OpenAPI header description for an endpoint."""
+    return doc(
+        description=description,
+        params={
+            'Authorization': {
+                'description': (
+                    'Used for git OAuth2 access of the user. '
+                    'For example: '
+                    '```Authorization: Bearer asdf-qwer-zxcv```'
+                ),
+                'in': 'header',
+                'type': 'string',
+                'required': True
+            },
+            'Renku-User-Id': {
+                'description': (
+                    'Used for identification of the user. '
+                    'For example: '
+                    '```Renku-User-Id: sasdsa-sadsd-gsdsdh-gfdgdsd```'
+                ),
+                'in': 'header',
+                'type': 'string',
+                'required': True
+            },
+            'Renku-User-FullName': {
+                'description': (
+                    'Used for commit author signature. '
+                    'For example: '
+                    '```Renku-User-FullName: Rok Roskar```'
+                ),
+                'in': 'header',
+                'type': 'string',
+                'required': True
+            },
+            'Renku-User-Email': {
+                'description': (
+                    'Used for commit author signature. 
' + 'For example: ' + '```Renku-User-Email: dev@renkulab.io```' + ), + 'in': 'header', + 'type': 'string', + 'required': True + }, + }, + tags=list(tags), + ) diff --git a/setup.py b/setup.py index 9d9c6e2e21..83dac542b9 100644 --- a/setup.py +++ b/setup.py @@ -27,9 +27,11 @@ tests_require = [ 'check-manifest>=0.37', 'coverage>=4.5.3', + 'fakeredis==1.1.0', 'flake8>=3.5', 'freezegun>=0.3.12', 'isort==4.3.4', + 'six>=1.13.0', 'pydocstyle>=3.0.0', 'pytest-cache>=1.0', 'pytest-cov>=2.5.1', @@ -73,6 +75,7 @@ install_requires = [ 'appdirs>=1.4.3', + 'apispec==3.0.0', 'attrs>=18.2.0', 'click-completion>=0.5.0', 'click>=7.0', @@ -80,6 +83,9 @@ 'cwltool==1.0.20191206125148', 'environ_config>=18.2.0', 'filelock>=3.0.0', + 'flask==1.1.1', + 'flask-apispec==0.8.3', + 'flask-swagger-ui==3.20.9', 'gitpython==3.0.3', 'patool>=1.12', 'psutil>=5.4.7', @@ -90,10 +96,12 @@ 'pyshacl>=0.11.3.post1', 'python-dateutil>=2.6.1', 'python-editor>=1.0.4', + 'redis==3.3.11', 'rdflib==4.2.2', 'rdflib-jsonld>=0.4.0', 'requests>=2.21.0', 'ndg-httpsclient>=0.5.1', + 'marshmallow==3.2.2', 'idna>=2.8', 'setuptools_scm>=3.1.0', 'tabulate>=0.7.7', diff --git a/tests/cli/test_datasets.py b/tests/cli/test_datasets.py index 31d34d57af..3b45096bbd 100644 --- a/tests/cli/test_datasets.py +++ b/tests/cli/test_datasets.py @@ -1296,8 +1296,8 @@ def test_avoid_empty_commits(runner, client, directory_tree): def test_add_removes_credentials(runner, client): """Test credentials are removed when adding to a dataset.""" - URL = 'https://username:password@example.com/index.html' - result = runner.invoke(cli, ['dataset', 'add', '-c', 'my-dataset', URL]) + url = 'https://username:password@example.com/index.html' + result = runner.invoke(cli, ['dataset', 'add', '-c', 'my-dataset', url]) assert 0 == result.exit_code with client.with_dataset('my-dataset') as dataset: diff --git a/tests/cli/test_integration_datasets.py b/tests/cli/test_integration_datasets.py index 8743d811fd..f913950c62 100644 --- a/tests/cli/test_integration_datasets.py +++ b/tests/cli/test_integration_datasets.py @@ -24,6 +24,8 @@ import pytest from renku.cli import cli +from renku.core.commands.clone import renku_clone +from renku.core.utils.contexts import chdir @pytest.mark.parametrize( @@ -928,6 +930,29 @@ def test_renku_clone(runner, monkeypatch): assert 1 == result.exit_code +@pytest.mark.integration +def test_renku_clone_with_config(tmpdir): + """Test cloning of a Renku repo and existence of required settings.""" + REMOTE = 'https://dev.renku.ch/gitlab/virginiafriedrich/datasets-test.git' + + with chdir(tmpdir): + renku_clone( + REMOTE, + config={ + 'user.name': 'sam', + 'user.email': 's@m.i', + 'filter.lfs.custom': '0' + } + ) + + repo = git.Repo('datasets-test') + reader = repo.config_reader() + reader.values() + + lfs_config = dict(reader.items('filter.lfs')) + assert '0' == lfs_config.get('custom') + + @pytest.mark.integration @pytest.mark.parametrize( 'path,expected_path', [('', 'datasets-test'), ('.', '.'), diff --git a/tests/service/test_cache_views.py b/tests/service/test_cache_views.py new file mode 100644 index 0000000000..7735758aad --- /dev/null +++ b/tests/service/test_cache_views.py @@ -0,0 +1,601 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku service cache view tests.""" +import io +import json +import os +import uuid + +import pytest + +from renku.core.models.git import GitURL +from renku.service.config import INVALID_HEADERS_ERROR_CODE, \ + INVALID_PARAMS_ERROR_CODE + +REMOTE_URL = 'https://dev.renku.ch/gitlab/contact/integration-tests' +IT_GIT_ACCESS_TOKEN = os.getenv('IT_OAUTH_GIT_TOKEN') + + +@pytest.mark.service +def test_serve_api_spec(svc_client): + """Check serving of service spec.""" + headers = { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + response = svc_client.get('/api/v1/spec', headers=headers) + + assert 0 != len(response.json.keys()) + assert 200 == response.status_code + + +@pytest.mark.service +def test_list_upload_files_all(svc_client): + """Check list uploaded files view.""" + headers_user = { + 'Content-Type': 'application/json', + 'accept': 'application/json', + 'Renku-User-Id': 'user' + } + response = svc_client.get('/cache.files_list', headers=headers_user) + + assert {'result'} == set(response.json.keys()) + + assert 0 == len(response.json['result']['files']) + assert 200 == response.status_code + + +@pytest.mark.service +def test_list_upload_files_all_no_auth(svc_client): + """Check error response on list uploaded files view.""" + headers = { + 'Content-Type': 'application/json', + 'accept': 'application/json', + } + response = svc_client.get( + '/cache.files_list', + headers=headers, + ) + + assert 200 == response.status_code + + assert {'error'} == set(response.json.keys()) + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + + +@pytest.mark.service +def test_file_upload(svc_client): + """Check successful file upload.""" + headers_user = {'Renku-User-Id': '{0}'.format(uuid.uuid4().hex)} + + response = svc_client.post( + '/cache.files_upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + headers=headers_user, + ) + + assert response + assert 200 == response.status_code + + assert {'result'} == set(response.json.keys()) + assert isinstance( + uuid.UUID(response.json['result']['files'][0]['file_id']), uuid.UUID + ) + + +@pytest.mark.service +def test_file_upload_override(svc_client): + """Check successful file upload.""" + headers_user = {'Renku-User-Id': '{0}'.format(uuid.uuid4().hex)} + + response = svc_client.post( + '/cache.files_upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + headers=headers_user, + ) + + assert response + assert 200 == response.status_code + + assert {'result'} == set(response.json.keys()) + assert isinstance( + uuid.UUID(response.json['result']['files'][0]['file_id']), uuid.UUID + ) + old_file_id = response.json['result']['files'][0]['file_id'] + + response = svc_client.post( + '/cache.files_upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + headers=headers_user, + ) + + assert response + assert 200 == response.status_code + + assert {'error'} == set(response.json.keys()) + assert INVALID_PARAMS_ERROR_CODE == response.json['error']['code'] + assert 'file exists' == response.json['error']['reason'] + + response = 
svc_client.post( + '/cache.files_upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + query_string={'override_existing': True}, + headers=headers_user, + ) + + assert response + assert 200 == response.status_code + + assert {'result'} == set(response.json.keys()) + assert isinstance( + uuid.UUID(response.json['result']['files'][0]['file_id']), uuid.UUID + ) + assert old_file_id != response.json['result']['files'][0]['file_id'] + + +@pytest.mark.service +def test_file_upload_same_file(svc_client): + """Check successful file upload with same file uploaded twice.""" + headers_user1 = {'Renku-User-Id': '{0}'.format(uuid.uuid4().hex)} + response = svc_client.post( + '/cache.files_upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + headers=headers_user1, + ) + + assert response + assert 200 == response.status_code + + assert {'result'} == set(response.json.keys()) + + assert isinstance( + uuid.UUID(response.json['result']['files'][0]['file_id']), uuid.UUID + ) + + response = svc_client.post( + '/cache.files_upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + headers=headers_user1, + ) + + assert response + assert 200 == response.status_code + assert {'error'} == set(response.json.keys()) + assert INVALID_PARAMS_ERROR_CODE == response.json['error']['code'] + assert 'file exists' == response.json['error']['reason'] + + +@pytest.mark.service +def test_file_upload_no_auth(svc_client): + """Check failed file upload.""" + response = svc_client.post( + '/cache.files_upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile.txt'), ), + ) + + assert response + assert 200 == response.status_code + + assert {'error'} == set(response.json.keys()) + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + + +@pytest.mark.service +def test_file_upload_with_users(svc_client): + """Check successful file upload and listing based on user auth header.""" + headers_user1 = {'Renku-User-Id': '{0}'.format(uuid.uuid4().hex)} + headers_user2 = {'Renku-User-Id': '{0}'.format(uuid.uuid4().hex)} + + response = svc_client.post( + '/cache.files_upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile1.txt'), ), + headers=headers_user1 + ) + + assert {'result'} == set(response.json.keys()) + + file_id = response.json['result']['files'][0]['file_id'] + assert file_id + assert 200 == response.status_code + + response = svc_client.post( + '/cache.files_upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile1.txt'), ), + headers=headers_user2 + ) + + assert response + assert {'result'} == set(response.json.keys()) + + response = svc_client.get('/cache.files_list', headers=headers_user1) + + assert response + + assert {'result'} == set(response.json.keys()) + assert 1 == len(response.json['result']['files']) + + file = response.json['result']['files'][0] + assert file_id == file['file_id'] + assert 0 < file['file_size'] + + +@pytest.mark.service +@pytest.mark.integration +def test_clone_projects_no_auth(svc_client): + """Check error on cloning of remote repository.""" + payload = { + 'git_url': REMOTE_URL, + } + + response = svc_client.post( + '/cache.project_clone', data=json.dumps(payload) + ) + + assert {'error'} == set(response.json.keys()) + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + + err_message = 'user identification is incorrect or missing' + assert err_message == response.json['error']['reason'] + + headers = { + 'Content-Type': 'application/json', + 'accept': 
'application/json', + 'Renku-User-Id': '{0}'.format(uuid.uuid4().hex), + 'Renku-User-FullName': 'Just Sam', + 'Renku-User-Email': 'contact@justsam.io', + 'Authorization': 'Bearer notatoken', + } + + response = svc_client.post( + '/cache.project_clone', data=json.dumps(payload), headers=headers + ) + + assert response + assert {'result'} == set(response.json.keys()) + + +@pytest.mark.service +@pytest.mark.integration +def test_clone_projects_with_auth(svc_client): + """Check cloning of remote repository.""" + headers = { + 'Content-Type': 'application/json', + 'accept': 'application/json', + 'Renku-User-Id': '{0}'.format(uuid.uuid4().hex), + 'Renku-User-FullName': 'Just Sam', + 'Renku-User-Email': 'contact@justsam.io', + 'Authorization': 'Bearer {0}'.format(IT_GIT_ACCESS_TOKEN), + } + + payload = { + 'git_url': REMOTE_URL, + } + + response = svc_client.post( + '/cache.project_clone', data=json.dumps(payload), headers=headers + ) + + assert response + assert {'result'} == set(response.json.keys()) + + +@pytest.mark.service +@pytest.mark.integration +def test_clone_projects_multiple(svc_client): + """Check multiple cloning of remote repository.""" + project_ids = [] + + headers = { + 'Content-Type': 'application/json', + 'Renku-User-Id': '{0}'.format(uuid.uuid4().hex), + 'Renku-User-FullName': 'Just Sam', + 'Renku-User-Email': 'contact@justsam.io', + 'Authorization': 'Bearer {0}'.format(IT_GIT_ACCESS_TOKEN), + } + + payload = { + 'git_url': REMOTE_URL, + } + + response = svc_client.post( + '/cache.project_clone', data=json.dumps(payload), headers=headers + ) + + assert response + + assert {'result'} == set(response.json.keys()) + project_ids.append(response.json['result']) + + response = svc_client.post( + '/cache.project_clone', data=json.dumps(payload), headers=headers + ) + + assert response + assert {'result'} == set(response.json.keys()) + project_ids.append(response.json['result']) + + response = svc_client.post( + '/cache.project_clone', data=json.dumps(payload), headers=headers + ) + + assert response + assert {'result'} == set(response.json.keys()) + project_ids.append(response.json['result']) + + response = svc_client.post( + '/cache.project_clone', data=json.dumps(payload), headers=headers + ) + + assert response + assert {'result'} == set(response.json.keys()) + last_pid = response.json['result']['project_id'] + + response = svc_client.get('/cache.project_list', headers=headers) + + assert response + assert {'result'} == set(response.json.keys()) + + pids = [p['project_id'] for p in response.json['result']['projects']] + assert last_pid in pids + + for inserted in project_ids: + assert inserted['project_id'] not in pids + + +@pytest.mark.service +@pytest.mark.integration +def test_clone_projects_list_view_errors(svc_client): + """Check cache state of cloned projects with no headers.""" + headers = { + 'Content-Type': 'application/json', + 'Renku-User-Id': '{0}'.format(uuid.uuid4().hex), + 'Renku-User-FullName': 'Just Sam', + 'Renku-User-Email': 'contact@justsam.io', + 'Authorization': 'Bearer {0}'.format(IT_GIT_ACCESS_TOKEN), + } + + payload = { + 'git_url': REMOTE_URL, + } + + response = svc_client.post( + '/cache.project_clone', data=json.dumps(payload), headers=headers + ) + assert response + assert {'result'} == set(response.json.keys()) + + assert isinstance( + uuid.UUID(response.json['result']['project_id']), uuid.UUID + ) + + response = svc_client.get( + '/cache.project_list', + # no auth headers, expected error + ) + assert response + + assert {'error'} == 
set(response.json.keys())
+    assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code']
+
+    response = svc_client.get('/cache.project_list', headers=headers)
+    assert response
+    assert {'result'} == set(response.json.keys())
+    assert 1 == len(response.json['result']['projects'])
+
+    project = response.json['result']['projects'][0]
+    assert isinstance(uuid.UUID(project['project_id']), uuid.UUID)
+    assert isinstance(GitURL.parse(project['git_url']), GitURL)
+
+
+@pytest.mark.service
+@pytest.mark.integration
+def test_clone_projects_invalid_headers(svc_client):
+    """Check cache state of cloned projects with invalid headers."""
+    headers = {
+        'Content-Type': 'application/json',
+        'accept': 'application/json',
+        'Renku-User-Id': '{0}'.format(uuid.uuid4().hex),
+        'Renku-User-FullName': 'Not Sam',
+        'Renku-User-Email': 'not@sam.io',
+        'Authorization': 'Bearer not-a-token',
+    }
+
+    payload = {
+        'git_url': REMOTE_URL,
+    }
+
+    response = svc_client.post(
+        '/cache.project_clone',
+        data=json.dumps(payload),
+        headers=headers,
+    )
+    assert response
+    assert {'result'} == set(response.json.keys())
+
+    response = svc_client.get(
+        '/cache.project_list',
+        # no auth headers, expected error
+    )
+
+    assert response
+    assert {'error'} == set(response.json.keys())
+    assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code']
+
+    response = svc_client.get('/cache.project_list', headers=headers)
+    assert response
+    assert {'result'} == set(response.json.keys())
+    assert 1 == len(response.json['result']['projects'])
+
+
+@pytest.mark.service
+def test_upload_zip_unpack_archive(datapack_zip, svc_client_with_repo):
+    """Upload zip archive with unpack."""
+    svc_client, headers, project_id = svc_client_with_repo
+    headers.pop('Content-Type')
+
+    response = svc_client.post(
+        '/cache.files_upload',
+        data=dict(
+            file=(io.BytesIO(datapack_zip.read_bytes()), datapack_zip.name),
+        ),
+        query_string={
+            'unpack_archive': True,
+            'override_existing': True,
+        },
+        headers=headers
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+    assert {'result'} == set(response.json.keys())
+    assert 3 == len(response.json['result']['files'])
+
+    for file_ in response.json['result']['files']:
+        assert not file_['is_archive']
+        assert not file_['unpack_archive']
+
+
+@pytest.mark.service
+def test_upload_zip_archive(datapack_zip, svc_client_with_repo):
+    """Upload zip archive."""
+    svc_client, headers, project_id = svc_client_with_repo
+    headers.pop('Content-Type')
+
+    response = svc_client.post(
+        '/cache.files_upload',
+        data=dict(
+            file=(io.BytesIO(datapack_zip.read_bytes()), datapack_zip.name),
+        ),
+        query_string={
+            'unpack_archive': False,
+            'override_existing': True,
+        },
+        headers=headers
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+    assert {'result'} == set(response.json.keys())
+    assert 1 == len(response.json['result']['files'])
+
+    for file_ in response.json['result']['files']:
+        assert file_['is_archive']
+        assert not file_['unpack_archive']
+
+
+@pytest.mark.service
+def test_upload_tar_unpack_archive(datapack_tar, svc_client_with_repo):
+    """Upload tar archive with unpack."""
+    svc_client, headers, project_id = svc_client_with_repo
+    headers.pop('Content-Type')
+
+    response = svc_client.post(
+        '/cache.files_upload',
+        data=dict(
+            file=(io.BytesIO(datapack_tar.read_bytes()), datapack_tar.name),
+        ),
+        query_string={
+            'unpack_archive': True,
+            'override_existing': True,
+        },
+        headers=headers
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+    assert {'result'} == set(response.json.keys())
+    assert 3 == len(response.json['result']['files'])
+
+    for file_ in response.json['result']['files']:
+        assert not file_['is_archive']
+        assert not file_['unpack_archive']
+
+
+@pytest.mark.service
+def test_upload_tar_archive(datapack_tar, svc_client_with_repo):
+    """Upload tar archive."""
+    svc_client, headers, project_id = svc_client_with_repo
+    headers.pop('Content-Type')
+
+    response = svc_client.post(
+        '/cache.files_upload',
+        data=dict(
+            file=(io.BytesIO(datapack_tar.read_bytes()), datapack_tar.name),
+        ),
+        query_string={
+            'unpack_archive': False,
+            'override_existing': True,
+        },
+        headers=headers
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+    assert {'result'} == set(response.json.keys())
+    assert 1 == len(response.json['result']['files'])
+
+    for file_ in response.json['result']['files']:
+        assert file_['is_archive']
+        assert not file_['unpack_archive']
+
+
+@pytest.mark.service
+def test_field_upload_resp_fields(datapack_tar, svc_client_with_repo):
+    """Check response fields."""
+    svc_client, headers, project_id = svc_client_with_repo
+    headers.pop('Content-Type')
+
+    response = svc_client.post(
+        '/cache.files_upload',
+        data=dict(
+            file=(io.BytesIO(datapack_tar.read_bytes()), datapack_tar.name),
+        ),
+        query_string={
+            'unpack_archive': True,
+            'override_existing': True,
+        },
+        headers=headers
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+
+    assert {'result'} == set(response.json.keys())
+    assert 3 == len(response.json['result']['files'])
+    assert {
+        'content_type',
+        'file_id',
+        'file_name',
+        'file_size',
+        'is_archive',
+        'timestamp',
+        'unpack_archive',
+        'relative_path',
+    } == set(response.json['result']['files'][0].keys())
+
+    assert not response.json['result']['files'][0]['is_archive']
+    assert not response.json['result']['files'][0]['unpack_archive']
+
+    rel_path = response.json['result']['files'][0]['relative_path']
+    assert rel_path.startswith(datapack_tar.name) and 'unpacked' in rel_path
diff --git a/tests/service/test_dataset_views.py b/tests/service/test_dataset_views.py
new file mode 100644
index 0000000000..1864b7bfec
--- /dev/null
+++ b/tests/service/test_dataset_views.py
@@ -0,0 +1,614 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 - Swiss Data Science Center (SDSC)
+# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
+# Eidgenössische Technische Hochschule Zürich (ETHZ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
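
The cache view tests above pin down the request flow end to end; for reference, the same flow against a running service looks roughly like the sketch below. The endpoints, header names and payload keys come from the views in this diff; the base URL, token and identifiers are placeholders:

```python
import requests

SERVICE_URL = 'http://localhost:8080'  # placeholder deployment
HEADERS = {
    'Renku-User-Id': 'b4b4de0eda0f471ab82702bd5c367fa7',
    'Renku-User-FullName': 'Just Sam',
    'Renku-User-Email': 'contact@justsam.io',
    'Authorization': 'Bearer <git-oauth2-token>',  # placeholder
}

# 1. upload a file into the user's cache
with open('datafile.txt', 'rb') as stream:
    response = requests.post(
        SERVICE_URL + '/cache.files_upload',
        files={'file': stream},
        headers=HEADERS,
    )
file_id = response.json()['result']['files'][0]['file_id']

# 2. clone the target project into the cache
response = requests.post(
    SERVICE_URL + '/cache.project_clone',
    json={'git_url': 'https://dev.renku.ch/gitlab/contact/integration-tests'},
    headers=HEADERS,
)
project_id = response.json()['result']['project_id']

# 3. add the uploaded file to a (new) dataset and push the commit
response = requests.post(
    SERVICE_URL + '/datasets.add',
    json={
        'project_id': project_id,
        'dataset_name': 'mydata',
        'create_dataset': True,
        'files': [{'file_id': file_id}],
    },
    headers=HEADERS,
)
```
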
+"""Renku service dataset view tests.""" +import io +import json +import uuid + +import pytest + +from renku.service.config import INVALID_HEADERS_ERROR_CODE, \ + INVALID_PARAMS_ERROR_CODE, RENKU_EXCEPTION_ERROR_CODE + + +@pytest.mark.service +@pytest.mark.integration +def test_create_dataset_view(svc_client_with_repo): + """Create new dataset successfully.""" + svc_client, headers, project_id = svc_client_with_repo + + payload = { + 'project_id': project_id, + 'dataset_name': '{0}'.format(uuid.uuid4().hex), + } + + response = svc_client.post( + '/datasets.create', + data=json.dumps(payload), + headers=headers, + ) + + assert response + + assert {'result'} == set(response.json.keys()) + assert {'dataset_name'} == set(response.json['result'].keys()) + assert payload['dataset_name'] == response.json['result']['dataset_name'] + + +@pytest.mark.service +@pytest.mark.integration +def test_create_dataset_commit_msg(svc_client_with_repo): + """Create new dataset successfully with custom commit message.""" + svc_client, headers, project_id = svc_client_with_repo + + payload = { + 'project_id': project_id, + 'dataset_name': '{0}'.format(uuid.uuid4().hex), + 'commit_message': 'my awesome dataset' + } + + response = svc_client.post( + '/datasets.create', + data=json.dumps(payload), + headers=headers, + ) + + assert response + + assert {'result'} == set(response.json.keys()) + assert {'dataset_name'} == set(response.json['result'].keys()) + assert payload['dataset_name'] == response.json['result']['dataset_name'] + + +@pytest.mark.service +@pytest.mark.integration +def test_create_dataset_view_dataset_exists(svc_client_with_repo): + """Create new dataset which already exists.""" + svc_client, headers, project_id = svc_client_with_repo + + payload = { + 'project_id': project_id, + 'dataset_name': 'mydataset', + } + + response = svc_client.post( + '/datasets.create', + data=json.dumps(payload), + headers=headers, + ) + + assert response + assert {'error'} == set(response.json.keys()) + + assert RENKU_EXCEPTION_ERROR_CODE == response.json['error']['code'] + assert 'Dataset exists' in response.json['error']['reason'] + + +@pytest.mark.service +@pytest.mark.integration +def test_create_dataset_view_unknown_param(svc_client_with_repo): + """Create new dataset by specifying unknown parameters.""" + svc_client, headers, project_id = svc_client_with_repo + + payload = { + 'project_id': project_id, + 'dataset_name': 'mydata', + 'remote_name': 'origin' + } + + response = svc_client.post( + '/datasets.create', + data=json.dumps(payload), + headers=headers, + ) + + assert response + assert {'error'} == set(response.json.keys()) + + assert INVALID_PARAMS_ERROR_CODE == response.json['error']['code'] + assert {'remote_name'} == set(response.json['error']['reason'].keys()) + + +@pytest.mark.service +@pytest.mark.integration +def test_create_dataset_with_no_identity(svc_client_with_repo): + """Create new dataset with no identification provided.""" + svc_client, headers, project_id = svc_client_with_repo + + payload = { + 'project_id': project_id, + 'dataset_name': 'mydata', + 'remote_name': 'origin', + } + + response = svc_client.post( + '/datasets.create', + data=json.dumps(payload), + headers={'Content-Type': headers['Content-Type']} + # no user identity, expect error + ) + + assert response + assert {'error'} == response.json.keys() + + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + + err_message = 'user identification is incorrect or missing' + assert err_message == 
response.json['error']['reason'] + + +@pytest.mark.service +@pytest.mark.integration +def test_add_file_view_with_no_identity(svc_client_with_repo): + """Check identity error raise in dataset add.""" + svc_client, headers, project_id = svc_client_with_repo + payload = { + 'project_id': project_id, + 'dataset_name': 'mydata', + 'remote_name': 'origin', + } + + response = svc_client.post( + '/datasets.add', + data=json.dumps(payload), + headers={'Content-Type': headers['Content-Type']} + # no user identity, expect error + ) + assert response + + assert {'error'} == set(response.json.keys()) + assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code'] + + err_message = 'user identification is incorrect or missing' + assert err_message == response.json['error']['reason'] + + +@pytest.mark.service +@pytest.mark.integration +def test_add_file_view(svc_client_with_repo): + """Check adding of uploaded file to dataset.""" + svc_client, headers, project_id = svc_client_with_repo + content_type = headers.pop('Content-Type') + + response = svc_client.post( + '/cache.files_upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile1.txt'), ), + query_string={'override_existing': True}, + headers=headers + ) + + assert response + assert 200 == response.status_code + assert {'result'} == set(response.json.keys()) + assert 1 == len(response.json['result']['files']) + + file_id = response.json['result']['files'][0]['file_id'] + assert isinstance(uuid.UUID(file_id), uuid.UUID) + + payload = { + 'project_id': project_id, + 'dataset_name': '{0}'.format(uuid.uuid4().hex), + 'create_dataset': True, + 'files': [{ + 'file_id': file_id, + }, ] + } + headers['Content-Type'] = content_type + + response = svc_client.post( + '/datasets.add', + data=json.dumps(payload), + headers=headers, + ) + + assert response + + assert {'result'} == set(response.json.keys()) + assert {'dataset_name', 'project_id', + 'files'} == set(response.json['result'].keys()) + + assert 1 == len(response.json['result']['files']) + assert file_id == response.json['result']['files'][0]['file_id'] + + +@pytest.mark.service +@pytest.mark.integration +def test_add_file_commit_msg(svc_client_with_repo): + """Check adding of uploaded file to dataset with custom commit message.""" + svc_client, headers, project_id = svc_client_with_repo + content_type = headers.pop('Content-Type') + + response = svc_client.post( + '/cache.files_upload', + data=dict(file=(io.BytesIO(b'this is a test'), 'datafile1.txt'), ), + query_string={'override_existing': True}, + headers=headers + ) + + file_id = response.json['result']['files'][0]['file_id'] + assert isinstance(uuid.UUID(file_id), uuid.UUID) + + payload = { + 'commit_message': 'my awesome data file', + 'project_id': project_id, + 'dataset_name': '{0}'.format(uuid.uuid4().hex), + 'create_dataset': True, + 'files': [{ + 'file_id': file_id, + }, ] + } + headers['Content-Type'] = content_type + response = svc_client.post( + '/datasets.add', + data=json.dumps(payload), + headers=headers, + ) + + assert response + assert {'result'} == set(response.json.keys()) + assert {'dataset_name', 'project_id', + 'files'} == set(response.json['result'].keys()) + + assert 1 == len(response.json['result']['files']) + assert file_id == response.json['result']['files'][0]['file_id'] + + +@pytest.mark.service +@pytest.mark.integration +def test_list_datasets_view(svc_client_with_repo): + """Check listing of existing datasets.""" + svc_client, headers, project_id = svc_client_with_repo + + params = { + 'project_id': 
+
+
+@pytest.mark.service
+@pytest.mark.integration
+def test_list_datasets_view(svc_client_with_repo):
+    """Check listing of existing datasets."""
+    svc_client, headers, project_id = svc_client_with_repo
+
+    params = {
+        'project_id': project_id,
+    }
+
+    response = svc_client.get(
+        '/datasets.list',
+        query_string=params,
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'datasets'} == set(response.json['result'].keys())
+    assert 0 != len(response.json['result']['datasets'])
+    assert {'identifier', 'name', 'version',
+            'created'} == set(response.json['result']['datasets'][0].keys())
+
+
+@pytest.mark.service
+@pytest.mark.integration
+def test_list_datasets_view_no_auth(svc_client_with_repo):
+    """Check listing of existing datasets with no auth."""
+    svc_client, headers, project_id = svc_client_with_repo
+
+    params = {
+        'project_id': project_id,
+    }
+
+    response = svc_client.get(
+        '/datasets.list',
+        query_string=params,
+    )
+
+    assert response
+    assert {'error'} == set(response.json.keys())
+
+
+@pytest.mark.service
+@pytest.mark.integration
+def test_create_and_list_datasets_view(svc_client_with_repo):
+    """Create and list created dataset."""
+    svc_client, headers, project_id = svc_client_with_repo
+
+    payload = {
+        'project_id': project_id,
+        'dataset_name': '{0}'.format(uuid.uuid4().hex),
+    }
+
+    response = svc_client.post(
+        '/datasets.create',
+        data=json.dumps(payload),
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'dataset_name'} == set(response.json['result'].keys())
+    assert payload['dataset_name'] == response.json['result']['dataset_name']
+
+    params_list = {
+        'project_id': project_id,
+    }
+
+    response = svc_client.get(
+        '/datasets.list',
+        query_string=params_list,
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'datasets'} == set(response.json['result'].keys())
+    assert 0 != len(response.json['result']['datasets'])
+    assert {'identifier', 'name', 'version',
+            'created'} == set(response.json['result']['datasets'][0].keys())
+
+    assert payload['dataset_name'] in [
+        ds['name'] for ds in response.json['result']['datasets']
+    ]
+
+
+@pytest.mark.service
+@pytest.mark.integration
+def test_list_dataset_files(svc_client_with_repo):
+    """Check listing of dataset files."""
+    svc_client, headers, project_id = svc_client_with_repo
+    content_type = headers.pop('Content-Type')
+
+    file_name = '{0}'.format(uuid.uuid4().hex)
+    response = svc_client.post(
+        '/cache.files_upload',
+        data=dict(file=(io.BytesIO(b'this is a test'), file_name), ),
+        query_string={'override_existing': True},
+        headers=headers,
+    )
+
+    assert response
+    assert 200 == response.status_code
+
+    assert {'result'} == set(response.json.keys())
+    assert 1 == len(response.json['result']['files'])
+    file_id = response.json['result']['files'][0]['file_id']
+    assert isinstance(uuid.UUID(file_id), uuid.UUID)
+
+    payload = {
+        'project_id': project_id,
+        'dataset_name': 'mydata',
+        'files': [{
+            'file_id': file_id,
+        }, ],
+    }
+    headers['Content-Type'] = content_type
+
+    response = svc_client.post(
+        '/datasets.add',
+        data=json.dumps(payload),
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'dataset_name', 'files',
+            'project_id'} == set(response.json['result'].keys())
+    assert file_id == response.json['result']['files'][0]['file_id']
+
+    params = {
+        'project_id': project_id,
+        'dataset_name': 'mydata',
+    }
+
+    response = svc_client.get(
+        '/datasets.files_list',
+        query_string=params,
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'dataset_name', 'files'} == set(response.json['result'].keys())
+
+    assert params['dataset_name'] == response.json['result']['dataset_name']
+    assert file_name in [
+        file['name'] for file in response.json['result']['files']
+    ]
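+
+
+# NOTE (illustrative only): the archive tests below rely on a
+# ``datapack_zip`` fixture from the suite's conftest, which is not part of
+# this diff. Judging by its use (``read_bytes()``, ``.name``, three unpacked
+# files), it yields a ``pathlib.Path`` to a small zip archive. A speculative
+# builder with made-up member names; not the real fixture.
+def _example_datapack(tmp_path):
+    """Build a zip with three small files (sketch only)."""
+    import zipfile
+
+    archive = tmp_path / 'datapack.zip'
+    with zipfile.ZipFile(archive, 'w') as zf:
+        for name in ('file1.txt', 'file2.txt', 'file3.txt'):
+            zf.writestr(name, 'sample content')
+    return archive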
+
+
+@pytest.mark.service
+@pytest.mark.integration
+def test_add_with_unpacked_archive(datapack_zip, svc_client_with_repo):
+    """Upload archive and add it to a dataset."""
+    svc_client, headers, project_id = svc_client_with_repo
+    content_type = headers.pop('Content-Type')
+
+    # upload the archive and let the service unpack it into single files
+    response = svc_client.post(
+        '/cache.files_upload',
+        data=dict(
+            file=(io.BytesIO(datapack_zip.read_bytes()), datapack_zip.name),
+        ),
+        query_string={
+            'unpack_archive': True,
+            'override_existing': True,
+        },
+        headers=headers,
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+    assert {'result'} == set(response.json.keys())
+    assert 3 == len(response.json['result']['files'])
+
+    for file_ in response.json['result']['files']:
+        assert not file_['is_archive']
+        assert not file_['unpack_archive']
+
+        file_id = file_['file_id']
+        assert file_id
+
+    file_ = response.json['result']['files'][0]
+    payload = {
+        'project_id': project_id,
+        'dataset_name': '{0}'.format(uuid.uuid4().hex),
+    }
+
+    headers['Content-Type'] = content_type
+    response = svc_client.post(
+        '/datasets.create',
+        data=json.dumps(payload),
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'dataset_name'} == set(response.json['result'].keys())
+    assert payload['dataset_name'] == response.json['result']['dataset_name']
+
+    # add the first unpacked file to the dataset created above
+    payload = {
+        'project_id': project_id,
+        'dataset_name': payload['dataset_name'],
+        'files': [{
+            'file_id': file_['file_id'],
+        }, ],
+    }
+
+    response = svc_client.post(
+        '/datasets.add',
+        data=json.dumps(payload),
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'dataset_name', 'files',
+            'project_id'} == set(response.json['result'].keys())
+    assert file_['file_id'] == response.json['result']['files'][0]['file_id']
+
+    params = {
+        'project_id': project_id,
+        'dataset_name': payload['dataset_name'],
+    }
+
+    response = svc_client.get(
+        '/datasets.files_list',
+        query_string=params,
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'dataset_name', 'files'} == set(response.json['result'].keys())
+
+    assert params['dataset_name'] == response.json['result']['dataset_name']
+    assert file_['file_name'] in [
+        file['name'] for file in response.json['result']['files']
+    ]
+
+
+@pytest.mark.service
+@pytest.mark.integration
+def test_add_with_unpacked_archive_all(datapack_zip, svc_client_with_repo):
+    """Upload archive and add its contents to a dataset."""
+    svc_client, headers, project_id = svc_client_with_repo
+    content_type = headers.pop('Content-Type')
+
+    response = svc_client.post(
+        '/cache.files_upload',
+        data=dict(
+            file=(io.BytesIO(datapack_zip.read_bytes()), datapack_zip.name),
+        ),
+        query_string={
+            'unpack_archive': True,
+            'override_existing': True,
+        },
+        headers=headers,
+    )
+
+    assert response
+
+    assert 200 == response.status_code
+    assert {'result'} == set(response.json.keys())
+    assert 3 == len(response.json['result']['files'])
+
+    for file_ in response.json['result']['files']:
+        assert not file_['is_archive']
+        assert not file_['unpack_archive']
+
+        file_id = file_['file_id']
+        assert file_id
+
+    files = [{
+        'file_id': file_['file_id'],
+    } for file_ in response.json['result']['files']]
+
+    payload = {
+        'project_id': project_id,
+        'dataset_name': '{0}'.format(uuid.uuid4().hex),
+    }
+
+    headers['Content-Type'] = content_type
+    response = svc_client.post(
+        '/datasets.create',
+        data=json.dumps(payload),
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'dataset_name'} == set(response.json['result'].keys())
+    assert payload['dataset_name'] == response.json['result']['dataset_name']
+
+    # add all unpacked files to the dataset created above
+    payload = {
+        'project_id': project_id,
+        'dataset_name': payload['dataset_name'],
+        'files': files,
+    }
+
+    response = svc_client.post(
+        '/datasets.add',
+        data=json.dumps(payload),
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'dataset_name', 'files',
+            'project_id'} == set(response.json['result'].keys())
+    assert files == response.json['result']['files']
+
+    params = {
+        'project_id': project_id,
+        'dataset_name': payload['dataset_name'],
+    }
+
+    response = svc_client.get(
+        '/datasets.files_list',
+        query_string=params,
+        headers=headers,
+    )
+
+    assert response
+
+    assert {'result'} == set(response.json.keys())
+    assert {'dataset_name', 'files'} == set(response.json['result'].keys())
+
+    assert params['dataset_name'] == response.json['result']['dataset_name']
+    assert file_['file_name'] in [
+        file['name'] for file in response.json['result']['files']
+    ]
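+
+
+# NOTE (illustrative only): shape of the ``datasets.files_list`` result the
+# assertions above rely on; values are invented, keys follow the tests.
+_EXAMPLE_FILES_LIST_RESULT = {
+    'result': {
+        'dataset_name': 'mydata',
+        'files': [{'name': 'datafile1.txt'}],  # plus per-file metadata
+    },
+}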
diff --git a/tests/service/test_exceptions.py b/tests/service/test_exceptions.py
new file mode 100644
index 0000000000..d5bd5b4c1f
--- /dev/null
+++ b/tests/service/test_exceptions.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 - Swiss Data Science Center (SDSC)
+# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
+# Eidgenössische Technische Hochschule Zürich (ETHZ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Renku service exception tests for all endpoints."""
+import pytest
+
+from renku.service.config import INVALID_HEADERS_ERROR_CODE
+
+
+@pytest.mark.service
+def test_allowed_methods_exc(service_allowed_endpoint):
+    """Check allowed methods for every endpoint."""
+    methods, request, svc_client = service_allowed_endpoint
+
+    method = request['allowed_method']
+    methods.pop(method)
+    if method == 'GET':  # HEAD is implicitly allowed alongside GET
+        methods.pop('HEAD')
+
+    # every remaining method should be rejected with 405 Method Not Allowed
+    for method, fn in methods.items():
+        response = fn(request['url'])
+        assert 405 == response.status_code
+
+
+@pytest.mark.service
+def test_auth_headers_exc(service_allowed_endpoint):
+    """Check correct headers for every endpoint."""
+    methods, request, svc_client = service_allowed_endpoint
+
+    method = request['allowed_method']
+    client_method = methods.pop(method)
+    if method == 'GET':  # HEAD is implicitly allowed alongside GET
+        methods.pop('HEAD')
+
+    response = client_method(
+        request['url'],
+        headers=request['headers'],
+    )
+
+    # the service wraps errors in a 200 response carrying an ``error`` body
+    assert 200 == response.status_code
+    assert INVALID_HEADERS_ERROR_CODE == response.json['error']['code']
+
+    err_message = 'user identification is incorrect or missing'
+    assert err_message == response.json['error']['reason']
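+
+
+# NOTE (illustrative only): ``service_allowed_endpoint`` comes from the
+# suite's conftest and is not part of this diff. The tests above unpack it as
+# a ``(methods, request, svc_client)`` triple, where ``methods`` maps HTTP
+# verbs to bound test-client calls and ``request`` carries ``url``,
+# ``allowed_method`` and ``headers``. A speculative sketch of that shape,
+# wired to a single endpoint; the real fixture parametrizes over all of them.
+def _example_allowed_endpoint(svc_client):
+    """Return the (methods, request, svc_client) shape the tests consume."""
+    methods = {
+        'GET': svc_client.get,
+        'HEAD': svc_client.head,
+        'POST': svc_client.post,
+        'PUT': svc_client.put,
+        'DELETE': svc_client.delete,
+    }
+    request = {
+        'url': '/datasets.list',
+        'allowed_method': 'GET',
+        'headers': {},  # identity headers omitted in this sketch
+    }
+    return methods, request, svc_client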