diff --git a/.github/workflows/pr-tests-stack.yml b/.github/workflows/pr-tests-stack.yml
index 1161909779f..35c4e40823a 100644
--- a/.github/workflows/pr-tests-stack.yml
+++ b/.github/workflows/pr-tests-stack.yml
@@ -620,6 +620,66 @@ jobs:
         if: steps.changes.outputs.syft == 'true'
         run: |
           tox -e migration.test
+  pr-tests-scenarios-migrations:
+    strategy:
+      max-parallel: 99
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ["3.12"]
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - name: "clean .git/config"
+        if: matrix.os == 'windows-latest'
+        continue-on-error: true
+        shell: bash
+        run: |
+          echo "deleting ${GITHUB_WORKSPACE}/.git/config"
+          rm ${GITHUB_WORKSPACE}/.git/config
+
+      - uses: actions/checkout@v4
+
+      - name: Check for file changes
+        uses: dorny/paths-filter@v3
+        id: changes
+        with:
+          base: ${{ github.ref }}
+          token: ${{ github.token }}
+          filters: .github/file-filters.yml
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        if: steps.changes.outputs.syft == 'true'
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install pip packages
+        if: steps.changes.outputs.syft == 'true'
+        run: |
+          python -m pip install --upgrade pip
+          pip install uv==0.4.1 tox==4.18.0 tox-uv==1.11.2
+          uv --version
+
+      - name: Get uv cache dir
+        id: pip-cache
+        if: steps.changes.outputs.syft == 'true'
+        shell: bash
+        run: |
+          echo "dir=$(uv cache dir)" >> $GITHUB_OUTPUT
+
+      - name: Load github cache
+        uses: actions/cache@v4
+        if: steps.changes.outputs.syft == 'true'
+        with:
+          path: ${{ steps.pip-cache.outputs.dir }}
+          key: ${{ runner.os }}-uv-py${{ matrix.python-version }}-${{ hashFiles('setup.cfg') }}
+          restore-keys: |
+            ${{ runner.os }}-uv-py${{ matrix.python-version }}-
+
+      - name: Run migration scenario tests
+        if: steps.changes.outputs.syft == 'true'
+        run: |
+          tox -e migration.scenarios.test
+
   pr-tests-migrations-k8s:
     strategy:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 521e3f9b60c..646698814a0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,7 +14,7 @@ repos:
         exclude: ^(packages/grid/frontend/|.vscode)
       - id: check-added-large-files
        always_run: true
-        exclude: ^(packages/grid/backend/wheels/.*|docs/img/header.png|docs/img/terminalizer.gif)
+        exclude: ^(packages/grid/backend/wheels/.*|docs/img/header.png|docs/img/terminalizer.gif|notebooks/scenarios/bigquery/upgradability/sync/migration_.*\.blob)
       - id: check-yaml
         always_run: true
         exclude: ^(packages/grid/k8s/rendered/|packages/grid/helm/)
diff --git a/notebooks/api/0.8/00-load-data.ipynb b/notebooks/api/0.8/00-load-data.ipynb
index 8c3bb05b93b..372746f368a 100644
--- a/notebooks/api/0.8/00-load-data.ipynb
+++ b/notebooks/api/0.8/00-load-data.ipynb
@@ -711,6 +711,11 @@
   }
  ],
  "metadata": {
+  "kernelspec": {
+   "display_name": "syft_3.12",
+   "language": "python",
+   "name": "python3"
+  },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
@@ -721,7 +726,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.5"
+   "version": "3.12.4"
   },
   "toc": {
    "base_numbering": 1,
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/__init__.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/__init__.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/__init__.py
new file mode 100644
index 00000000000..7231b580696
--- /dev/null
+++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/__init__.py
@@ -0,0 +1,23 @@
+# stdlib
+import os
+
+# syft absolute
+from syft.util.util import str_to_bool
+
+# relative
+from .submit_query import make_submit_query
+
+env_var = "TEST_BIGQUERY_APIS_LIVE"
+use_live = str_to_bool(str(os.environ.get(env_var, "False")))
+env_name = "Live" if use_live else "Mock"
+print(f"Using {env_name} API Code, this will query BigQuery. ${env_var}=={use_live}")
+
+
+if use_live:
+    # relative
+    from .live.schema import make_schema
+    from .live.test_query import make_test_query
+else:
+    # relative
+    from .mock.schema import make_schema
+    from .mock.test_query import make_test_query
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/live/__init__.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/live/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/live/schema.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/live/schema.py
new file mode 100644
index 00000000000..03012c7d0cf
--- /dev/null
+++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/live/schema.py
@@ -0,0 +1,106 @@
+# stdlib
+from collections.abc import Callable
+
+# syft absolute
+import syft as sy
+from syft import test_settings
+from syft.rate_limiter import is_within_rate_limit
+
+
+def make_schema(settings: dict, worker_pool: str) -> Callable:
+    updated_settings = {
+        "calls_per_min": 5,
+        "rate_limiter_enabled": True,
+        "credentials": test_settings.gce_service_account.to_dict(),
+        "region": test_settings.gce_region,
+        "project_id": test_settings.gce_project_id,
+        "dataset_1": test_settings.dataset_1,
+        "table_1": test_settings.table_1,
+        "table_2": test_settings.table_2,
+    } | settings
+
+    @sy.api_endpoint(
+        path="bigquery.schema",
+        description="This endpoint allows for visualising the metadata of tables available in BigQuery.",
+        settings=updated_settings,
+        helper_functions=[
+            is_within_rate_limit
+        ],  # Adds rate limiting; this is also a method available to data scientists
+        worker_pool=worker_pool,
+    )
+    def live_schema(
+        context,
+    ) -> str:
+        # stdlib
+        import datetime
+
+        # third party
+        from google.cloud import bigquery  # noqa: F811
+        from google.oauth2 import service_account
+        import pandas as pd
+
+        # syft absolute
+        from syft import SyftException
+
+        # Auth for BigQuery based on the workload identity
+        credentials = service_account.Credentials.from_service_account_info(
+            context.settings["credentials"]
+        )
+        scoped_credentials = credentials.with_scopes(
+            ["https://www.googleapis.com/auth/cloud-platform"]
+        )
+
+        client = bigquery.Client(
+            credentials=scoped_credentials,
+            location=context.settings["region"],
+        )
+
+        # Store a dict with the call times for each user, via the email.
+        if context.settings["rate_limiter_enabled"]:
+            if context.user.email not in context.state.keys():
+                context.state[context.user.email] = []
+
+            if not context.code.is_within_rate_limit(context):
+                raise SyftException(
+                    public_message="Rate limit of calls per minute has been reached."
+                )
+            context.state[context.user.email].append(datetime.datetime.now())
+
+        try:
+            # Formats the data schema in a data frame format
+            # Warning: the only supported format types are primitives, np.ndarrays and pd.DataFrames
+
+            data_schema = []
+            for table_id in [
+                f"{context.settings['dataset_1']}.{context.settings['table_1']}",
+                f"{context.settings['dataset_1']}.{context.settings['table_2']}",
+            ]:
+                table = client.get_table(table_id)
+                for schema in table.schema:
+                    data_schema.append(
+                        {
+                            "project": str(table.project),
+                            "dataset_id": str(table.dataset_id),
+                            "table_id": str(table.table_id),
+                            "schema_name": str(schema.name),
+                            "schema_field": str(schema.field_type),
+                            "description": str(table.description),
+                            "num_rows": str(table.num_rows),
+                        }
+                    )
+            return pd.DataFrame(data_schema)
+
+        except Exception as e:
+            # not a bigquery exception
+            if not hasattr(e, "_errors"):
+                output = f"got exception e: {type(e)} {str(e)}"
+                raise SyftException(
+                    public_message=f"An error occurred executing the API call {output}"
+                )
+
+            # Should add appropriate error handling for what should be exposed to the data scientists.
+            raise SyftException(
+                public_message="An error occurred executing the API call, please contact the domain owner."
+            )
+
+    return live_schema
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/live/test_query.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/live/test_query.py
new file mode 100644
index 00000000000..ccd3c75b599
--- /dev/null
+++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/live/test_query.py
@@ -0,0 +1,111 @@
+# stdlib
+from collections.abc import Callable
+
+# syft absolute
+import syft as sy
+from syft import test_settings
+from syft.rate_limiter import is_within_rate_limit
+
+
+def make_test_query(settings) -> Callable:
+    updated_settings = {
+        "calls_per_min": 10,
+        "rate_limiter_enabled": True,
+        "credentials": test_settings.gce_service_account.to_dict(),
+        "region": test_settings.gce_region,
+        "project_id": test_settings.gce_project_id,
+    } | settings
+
+    # these are the same if you allow the rate limiter to be turned on and off
+    @sy.api_endpoint_method(
+        settings=updated_settings,
+        helper_functions=[is_within_rate_limit],
+    )
+    def live_test_query(
+        context,
+        sql_query: str,
+    ) -> str:
+        # stdlib
+        import datetime
+
+        # third party
+        from google.cloud import bigquery  # noqa: F811
+        from google.oauth2 import service_account
+
+        # syft absolute
+        from syft import SyftException
+
+        # Auth for BigQuery based on the workload identity
+        credentials = service_account.Credentials.from_service_account_info(
+            context.settings["credentials"]
+        )
+        scoped_credentials = credentials.with_scopes(
+            ["https://www.googleapis.com/auth/cloud-platform"]
+        )
+
+        client = bigquery.Client(
+            credentials=scoped_credentials,
+            location=context.settings["region"],
+        )
+
+        # Store a dict with the call times for each user, via the email.
+        if context.settings["rate_limiter_enabled"]:
+            if context.user.email not in context.state.keys():
+                context.state[context.user.email] = []
+
+            if not context.code.is_within_rate_limit(context):
+                raise SyftException(
+                    public_message="Rate limit of calls per minute has been reached."
+                )
+            context.state[context.user.email].append(datetime.datetime.now())
+
+        try:
+            rows = client.query_and_wait(
+                sql_query,
+                project=context.settings["project_id"],
+            )
+
+            if rows.total_rows > 1_000_000:
+                raise SyftException(
+                    public_message="Please only write queries that gather aggregate statistics"
+                )
+
+            return rows.to_dataframe()
+
+        except Exception as e:
+            # not a bigquery exception
+            if not hasattr(e, "_errors"):
+                output = f"got exception e: {type(e)} {str(e)}"
+                raise SyftException(
+                    public_message=f"An error occurred executing the API call {output}"
+                )
+
+            # Forward only the error types below to the data scientists.
+            # By default, any exception is only visible to the data owner.
+
+            if e._errors[0]["reason"] in [
+                "badRequest",
+                "blocked",
+                "duplicate",
+                "invalidQuery",
+                "invalid",
+                "jobBackendError",
+                "jobInternalError",
+                "notFound",
+                "notImplemented",
+                "rateLimitExceeded",
+                "resourceInUse",
+                "resourcesExceeded",
+                "tableUnavailable",
+                "timeout",
+            ]:
+                raise SyftException(
+                    public_message="Error occurred during the call: "
+                    + e._errors[0]["message"]
+                )
+            else:
+                raise SyftException(
+                    public_message="An error occurred executing the API call, please contact the domain owner."
+                )
+
+    return live_test_query
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/mock/__init__.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/mock/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/mock/data.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/mock/data.py
new file mode 100644
index 00000000000..82262bf7a01
--- /dev/null
+++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/mock/data.py
@@ -0,0 +1,268 @@
+# stdlib
+from math import nan
+
+schema_dict = {
+    "project": {
+        0: "example-project",
+        1: "example-project",
+        2: "example-project",
+        3: "example-project",
+        4: "example-project",
+        5: "example-project",
+        6: "example-project",
+        7: "example-project",
+        8: "example-project",
+        9: "example-project",
+        10: "example-project",
+        11: "example-project",
+        12: "example-project",
+        13: "example-project",
+        14: "example-project",
+        15: "example-project",
+        16: "example-project",
+        17: "example-project",
+        18: "example-project",
+        19: "example-project",
+        20: "example-project",
+        21: "example-project",
+        22: "example-project",
+    },
+    "dataset_id": {
+        0: "test_1gb",
+        1: "test_1gb",
+        2: "test_1gb",
+        3: "test_1gb",
+        4: "test_1gb",
+        5: "test_1gb",
+        6: "test_1gb",
+        7: "test_1gb",
+        8: "test_1gb",
+        9: "test_1gb",
+        10: "test_1gb",
+        11: "test_1gb",
+        12: "test_1gb",
+        13: "test_1gb",
+        14: "test_1gb",
+        15: "test_1gb",
+        16: "test_1gb",
+        17: "test_1gb",
+        18: "test_1gb",
+        19: "test_1gb",
+        20: "test_1gb",
+        21: "test_1gb",
+        22: "test_1gb",
+    },
+    "table_id": {
+        0: "posts",
+        1: "posts",
+        2: "posts",
+        3: "posts",
+        4: "posts",
+        5: "posts",
+        6: "posts",
+        7: "comments",
+        8: "comments",
+        9: "comments",
+        10: "comments",
+        11: "comments",
+        12: "comments",
+        13: "comments",
+        14: "comments",
+        15: "comments",
+        16: "comments",
+        17: "comments",
+        18: "comments",
+        19: "comments",
+        20: "comments",
+        21: "comments",
+        22: "comments",
+    },
+    "schema_name": {
+        0: "int64_field_0",
+        1: "id",
+        2: "name",
+        3: "subscribers_count",
+        4: "permalink",
+        5: "nsfw",
+        6: "spam",
+        7: "int64_field_0",
+        8: "id",
+        9: "body",
+        10: "parent_id",
+        11: "created_at",
+        12: "last_modified_at",
"gilded", + 14: "permalink", + 15: "score", + 16: "comment_id", + 17: "post_id", + 18: "author_id", + 19: "spam", + 20: "deleted", + 21: "upvote_raio", + 22: "collapsed_in_crowd_control", + }, + "schema_field": { + 0: "INTEGER", + 1: "STRING", + 2: "STRING", + 3: "INTEGER", + 4: "STRING", + 5: "FLOAT", + 6: "BOOLEAN", + 7: "INTEGER", + 8: "STRING", + 9: "STRING", + 10: "STRING", + 11: "INTEGER", + 12: "INTEGER", + 13: "BOOLEAN", + 14: "STRING", + 15: "INTEGER", + 16: "STRING", + 17: "STRING", + 18: "STRING", + 19: "BOOLEAN", + 20: "BOOLEAN", + 21: "FLOAT", + 22: "BOOLEAN", + }, + "description": { + 0: "None", + 1: "None", + 2: "None", + 3: "None", + 4: "None", + 5: "None", + 6: "None", + 7: "None", + 8: "None", + 9: "None", + 10: "None", + 11: "None", + 12: "None", + 13: "None", + 14: "None", + 15: "None", + 16: "None", + 17: "None", + 18: "None", + 19: "None", + 20: "None", + 21: "None", + 22: "None", + }, + "num_rows": { + 0: "2000000", + 1: "2000000", + 2: "2000000", + 3: "2000000", + 4: "2000000", + 5: "2000000", + 6: "2000000", + 7: "2000000", + 8: "2000000", + 9: "2000000", + 10: "2000000", + 11: "2000000", + 12: "2000000", + 13: "2000000", + 14: "2000000", + 15: "2000000", + 16: "2000000", + 17: "2000000", + 18: "2000000", + 19: "2000000", + 20: "2000000", + 21: "2000000", + 22: "2000000", + }, +} + + +query_dict = { + "int64_field_0": { + 0: 4, + 1: 5, + 2: 10, + 3: 16, + 4: 17, + 5: 23, + 6: 24, + 7: 25, + 8: 27, + 9: 40, + }, + "id": { + 0: "t5_via1x", + 1: "t5_cv9gn", + 2: "t5_8p2tq", + 3: "t5_8fcro", + 4: "t5_td5of", + 5: "t5_z01fv", + 6: "t5_hmqjk", + 7: "t5_1flyj", + 8: "t5_5rwej", + 9: "t5_uurcv", + }, + "name": { + 0: "/channel/mylittlepony", + 1: "/channel/polyamory", + 2: "/channel/Catholicism", + 3: "/channel/cordcutters", + 4: "/channel/stevenuniverse", + 5: "/channel/entitledbitch", + 6: "/channel/engineering", + 7: "/channel/nottheonion", + 8: "/channel/FoodPorn", + 9: "/channel/puppysmiles", + }, + "subscribers_count": { + 0: 4323081, + 1: 2425929, + 2: 4062607, + 3: 7543226, + 4: 2692168, + 5: 2709080, + 6: 8766144, + 7: 2580984, + 8: 7784809, + 9: 3715991, + }, + "permalink": { + 0: "/channel//channel/mylittlepony", + 1: "/channel//channel/polyamory", + 2: "/channel//channel/Catholicism", + 3: "/channel//channel/cordcutters", + 4: "/channel//channel/stevenuniverse", + 5: "/channel//channel/entitledbitch", + 6: "/channel//channel/engineering", + 7: "/channel//channel/nottheonion", + 8: "/channel//channel/FoodPorn", + 9: "/channel//channel/puppysmiles", + }, + "nsfw": { + 0: nan, + 1: nan, + 2: nan, + 3: nan, + 4: nan, + 5: nan, + 6: nan, + 7: nan, + 8: nan, + 9: nan, + }, + "spam": { + 0: False, + 1: False, + 2: False, + 3: False, + 4: False, + 5: False, + 6: False, + 7: False, + 8: False, + 9: False, + }, +} diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/mock/schema.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/mock/schema.py new file mode 100644 index 00000000000..a95e04f2f1d --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/mock/schema.py @@ -0,0 +1,52 @@ +# stdlib +from collections.abc import Callable + +# syft absolute +import syft as sy + +# relative +from ..rate_limiter import is_within_rate_limit +from .data import schema_dict + + +def make_schema(settings, worker_pool) -> Callable: + updated_settings = { + "calls_per_min": 5, + "rate_limiter_enabled": True, + "schema_dict": schema_dict, + } | settings + + @sy.api_endpoint( + path="bigquery.schema", + description="This endpoint 
+        description="This endpoint allows for visualising the metadata of tables available in BigQuery.",
+        settings=updated_settings,
+        helper_functions=[is_within_rate_limit],
+        worker_pool=worker_pool,
+    )
+    def mock_schema(
+        context,
+    ) -> str:
+        # syft absolute
+        from syft import SyftException
+
+        # Store a dict with the call times for each user, via the email.
+        if context.settings["rate_limiter_enabled"]:
+            # stdlib
+            import datetime
+
+            if context.user.email not in context.state.keys():
+                context.state[context.user.email] = []
+
+            if not context.code.is_within_rate_limit(context):
+                raise SyftException(
+                    public_message="Rate limit of calls per minute has been reached."
+                )
+            context.state[context.user.email].append(datetime.datetime.now())
+
+        # third party
+        import pandas as pd
+
+        df = pd.DataFrame(context.settings["schema_dict"])
+        return df
+
+    return mock_schema
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/mock/test_query.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/mock/test_query.py
new file mode 100644
index 00000000000..ae028a8cf36
--- /dev/null
+++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/mock/test_query.py
@@ -0,0 +1,138 @@
+# stdlib
+from collections.abc import Callable
+
+# syft absolute
+import syft as sy
+
+# relative
+from ..rate_limiter import is_within_rate_limit
+from .data import query_dict
+
+
+def extract_limit_value(sql_query: str) -> int | None:
+    # stdlib
+    import re
+
+    limit_pattern = re.compile(r"\bLIMIT\s+(\d+)\b", re.IGNORECASE)
+    match = limit_pattern.search(sql_query)
+    if match:
+        return int(match.group(1))
+    return None
+
+
+def is_valid_sql(query: str) -> bool:
+    # stdlib
+    import sqlite3
+
+    # Prepare an in-memory SQLite database
+    conn = sqlite3.connect(":memory:")
+    cursor = conn.cursor()
+
+    try:
+        # Use the EXPLAIN QUERY PLAN command to get the query plan
+        cursor.execute(f"EXPLAIN QUERY PLAN {query}")
+    except sqlite3.Error as e:
+        if "no such table" in str(e).lower():
+            return True
+        return False
+    finally:
+        conn.close()
+
+
+def adjust_dataframe_rows(df, target_rows: int):
+    # third party
+    import pandas as pd
+
+    current_rows = len(df)
+
+    if target_rows > current_rows:
+        # Repeat rows to match target_rows
+        repeat_times = (target_rows + current_rows - 1) // current_rows
+        df_expanded = pd.concat([df] * repeat_times, ignore_index=True).head(
+            target_rows
+        )
+    else:
+        # Truncate rows to match target_rows
+        df_expanded = df.head(target_rows)
+
+    return df_expanded
+
+
+def make_test_query(settings: dict) -> Callable:
+    updated_settings = {
+        "calls_per_min": 10,
+        "rate_limiter_enabled": True,
+        "query_dict": query_dict,
+    } | settings
+
+    # these are the same if you allow the rate limiter to be turned on and off
+    @sy.api_endpoint_method(
+        settings=updated_settings,
+        helper_functions=[
+            is_within_rate_limit,
+            extract_limit_value,
+            is_valid_sql,
+            adjust_dataframe_rows,
+        ],
+    )
+    def mock_test_query(
+        context,
+        sql_query: str,
+    ) -> str:
+        # stdlib
+        import datetime
+
+        # third party
+        from google.api_core.exceptions import BadRequest
+
+        # syft absolute
+        from syft import SyftException
+
+        # Store a dict with the call times for each user, via the email.
+        if context.settings["rate_limiter_enabled"]:
+            if context.user.email not in context.state.keys():
+                context.state[context.user.email] = []
+
+            if not context.code.is_within_rate_limit(context):
+                raise SyftException(
+                    public_message="Rate limit of calls per minute has been reached."
+                )
+            context.state[context.user.email].append(datetime.datetime.now())
+
+        bad_table = "invalid_table"
+        bad_post = (
+            "BadRequest: 400 POST "
+            "https://bigquery.googleapis.com/bigquery/v2/projects/project-id/"
+            "queries?prettyPrint=false: "
+        )
+        if bad_table in sql_query:
+            try:
+                raise BadRequest(
+                    f'{bad_post} Table "{bad_table}" must be qualified '
+                    "with a dataset (e.g. dataset.table)."
+                )
+            except Exception as e:
+                raise SyftException(
+                    public_message=f"*must be qualified with a dataset*. {e}"
+                )
+
+        if not context.code.is_valid_sql(sql_query):
+            raise BadRequest(
+                f'{bad_post} Syntax error: Unexpected identifier "{sql_query}" at [1:1]'
+            )
+
+        # third party
+        import pandas as pd
+
+        limit = context.code.extract_limit_value(sql_query)
+        if limit > 1_000_000:
+            raise SyftException(
+                public_message="Please only write queries that gather aggregate statistics"
+            )
+
+        base_df = pd.DataFrame(context.settings["query_dict"])
+
+        df = context.code.adjust_dataframe_rows(base_df, limit)
+        return df
+
+    return mock_test_query
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/rate_limiter.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/rate_limiter.py
new file mode 100644
index 00000000000..8ce319b61f4
--- /dev/null
+++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/rate_limiter.py
@@ -0,0 +1,16 @@
+def is_within_rate_limit(context) -> bool:
+    """Rate limiter for custom API calls made by users."""
+    # stdlib
+    import datetime
+
+    state = context.state
+    settings = context.settings
+    email = context.user.email
+
+    current_time = datetime.datetime.now()
+    calls_last_min = [
+        1 if (current_time - call_time).seconds < 60 else 0
+        for call_time in state[email]
+    ]
+
+    return sum(calls_last_min) < settings.get("calls_per_min", 5)
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/submit_query.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/submit_query.py
new file mode 100644
index 00000000000..a0125ee009b
--- /dev/null
+++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/apis/submit_query.py
@@ -0,0 +1,42 @@
+# syft absolute
+import syft as sy
+
+
+def make_submit_query(settings, worker_pool):
+    updated_settings = {"user_code_worker": worker_pool} | settings
+
+    @sy.api_endpoint(
+        path="bigquery.submit_query",
+        description="API endpoint that allows you to submit SQL queries to run on the private data.",
+        worker_pool=worker_pool,
+        settings=updated_settings,
+    )
+    def submit_query(
+        context,
+        func_name: str,
+        query: str,
+    ) -> str:
+        # syft absolute
+        import syft as sy
+
+        @sy.syft_function(
+            name=func_name,
+            input_policy=sy.MixedInputPolicy(
+                endpoint=sy.Constant(
+                    val=context.admin_client.api.services.bigquery.test_query
+                ),
+                query=sy.Constant(val=query),
+                client=context.admin_client,
+            ),
+            worker_pool_name=context.settings["user_code_worker"],
+        )
+        def execute_query(query: str, endpoint):
+            res = endpoint(sql_query=query)
+            return res
+
+        request = context.user_client.code.request_code_execution(execute_query)
+        context.admin_client.requests.set_tags(request, ["autosync"])
+
+        return f"Query submitted {request}. Use `client.code.{func_name}()` to run your query"
+
+    return submit_query
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/email_helpers.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/email_helpers.py
new file mode 100644
index 00000000000..dda08c21866
--- /dev/null
+++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/email_helpers.py
@@ -0,0 +1,339 @@
+# stdlib
+import asyncio
+from dataclasses import dataclass
+from dataclasses import field
+import json
+import re
+import time
+from typing import Any
+
+# third party
+from aiosmtpd.controller import Controller
+from faker import Faker
+
+# syft absolute
+from syft.service.user.user_roles import ServiceRole
+
+fake = Faker()
+
+
+@dataclass
+class Email:
+    email_from: str
+    email_to: str
+    email_content: str
+
+    def to_dict(self) -> dict:
+        output = {}
+        for k, v in self.__dict__.items():
+            output[k] = v
+        return output
+
+    def __iter__(self):
+        yield from self.to_dict().items()
+
+    def __getitem__(self, key):
+        return self.to_dict()[key]
+
+    def __repr__(self) -> str:
+        return f"{self.email_to}\n{self.email_from}\n\n{self.email_content}"
+
+
+class EmailServer:
+    def __init__(self, filepath="./emails.json"):
+        self.filepath = filepath
+        self._emails: dict[str, list[Email]] = self.load_emails()
+
+    def load_emails(self) -> dict[str, list[Email]]:
+        try:
+            with open(self.filepath) as f:
+                data = json.load(f)
+                return {k: [Email(**email) for email in v] for k, v in data.items()}
+        except Exception as e:
+            print("Issues reading email file", e)
+            return {}
+
+    def save_emails(self) -> None:
+        with open(self.filepath, "w") as f:
+            data = {
+                k: [email.to_dict() for email in v] for k, v in self._emails.items()
+            }
+            f.write(json.dumps(data))
+
+    def add_email_for_user(self, user_email: str, email: Email) -> None:
+        if user_email not in self._emails:
+            self._emails[user_email] = []
+        self._emails[user_email].append(email)
+        self.save_emails()
+
+    def get_emails_for_user(self, user_email: str) -> list[Email]:
+        self._emails: dict[str, list[Email]] = self.load_emails()
+        return self._emails.get(user_email, [])
+
+    def reset_emails(self) -> None:
+        self._emails = {}
+        self.save_emails()
+
+
+SENDER = "noreply@openmined.org"
+
+
+def get_token(email) -> str:
+    # stdlib
+    import re
+
+    pattern = r"syft_client\.reset_password\(token='(.*?)', new_password=.*?\)"
+    try:
+        token = re.search(pattern, email.email_content).group(1)
+    except Exception:
+        raise Exception(f"No token found in email: {email.email_content}")
+    return token
+
+
+@dataclass
+class TestUser:
+    name: str
+    email: str
+    password: str
+    role: ServiceRole
+    new_password: str | None = None
+    email_disabled: bool = False
+    reset_password: bool = False
+    reset_token: str | None = None
+    _client_cache: Any | None = field(default=None, repr=False, init=False)
+    _email_server: EmailServer | None = None
+
+    @property
+    def latest_password(self) -> str:
+        if self.new_password:
+            return self.new_password
+        return self.password
+
+    def make_new_password(self) -> str:
+        self.new_password = fake.password()
+        return self.new_password
+
+    @property
+    def client(self):
+        return self._client_cache
+
+    def relogin(self) -> None:
+        # Re-assigning triggers the client setter below, which logs in again
+        self.client = self.client
+
+    @client.setter
+    def client(self, client):
+        client = client.login(email=self.email, password=self.latest_password)
+        self._client_cache = client
+
+    def to_dict(self) -> dict:
+        output = {}
+        for k, v in self.__dict__.items():
+            if k.startswith("_"):
+                continue
+            if k == "role":
+                v = str(v)
+            output[k] = v
+        return output
+
+    def __iter__(self):
+        for key, val in self.to_dict().items():
+            if not key.startswith("_"):
+                yield key, val
+
+    def __getitem__(self, key):
+        if key.startswith("_"):
+            return None
+        return self.to_dict()[key]
+
+    def update_password(self):
+        self.password = self.new_password
+        self.new_password = None
+
+    @property
+    def emails(self) -> list[Email]:
+        if not self._email_server:
+            print("Not connected to email server object")
+            return []
+        return self._email_server.get_emails_for_user(self.email)
+
+    def get_token(self) -> str:
+        token = None
+        for email in reversed(self.emails):
+            try:
+                token = get_token(email)
+                break
+            except Exception:  # nosec
+                pass
+        self.reset_token = token
+        return token
+
+
+def save_users(users):
+    user_dicts = []
+    for user in users:
+        user_dicts.append(user.to_dict())
+    print(user_dicts)
+    with open("./users.json", "w") as f:
+        f.write(json.dumps(user_dicts))
+
+
+def load_users(high_client=None, path="./users.json"):
+    users = []
+    with open(path) as f:
+        data = f.read()
+    user_dicts = json.loads(data)
+    for user in user_dicts:
+        test_user = TestUser(**user)
+        if high_client:
+            test_user.client = high_client
+        users.append(test_user)
+    return users
+
+
+def make_user(
+    name: str | None = None,
+    email: str | None = None,
+    password: str | None = None,
+    role: ServiceRole = ServiceRole.DATA_SCIENTIST,
+):
+    fake = Faker()
+    if name is None:
+        name = fake.name()
+    if email is None:
+        ascii_string = re.sub(r"[^a-zA-Z\s]", "", name).lower()
+        dashed_string = ascii_string.replace(" ", "-")
+        email = f"{dashed_string}-fake@openmined.org"
+    if password is None:
+        password = fake.password()
+
+    return TestUser(name=name, email=email, password=password, role=role)
+
+
+def user_exists(root_client, email: str) -> bool:
+    users = root_client.api.services.user
+    for user in users:
+        if user.email == email:
+            return True
+    return False
+
+
+class SMTPTestServer:
+    def __init__(self, email_server):
+        self.port = 9025
+        self.hostname = "0.0.0.0"  # nosec: B104
+        self._stop_event = asyncio.Event()
+
+        # Simple email handler class
+        class SimpleHandler:
+            async def handle_DATA(self, server, session, envelope):
+                try:
+                    print(f"> SMTPTestServer got an email for {envelope.rcpt_tos}")
+                    email = Email(
+                        email_from=envelope.mail_from,
+                        email_to=envelope.rcpt_tos,
+                        email_content=envelope.content.decode(
+                            "utf-8", errors="replace"
+                        ),
+                    )
+                    email_server.add_email_for_user(envelope.rcpt_tos[0], email)
+                    email_server.save_emails()
+                    return "250 Message accepted for delivery"
+                except Exception as e:
+                    print(f"> Error handling email: {e}")
+                    return "550 Internal Server Error"
+
+        try:
+            self.handler = SimpleHandler()
+            self.controller = Controller(
+                self.handler, hostname=self.hostname, port=self.port
+            )
+        except Exception as e:
+            print(f"> Error initializing SMTPTestServer Controller: {e}")
+
+    def start(self):
+        print(f"> Starting SMTPTestServer on: {self.hostname}:{self.port}")
+        asyncio.create_task(self.async_loop())
+
+    async def async_loop(self):
+        try:
+            print(f"> Starting SMTPTestServer on: {self.hostname}:{self.port}")
+            self.controller.start()
+            await (
+                self._stop_event.wait()
+            )  # Wait until the event is set to stop the server
+        except Exception as e:
+            print(f"> Error with SMTPTestServer: {e}")
+
+    def stop(self):
+        try:
+            print("> Stopping SMTPTestServer")
+            loop = asyncio.get_running_loop()
+            if loop.is_running():
+                loop.create_task(self.async_stop())
+            else:
+                asyncio.run(self.async_stop())
+        except Exception as e:
+            print(f"> Error stopping SMTPTestServer: {e}")
+
+    async def async_stop(self):
+        self.controller.stop()
+        self._stop_event.set()  # Stop the server by setting the event
+
+
+class TimeoutError(Exception):
+    pass
+
+
+class Timeout:
+    def __init__(self, timeout_duration):
+        if timeout_duration > 60:
+            raise ValueError("Timeout duration cannot exceed 60 seconds.")
+        self.timeout_duration = timeout_duration
+
+    def run_with_timeout(self, condition_func, *args, **kwargs):
+        start_time = time.time()
+        result = None
+
+        while True:
+            elapsed_time = time.time() - start_time
+            if elapsed_time > self.timeout_duration:
+                raise TimeoutError(
+                    f"Function execution exceeded {self.timeout_duration} seconds."
+                )
+
+            # Check if the condition is met
+            try:
+                if condition_func():
+                    print("Condition met, exiting early.")
+                    break
+            except Exception as e:
+                print(f"Exception in target function: {e}")
+                break  # Exit the loop if an exception occurs in the function
+            time.sleep(1)
+
+        return result
+
+
+def get_email_server(reset=False):
+    email_server = EmailServer()
+    if reset:
+        email_server.reset_emails()
+    smtp_server = SMTPTestServer(email_server)
+    smtp_server.start()
+    return email_server, smtp_server
+
+
+def create_user(root_client, test_user):
+    if not user_exists(root_client, test_user.email):
+        fake = Faker()
+        root_client.register(
+            name=test_user.name,
+            email=test_user.email,
+            password=test_user.password,
+            password_verify=test_user.password,
+            institution=fake.company(),
+            website=fake.url(),
+        )
+    else:
+        print("User already exists", test_user)
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/job_helpers.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/job_helpers.py
new file mode 100644
index 00000000000..e34a2eb10c7
--- /dev/null
+++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/job_helpers.py
@@ -0,0 +1,400 @@
+# stdlib
+from collections import defaultdict
+from collections.abc import Callable
+from dataclasses import dataclass
+from dataclasses import field
+import json
+import random
+import re
+import secrets
+import textwrap
+from typing import Any
+
+# third party
+from email_helpers import TestUser
+
+# syft absolute
+from syft import test_settings
+
+from syft.client.client import SyftClient  # noqa
+
+dataset_1 = test_settings.get("dataset_1", default="dataset_1")
+dataset_2 = test_settings.get("dataset_2", default="dataset_2")
+table_1 = test_settings.get("table_1", default="table_1")
+table_2 = test_settings.get("table_2", default="table_2")
+table_1_col_id = test_settings.get("table_1_col_id", default="table_id")
+table_1_col_score = test_settings.get("table_1_col_score", default="colname")
+table_2_col_id = test_settings.get("table_2_col_id", default="table_id")
+table_2_col_score = test_settings.get("table_2_col_score", default="colname")
+
+
+@dataclass
+class TestJob:
+    user_email: str
+    func_name: str
+    query: str
+    job_type: str
+    settings: dict  # make a type so we can rely on attributes
+    should_succeed: bool
+    should_submit: bool = True
+    code_path: str | None = field(default=None)
+    admin_reviewed: bool = False
+    result_as_expected: bool | None = None
+
+    _client_cache: SyftClient | None = field(default=None, repr=False, init=False)
+
+    @property
+    def is_submitted(self) -> bool:
+        return self.code_path is not None
+
+    @property
+    def client(self):
+        return self._client_cache
+
+    @client.setter
+    def client(self, client):
+        self._client_cache = client
+
+    def to_dict(self) -> dict:
+        output = {}
+        for k, v in self.__dict__.items():
+            if k.startswith("_"):
+                continue
+            output[k] = v
+        return output
+
+    def __iter__(self):
+        for key, val in self.to_dict().items():
+            if not key.startswith("_"):
+                yield key, val
+
+    def __getitem__(self, key):
+        if key.startswith("_"):
+            return None
+        return self.to_dict()[key]
+
+    @property
+    def code_method(self) -> None | Callable:
+        try:
+            return getattr(self.client.code, self.func_name, None)
+        except Exception as e:
+            print(f"Can't find code method. {e}")
+            return None
+
+
+def make_query(settings: dict) -> str:
+    query = f"""
+    SELECT {settings['groupby_col']}, AVG({settings['score_col']}) AS average_score
+    FROM {settings['dataset']}.{settings['table']}
+    GROUP BY {settings['groupby_col']}
+    LIMIT {settings['limit']}""".strip()  # nosec: B608
+
+    return textwrap.dedent(query)
+
+
+def create_simple_query_job(user: TestUser) -> TestJob:
+    job_type = "simple_query"
+    func_name = f"{job_type}_{secrets.token_hex(3)}"
+
+    dataset = random.choice([dataset_1, dataset_2])  # nosec: B311
+    table, groupby_col, score_col = random.choice(  # nosec: B311
+        [
+            (table_1, table_1_col_id, table_1_col_score),
+            (table_2, table_2_col_id, table_2_col_score),
+        ]
+    )
+    limit = random.randint(1, 1_000_000)  # nosec: B311
+
+    settings = {
+        "dataset": dataset,
+        "table": table,
+        "groupby_col": groupby_col,
+        "score_col": score_col,
+        "limit": limit,
+    }
+    query = make_query(settings)
+
+    result = TestJob(
+        user_email=user.email,
+        func_name=func_name,
+        query=query,
+        job_type=job_type,
+        settings=settings,
+        should_succeed=True,
+    )
+
+    result.client = user.client
+    return result
+
+
+def create_wrong_asset_query(user: TestUser) -> TestJob:
+    job_type = "wrong_asset_query"
+    func_name = f"{job_type}_{secrets.token_hex(3)}"
+
+    valid_job = create_simple_query_job(user)
+    settings = valid_job.settings
+    corrupted_asset = random.choice(["dataset", "table"])  # nosec: B311
+    settings[corrupted_asset] = "wrong_asset"
+    query = make_query(settings)
+
+    result = TestJob(
+        user_email=user.email,
+        func_name=func_name,
+        query=query,
+        job_type=job_type,
+        settings=settings,
+        should_succeed=False,
+    )
+
+    result.client = user.client
+    return result
+
+
+def create_wrong_syntax_query(user: TestUser) -> TestJob:
+    job_type = "wrong_syntax_query"
+    func_name = f"{job_type}_{secrets.token_hex(3)}"
+
+    query = "SELECT * FROM table INCORRECT SYNTAX"
+
+    result = TestJob(
+        user_email=user.email,
+        func_name=func_name,
+        query=query,
+        job_type=job_type,
+        settings={},
+        should_succeed=False,
+    )
+
+    result.client = user.client
+    return result
+
+
+def create_long_query_job(user: TestUser) -> TestJob:
+    job_type = "job_too_much_text"
+    func_name = f"{job_type}_{secrets.token_hex(3)}"
+
+    query = "a" * 1_000
+
+    result = TestJob(
+        user_email=user.email,
+        func_name=func_name,
+        query=query,
+        job_type=job_type,
+        settings={},
+        should_succeed=False,
+    )
+
+    result.client = user.client
+    return result
+
+
+def create_query_long_name(user: TestUser) -> TestJob:
+    job_type = "job_long_name"
+    func_name = f"{job_type}_{secrets.token_hex(3)}"
+
+    job = create_simple_query_job(user)
+
+    job.job_type = job_type
+    job.func_name = func_name + "a" * 1_000
+
+    return job
+
+
+def create_job_funcname_xss(user: TestUser) -> TestJob:
+    job_type = "job_funcname_xss"
+    func_name = f"{job_type}_{secrets.token_hex(3)}"
+    func_name += ""
+
+    job = create_simple_query_job(user)
+    job.job_type = job_type
+    job.func_name = func_name
+    job.should_submit = False
+    return job
+
+
+def get_request_for_job_info(requests, job):
+    job_requests = [r for r in requests if r.code.service_func_name == job.func_name]
+    if len(job_requests) != 1:
+        raise Exception(f"Too many or too few requests: {job} in requests: {requests}")
+    return job_requests[0]
+
+
+def create_job_query_xss(user: TestUser) -> TestJob:
+    job_type = "job_query_xss"
+    func_name = f"{job_type}_{secrets.token_hex(3)}"
+
+    job = create_simple_query_job(user)
+    job.job_type = job_type
+    job.func_name = func_name
+    job.query += ""
+    job.should_succeed = False
+
+    return job
+
+
+def create_job_many_columns(user: TestUser) -> TestJob:
+    job_type = "job_many_columns"
+    func_name = f"{job_type}_{secrets.token_hex(3)}"
+
+    job = create_simple_query_job(user)
+    job.job_type = job_type
+    job.func_name = func_name
+    settings = job.settings
+    job.settings["num_extra_cols"] = random.randint(100, 1000)  # nosec: B311
+
+    new_columns_string = ", ".join(
+        f"{settings['score_col']} as col_{i}" for i in range(settings["num_extra_cols"])
+    )
+
+    job.query = f"""
+    SELECT {settings['groupby_col']}, AVG({settings['score_col']}) AS average_score, {new_columns_string}
+    FROM {settings['dataset']}.{settings['table']}
+    GROUP BY {settings['groupby_col']}
+    LIMIT {settings['limit']}""".strip()  # nosec: B608
+
+    return job
+
+
+def create_random_job(user: TestUser) -> TestJob:
+    job_func = random.choice(create_job_functions)  # nosec: B311
+    return job_func(user)
+
+
+def create_jobs(users: list[TestUser], total_jobs: int = 10) -> list[TestJob]:
+    jobs = []
+    num_users = len(users)
+    user_index = 0
+    each_count = 0
+    # keep making jobs until we have enough
+    while len(jobs) < total_jobs:
+        # if we haven't used each job type yet, keep getting the next one
+        if each_count < len(create_job_functions):
+            job_func = create_job_functions[each_count]
+            each_count += 1
+        else:
+            # otherwise let's get a random one
+            job_func = create_random_job
+        # use the current index of user
+        jobs.append(job_func(users[user_index]))
+
+        # only go as high as the last user index
+        if user_index < num_users - 1:
+            user_index += 1
+        else:
+            # reset back to the first user
+            user_index = 0
+
+    # in case we stuffed up
+    if len(jobs) > total_jobs:
+        jobs = jobs[:total_jobs]
+    return jobs
+
+
+def submit_job(job: TestJob) -> tuple[Any, str]:
+    client = job.client
+    response = client.api.services.bigquery.submit_query(
+        func_name=job.func_name, query=job.query
+    )
+    job.code_path = extract_code_path(response)
+    return response
+
+
+def extract_code_path(response) -> str | None:
+    pattern = r"client\.code\.(\w+)\(\)"
+    match = re.search(pattern, str(response))
+    if match:
+        extracted_code = match.group(1)
+        return extracted_code
+    return None
+
+
+def approve_by_running(request):
+    job = request.code(blocking=False)
+    result = job.wait()
+    print("got result of type", type(result), "bool", bool(result))
+    # got result of type bool False
+    # assert result won't work unless we know what type is coming back
+    job_info = job.info(result=True)
+    # need force when running multiple times
+    # TODO: check and don't run if it's already done
+    response = request.deposit_result(job_info, approve=True, force=True)
+    return response
+
+
+def get_job_emails(jobs, client, email_server):
+    all_requests = client.requests
+    res = {}
+    for job in jobs:
+        request = get_request_for_job_info(all_requests, job)
+        emails = email_server.get_emails_for_user(request.requesting_user_email)
+        res[request.requesting_user_email] = emails
+    return res
+
+
+def resolve_request(request):
+    service_func_name = request.code.service_func_name
+    if service_func_name.startswith("simple_query"):
+        request.approve()  # approve because it is good
+    if service_func_name.startswith("wrong_asset_query"):
+        request.approve()  # approve because it is bad
+    if service_func_name.startswith("wrong_syntax_query"):
+        request.approve()  # approve because it is bad
+    if service_func_name.startswith("job_too_much_text"):
+        request.deny(reason="too long, boring!")  # deny because it is bad
+    if service_func_name.startswith("job_long_name"):
+        request.approve()
+    if service_func_name.startswith("job_funcname_xss"):
+        request.deny(reason="too long, boring!")  # never reached, doesn't matter
+    if service_func_name.startswith("job_query_xss"):
+        request.approve()  # approve because it is bad
+    if service_func_name.startswith("job_many_columns"):
+        request.approve()  # approve because it is bad
+
+    return (request.id, request.status)
+
+
+create_job_functions = [
+    create_simple_query_job,  # quick way to increase the odds
+    create_simple_query_job,
+    create_simple_query_job,
+    create_simple_query_job,
+    create_simple_query_job,
+    create_simple_query_job,
+    create_wrong_syntax_query,
+    create_long_query_job,
+    create_query_long_name,
+    create_job_funcname_xss,
+    create_job_query_xss,
+    create_job_many_columns,
+]
+
+
+def save_jobs(jobs, filepath="./jobs.json"):
+    user_jobs = defaultdict(list)
+    for job in jobs:
+        user_jobs[job.user_email].append(job.to_dict())
+    with open(filepath, "w") as f:
+        f.write(json.dumps(user_jobs))
+
+
+def load_jobs(users, high_client, filepath="./jobs.json"):
+    data = {}
+    try:
+        with open(filepath) as f:
+            data = json.loads(f.read())
+    except Exception as e:
+        print(f"can't read file: {filepath}: {e}")
+        data = {}
+    jobs_list = []
+    for user in users:
+        if user.email not in data:
+            print(f"{user.email} missing from jobs")
+            continue
+        user_jobs = data[user.email]
+        for user_job in user_jobs:
+            test_job = TestJob(**user_job)
+            if user._client_cache is None:
+                user.client = high_client
+            test_job.client = user.client
+            jobs_list.append(test_job)
+    return jobs_list
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/sync_helpers.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/sync_helpers.py
new file mode 100644
index 00000000000..e1d558016ba
--- /dev/null
+++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/sync_helpers.py
@@ -0,0 +1,190 @@
+# third party
+from tqdm import tqdm
+
+# syft absolute
+import syft as sy
+from syft.client.datasite_client import DatasiteClient
+from syft.client.syncing import compare_clients
+from syft.service.code.user_code import UserCode
+from syft.service.job.job_stash import Job
+from syft.service.job.job_stash import JobStatus
+from syft.service.request.request import Request
+from syft.service.request.request import RequestStatus
+from syft.service.sync.diff_state import ObjectDiffBatch
+from syft.types.result import Err
+
+
+def deny_requests_without_autosync_tag(client_low: DatasiteClient):
+    # Deny all requests that are not autosync
+    requests = client_low.requests.get_all()
+    if isinstance(requests, sy.SyftError):
+        print(requests)
+        return
+
+    denied_requests = []
+    for request in tqdm(requests):
+        if request.status != RequestStatus.PENDING:
+            continue
+        if "autosync" not in request.tags:
+            request.deny(
+                reason="This request has been denied automatically. "
+                "Please use the designated API to submit your request."
+            )
+            denied_requests.append(request.id)
+    print(f"Denied {len(denied_requests)} requests without autosync tag")
+
+
+def is_request_to_sync(batch: ObjectDiffBatch) -> bool:
+    # True if this is a new low-side request
+    # TODO add condition for sql requests/usercodes
+    low_request = batch.root.low_obj
+    return (
+        isinstance(low_request, Request)
+        and batch.status == "NEW"
+        and "autosync" in low_request.tags
+    )
+
+
+def is_job_to_sync(batch: ObjectDiffBatch):
+    # True if this is a new high-side job that is either COMPLETED or ERRORED
+    if batch.status != "NEW":
+        return False
+    if not isinstance(batch.root.high_obj, Job):
+        return False
+    job = batch.root.high_obj
+    return job.status in (JobStatus.ERRORED, JobStatus.COMPLETED)
+
+
+def execute_requests(
+    client_high: DatasiteClient, request_ids: list[sy.UID]
+) -> dict[sy.UID, Job]:
+    jobs_by_request_id = {}
+    for request_id in request_ids:
+        request = client_high.requests.get_by_uid(request_id)
+        if not isinstance(request, Request):
+            continue
+
+        code = request.code
+        if not isinstance(code, UserCode):
+            continue
+
+        func_name = request.code.service_func_name
+        api_func = getattr(client_high.code, func_name, None)
+        if api_func is None:
+            continue
+
+        job = api_func(blocking=False)
+        jobs_by_request_id[request_id] = job
+
+    return jobs_by_request_id
+
+
+def deny_failed_jobs(
+    client_low: DatasiteClient,
+    jobs: list[Job],
+) -> None:
+    # NOTE no syncing is needed, requests are denied on the low side
+    denied_requests = []
+
+    for job in jobs:
+        if job.status != JobStatus.ERRORED:
+            continue
+
+        error_result = job.result
+        if isinstance(error_result, Err):
+            error_msg = error_result.err_value
+        else:
+            error_msg = "An unknown error occurred, please check the Job logs for more information."
+
+        code_id = job.user_code_id
+        if code_id is None:
+            continue
+        requests = client_low.requests.get_by_usercode_id(code_id)
+        if isinstance(requests, list) and len(requests) > 0:
+            request = requests[0]
+            request.deny(reason=f"Execution failed: {error_msg}")
+            denied_requests.append(request.id)
+        else:
+            print(f"Failed to deny request for job {job.id}")
+
+    print(f"Denied {len(denied_requests)} failed requests")
+
+
+def sync_finished_jobs(
+    client_low: DatasiteClient,
+    client_high: DatasiteClient,
+) -> dict[sy.UID, sy.SyftError | sy.SyftSuccess] | sy.SyftError:
+    sync_job_results = {}
+    synced_jobs = []
+    diff = compare_clients(
+        from_client=client_high, to_client=client_low, include_types=["job"]
+    )
+    if isinstance(diff, sy.SyftError):
+        print(diff)
+        return diff
+
+    for batch in diff.batches:
+        if is_job_to_sync(batch):
+            job = batch.root.high_obj
+
+            w = batch.resolve(build_state=False)
+            share_result = w.click_share_all_private_data()
+            if isinstance(share_result, sy.SyftError):
+                sync_job_results[job.id] = share_result
+                continue
+            sync_result = w.click_sync()
+
+            synced_jobs.append(job)
+            sync_job_results[job.id] = sync_result
+
+    print(f"Sharing {len(sync_job_results)} new results")
+    deny_failed_jobs(client_low, synced_jobs)
+    return sync_job_results
+
+
+def sync_new_requests(
+    client_low: DatasiteClient,
+    client_high: DatasiteClient,
+) -> dict[sy.UID, sy.SyftSuccess | sy.SyftError] | sy.SyftError:
+    sync_request_results = {}
+    diff = compare_clients(
+        from_client=client_low, to_client=client_high, include_types=["request"]
+    )
+    if isinstance(diff, sy.SyftError):
+        print(diff)
+        return sync_request_results
+    print(f"{len(diff.batches)} request batches found")
+    for batch in tqdm(diff.batches):
+        if is_request_to_sync(batch):
+            request_id = batch.root.low_obj.id
+            w = batch.resolve(build_state=False)
+            result = w.click_sync()
+            sync_request_results[request_id] = result
+    return sync_request_results
+
+
+def sync_and_execute_new_requests(
+    client_low: DatasiteClient, client_high: DatasiteClient
+) -> None:
+    sync_results = sync_new_requests(client_low, client_high)
+    if isinstance(sync_results, sy.SyftError):
+        print(sync_results)
+        return
+
+    request_ids = [
+        uid for uid, res in sync_results.items() if isinstance(res, sy.SyftSuccess)
+    ]
+    print(f"Synced {len(request_ids)} new requests")
+
+    jobs_by_request = execute_requests(client_high, request_ids)
+    print(f"Started {len(jobs_by_request)} new jobs")
+
+
+def auto_sync(client_low: DatasiteClient, client_high: DatasiteClient) -> None:
+    print("Starting auto sync")
+    print("Denying non tagged jobs")
+    deny_requests_without_autosync_tag(client_low)
+    print("Syncing and executing")
+    sync_and_execute_new_requests(client_low, client_high)
+    sync_finished_jobs(client_low, client_high)
+    print("Finished auto sync")
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/worker_helpers.py b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/worker_helpers.py
new file mode 100644
index 00000000000..3c2667fecc8
--- /dev/null
+++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_helpers/worker_helpers.py
@@ -0,0 +1,86 @@
+# syft absolute
+import syft as sy
+
+
+def build_and_launch_worker_pool_from_docker_str(
+    environment: str,
+    client: sy.DatasiteClient,
+    worker_pool_name: str,
+    custom_pool_pod_annotations: dict,
+    custom_pool_pod_labels: dict,
+    worker_dockerfile: str,
+    external_registry: str,
+    docker_tag: str,
+    scale_to: int,
+):
+    result = client.api.services.image_registry.add(external_registry)
+    assert "success" in result.message  # nosec: B101
+
+    # For some reason, when using k9s, result.value is empty so can't use the below line
+    # local_registry = result.value
+    local_registry = client.api.services.image_registry[0]
+
+    docker_config = sy.DockerWorkerConfig(dockerfile=worker_dockerfile)
+    assert docker_config.dockerfile == worker_dockerfile  # nosec: B101
+    submit_result = client.api.services.worker_image.submit(worker_config=docker_config)
+    print(submit_result.message)
+    assert "success" in submit_result.message  # nosec: B101
+
+    worker_image = submit_result.value
+
+    if environment == "remote":
+        docker_build_result = client.api.services.worker_image.build(
+            image_uid=worker_image.id,
+            tag=docker_tag,
+            registry_uid=local_registry.id,
+        )
+        print(docker_build_result)
+
+    if environment == "remote":
+        push_result = client.api.services.worker_image.push(worker_image.id)
+        print(push_result)
+
+    result = client.api.services.worker_pool.launch(
+        pool_name=worker_pool_name,
+        image_uid=worker_image.id,
+        num_workers=1,
+        pod_annotations=custom_pool_pod_annotations,
+        pod_labels=custom_pool_pod_labels,
+    )
+    print(result)
+    # assert 'success' in str(result.message)
+
+    if environment == "remote":
+        result = client.worker_pools.scale(number=scale_to, pool_name=worker_pool_name)
+        print(result)
+
+
+def launch_worker_pool_from_docker_tag_and_registry(
+    environment: str,
+    client: sy.DatasiteClient,
+    worker_pool_name: str,
+    custom_pool_pod_annotations: dict,
+    custom_pool_pod_labels: dict,
+    docker_tag: str,
+    external_registry: str,
+    scale_to: int = 1,
+):
+    res = client.api.services.image_registry.add(external_registry)
+    assert "success" in res.message  # nosec: B101
+    docker_config = sy.PrebuiltWorkerConfig(tag=docker_tag)
+    image_result = client.api.services.worker_image.submit(worker_config=docker_config)
+    assert "success" in image_result.message  # nosec: B101
+    worker_image = image_result.value
+
+    launch_result = client.api.services.worker_pool.launch(
+        pool_name=worker_pool_name,
+        image_uid=worker_image.id,
+        num_workers=1,
+        pod_annotations=custom_pool_pod_annotations,
+        pod_labels=custom_pool_pod_labels,
+    )
+    if environment == "remote" and scale_to > 1:
+        result = client.worker_pools.scale(number=scale_to, pool_name=worker_pool_name)
+        print(result)
+
+    return launch_result
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/000-start-and-configure-server-and-admins.ipynb b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/000-start-and-configure-server-and-admins.ipynb
new file mode 100644
index 00000000000..f1e17f0f7cb
--- /dev/null
+++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/000-start-and-configure-server-and-admins.ipynb
@@ -0,0 +1,285 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # stdlib\n",
+    "# import os\n",
+    "\n",
+    "# os.environ[\"ORCHESTRA_DEPLOYMENT_TYPE\"] = \"remote\"\n",
+    "# os.environ[\"DEV_MODE\"] = \"True\"\n",
+    "# os.environ[\"TEST_EXTERNAL_REGISTRY\"] = \"k3d-registry.localhost:5800\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# isort: off\n",
+    "# stdlib\n",
+    "from os import environ as env\n",
+    "import os\n",
+    "import sys\n",
+    "\n",
+    "# syft absolute\n",
+    "import syft as sy\n",
+    "\n",
+    "# from syft import get_helpers # noqa: F401\n",
+    "from syft.util.util import (\n",
+    "    find_base_dir_with_tox_ini,\n",
+    "    get_caller_file_path,\n",
+    "    is_interpreter_jupyter,\n",
+    ")\n",
+    "\n",
+    "\n",
+    "def add_helper_path_to_python_path() -> None:\n",
+    "    current_path = \".\"\n",
+    "\n",
+    "    # jupyter uses \".\" which resolves to the notebook\n",
+    "    if not is_interpreter_jupyter():\n",
+    "        # python uses the file which has from syft import test_settings in it\n",
+    "        import_path = get_caller_file_path()\n",
+    "        if import_path:\n",
+    "            current_path = import_path\n",
+    "\n",
+    "    base_dir = find_base_dir_with_tox_ini(current_path)\n",
+    "    notebook_helper_path = os.path.join(\n",
+    "        base_dir, \"notebooks/scenarios/bigquery/upgradability/0.9.1_helpers\"\n",
+    "    )\n",
+    "    sys.path.append(notebook_helper_path)\n",
+    "\n",
+    "\n",
+    "add_helper_path_to_python_path()\n",
+    "\n",
+    "# third party\n",
+    "from email_helpers import get_email_server\n",
+    "# isort: on"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Launch & login"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# when in k8s these are the default values\n",
+    "ROOT_EMAIL = \"admin@bigquery.org\"\n",
+    "ROOT_PASSWORD = \"bqpw\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# in case we are not in k8s we set them here for orchestra to use\n",
+    "env[\"DEFAULT_ROOT_EMAIL\"] = ROOT_EMAIL\n",
+    "env[\"DEFAULT_ROOT_PASSWORD\"] = ROOT_PASSWORD"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "server = sy.orchestra.launch(\n",
+    "    name=\"bigquery-high-migrations\",\n",
+    "    dev_mode=True,\n",
+    "    server_side_type=\"high\",\n",
+    "    reset=True,\n",
+    "    port=\"8080\",\n",
+    "    n_consumers=1,  # How many workers to be spawned\n",
+    "    create_producer=True,  # Can produce more workers\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "email_server, smtp_server = get_email_server(reset=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "root_client = sy.login(\n",
+    "    url=\"http://localhost:8080\", email=ROOT_EMAIL, password=ROOT_PASSWORD\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Create new admin client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ADMIN_EMAIL, ADMIN_PW = \"admin2@bigquery.org\", \"bqpw2\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create\n",
+    "root_client.register(\n",
+    "    name=\"second admin\", email=ADMIN_EMAIL, password=ADMIN_PW, password_verify=ADMIN_PW\n",
+    ")\n",
+    "# update role\n",
+    "new_user_id = root_client.users.search(email=ADMIN_EMAIL)[0].id\n",
+    "root_client.users.update(uid=new_user_id, role=\"admin\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# We cannot delete the root client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "high_client = sy.login(\n",
+    "    url=\"http://localhost:8080\", email=ADMIN_EMAIL, password=ADMIN_PW\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "root_admin_id = root_client.users.search(email=ROOT_EMAIL)[0].id"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with sy.raises(sy.SyftException):\n",
+    "    high_client.users.delete(root_admin_id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Create ephemeral admin and delete it"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# register 2nd new admin (to delete)\n",
+    "user_email, user_pw = \"admin3@bigquery.org\", \"bqpw3\"\n",
+    "## create\n",
+    "root_client.register(\n",
+    "    name=\"x\", email=user_email, password=user_pw, password_verify=user_pw\n",
+    ")\n",
+    "## update role\n",
+    "new_user_id2 = root_client.users.search(email=user_email)[0].id\n",
+    "root_client.users.update(uid=new_user_id2, role=\"admin\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "root_client.users.delete(new_user_id2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Cleanup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "smtp_server.stop()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "server.land()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "syft_3.12",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/001-scale-delete-worker-pools.ipynb b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/001-scale-delete-worker-pools.ipynb new file mode 100644 index 00000000000..0caab490376 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/001-scale-delete-worker-pools.ipynb @@ -0,0 +1,399 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "# import os\n", + "# os.environ[\"ORCHESTRA_DEPLOYMENT_TYPE\"] = \"remote\"\n", + "# os.environ[\"DEV_MODE\"] = \"True\"\n", + "# os.environ[\"TEST_EXTERNAL_REGISTRY\"] = \"k3d-registry.localhost:5800\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "# isort: off\n", + "# stdlib\n", + "import os\n", + "import sys\n", + "\n", + "# syft absolute\n", + "import syft as sy\n", + "from syft.util.util import (\n", + " find_base_dir_with_tox_ini,\n", + " get_caller_file_path,\n", + " is_interpreter_jupyter,\n", + ")\n", + "\n", + "\n", + "def add_helper_path_to_python_path() -> None:\n", + " current_path = \".\"\n", + "\n", + " # jupyter uses \".\" which resolves to the notebook\n", + " if not is_interpreter_jupyter():\n", + " # python uses the file which has from syft import test_settings in it\n", + " import_path = get_caller_file_path()\n", + " if import_path:\n", + " current_path = import_path\n", + "\n", + " base_dir = find_base_dir_with_tox_ini(current_path)\n", + " notebook_helper_path = os.path.join(\n", + " base_dir, \"notebooks/scenarios/bigquery/upgradability/0.9.1_helpers\"\n", + " )\n", + " sys.path.append(notebook_helper_path)\n", + "\n", + "\n", + "add_helper_path_to_python_path()\n", + "\n", + "# third party\n", + "from email_helpers import Timeout\n", + "from email_helpers import get_email_server\n", + "# isort: on" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "environment = os.environ.get(\"ORCHESTRA_DEPLOYMENT_TYPE\", \"python\")\n", + "\n", + "num_workers = int(os.environ.get(\"NUM_TEST_WORKERS\", 1))\n", + "\n", + "ROOT_EMAIL = \"admin@bigquery.org\"\n", + "ROOT_PASSWORD = \"bqpw\"\n", + "environment" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "### Launch server & login" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "server = sy.orchestra.launch(\n", + " name=\"bigquery-high-migrations\",\n", + " dev_mode=True,\n", + " server_side_type=\"high\",\n", + " port=\"8080\",\n", + " n_consumers=num_workers, # How many workers to be spawned\n", + " create_producer=True, # Can produce more workers\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "email_server, smtp_server = get_email_server(reset=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "high_client = sy.login(\n", + " url=\"http://localhost:8080\", email=ROOT_EMAIL, password=ROOT_PASSWORD\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + 
"high_client.worker_pools" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "default_worker_pool = high_client.worker_pools.get_by_name(\"default-pool\")\n", + "default_worker_pool" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "### Scale Worker pool" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "##### Scale up" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "# Scale to 1\n", + "if environment == \"remote\":\n", + " high_client.api.worker_pool.scale(\n", + " number=num_workers, pool_name=default_worker_pool.name\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "high_client.api.services.worker_pool[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "# Scale up workers\n", + "if environment == \"remote\":\n", + " scale_up_result = high_client.api.worker_pool.scale(\n", + " number=5, pool_name=default_worker_pool.name\n", + " )\n", + " if environment == \"remote\":\n", + " assert scale_up_result, scale_up_result\n", + "\n", + " assert (\n", + " high_client.api.services.worker_pool[default_worker_pool.name].max_count\n", + " == 5\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "##### Scale down" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "# Scale down workers, this gracefully shutdowns the consumers\n", + "if environment == \"remote\":\n", + " scale_down_result = high_client.api.worker_pool.scale(\n", + " number=num_workers, pool_name=default_worker_pool.name\n", + " )\n", + " assert scale_down_result, scale_down_result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "if environment == \"remote\":\n", + "\n", + " def has_worker_scaled_down():\n", + " return (\n", + " high_client.api.worker_pool[default_worker_pool.name].max_count\n", + " == num_workers\n", + " )\n", + "\n", + " worker_scale_timeout = Timeout(timeout_duration=20)\n", + " worker_scale_timeout.run_with_timeout(has_worker_scaled_down)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "if environment == \"remote\":\n", + " assert (\n", + " high_client.api.services.worker_pool[default_worker_pool.name].max_count\n", + " == num_workers\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "19", + "metadata": {}, + "source": [ + "#### Delete Worker Pool" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "pool_delete_result = high_client.api.services.worker_pool.delete(\n", + " pool_name=default_worker_pool.name\n", + ")\n", + "pool_delete_result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "with sy.raises(KeyError):\n", + " _ = high_client.api.services.worker_pool[default_worker_pool.name]" + ] + }, + { + "cell_type": "markdown", + "id": "22", + "metadata": {}, + "source": [ + "#### Re-launch the default worker pool" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "default_worker_image = default_worker_pool.image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24", + "metadata": {}, + "outputs": [], + "source": [ + "launch_result = high_client.api.services.worker_pool.launch(\n", + " pool_name=default_worker_pool.name,\n", + " image_uid=default_worker_image.id,\n", + " num_workers=num_workers,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25", + "metadata": {}, + "outputs": [], + "source": [ + "assert high_client.api.services.worker_pool[default_worker_pool.name]\n", + "assert (\n", + " high_client.api.services.worker_pool[default_worker_pool.name].max_count\n", + " == num_workers\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26", + "metadata": {}, + "outputs": [], + "source": [ + "smtp_server.stop()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27", + "metadata": {}, + "outputs": [], + "source": [ + "server.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "syft", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/010-setup-bigquery-pool.ipynb b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/010-setup-bigquery-pool.ipynb new file mode 100644 index 00000000000..d40acbcb36d --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/010-setup-bigquery-pool.ipynb @@ -0,0 +1,569 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import os\n", + "# os.environ[\"ORCHESTRA_DEPLOYMENT_TYPE\"] = \"remote\"\n", + "# os.environ[\"DEV_MODE\"] = \"True\"\n", + "# os.environ[\"TEST_EXTERNAL_REGISTRY\"] = \"k3d-registry.localhost:5800\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# isort: off\n", + "# stdlib\n", + "import os\n", + "import sys\n", + "\n", + "# syft absolute\n", + "import syft as sy\n", + "from syft.util.util import (\n", + " find_base_dir_with_tox_ini,\n", + " get_caller_file_path,\n", + " is_interpreter_jupyter,\n", + ")\n", + "\n", + "\n", + "def add_helper_path_to_python_path() -> None:\n", + " current_path = \".\"\n", + "\n", + " # jupyter uses \".\" which resolves to the notebook\n", + " if not is_interpreter_jupyter():\n", + " # python uses the file which has from syft import test_settings in it\n", + " import_path = get_caller_file_path()\n", + " if import_path:\n", + " current_path = import_path\n", + "\n", + " base_dir = find_base_dir_with_tox_ini(current_path)\n", + " notebook_helper_path = os.path.join(\n", + " base_dir, \"notebooks/scenarios/bigquery/upgradability/0.9.1_helpers\"\n", + " )\n", + " sys.path.append(notebook_helper_path)\n", + "\n", + "\n", + "add_helper_path_to_python_path()\n", + "from syft import test_settings\n", + "\n", + "# third party\n", + "from email_helpers import get_email_server\n", + "# isort: on" + ] + }, + { + "cell_type": "code", + 
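"execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical sketch (not the real implementation): find_base_dir_with_tox_ini,\n",
+    "# imported above from syft.util.util, presumably walks up the directory tree\n",
+    "# until it finds a tox.ini; a minimal standalone equivalent of that idea:\n",
+    "def _find_base_dir_sketch(start: str = \".\") -> str:\n",
+    "    path = os.path.abspath(start)\n",
+    "    while not os.path.exists(os.path.join(path, \"tox.ini\")):\n",
+    "        parent = os.path.dirname(path)\n",
+    "        if parent == path:  # hit the filesystem root without a match\n",
+    "            raise FileNotFoundError(f\"no tox.ini found above {start}\")\n",
+    "        path = parent\n",
+    "    return path"
+   ]
+  },
+  {
+   "cell_type": "code",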
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "environment = os.environ.get(\"ORCHESTRA_DEPLOYMENT_TYPE\", \"python\")\n", + "environment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Launch & login" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server = sy.orchestra.launch(\n", + " name=\"bigquery-high-migrations\",\n", + " dev_mode=True,\n", + " server_side_type=\"high\",\n", + " port=\"8080\",\n", + " n_consumers=1, # How many workers to be spawned\n", + " create_producer=True, # Can produce more workers\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_, smtp_server = get_email_server(reset=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ADMIN_EMAIL, ADMIN_PW = \"admin2@bigquery.org\", \"bqpw2\"\n", + "high_client = sy.login(\n", + " url=\"http://localhost:8080\", email=ADMIN_EMAIL, password=ADMIN_PW\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Submit images and build pools" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add registry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.worker_pools" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(high_client.worker_pools.get_all()) == 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "external_registry = test_settings.get(\"external_registry\", default=\"docker.io\")\n", + "external_registry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = high_client.api.services.image_registry.add(external_registry)\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "image_registry_list = high_client.api.services.image_registry.get_all()\n", + "image_registry_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_registry = image_registry_list[0]\n", + "local_registry" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Upload Image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dockerfile_list = high_client.images.get_all()\n", + "dockerfile_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_worker_image = next(\n", + " (\n", + " image\n", + " for image in dockerfile_list\n", + " if image.is_prebuilt and \"syft-backend\" in str(image.config)\n", + " ),\n", + " None,\n", + ")\n", + "base_worker_image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_worker_image.image_identifier" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "worker_dockerfile = f\"\"\"\n", + "FROM {str(base_worker_image.image_identifier)}\n", + "\n", + "RUN uv pip install db-dtypes google-cloud-bigquery\n", + "\n", + "\"\"\".strip()\n", + "worker_dockerfile" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docker_config = sy.DockerWorkerConfig(dockerfile=worker_dockerfile)\n", + "assert docker_config.dockerfile == worker_dockerfile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "submit_result = high_client.api.services.worker_image.submit(\n", + " worker_config=docker_config\n", + ")\n", + "submit_result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dockerfile_list = high_client.images.get_all()\n", + "dockerfile_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "worker_image = next(\n", + " (\n", + " image\n", + " for image in dockerfile_list\n", + " if not image.is_prebuilt and image.config.dockerfile == worker_dockerfile\n", + " ),\n", + " None,\n", + ")\n", + "worker_image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(worker_image.config.dockerfile)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# worker_docker_tag = f\"openmined/bigquery:{sy.__version__}\"\n", + "# worker_docker_tag" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docker_tag = str(base_worker_image.image_identifier).replace(\n", + " \"backend\", \"worker-bigquery\"\n", + ")\n", + "docker_tag" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if environment == \"remote\":\n", + " docker_build_result = high_client.api.services.worker_image.build(\n", + " image_uid=worker_image.id,\n", + " tag=docker_tag,\n", + " registry_uid=local_registry.id,\n", + " )\n", + " print(docker_build_result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if environment == \"remote\":\n", + " push_result = high_client.api.services.worker_image.push(worker_image.id)\n", + " print(push_result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dockerfile_list = high_client.images.get_all()\n", + "dockerfile_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docker_config = sy.PrebuiltWorkerConfig(tag=docker_tag)\n", + "docker_config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# overwrite it for now Mongo ignore\n", + "result = high_client.api.services.worker_image.submit(worker_config=docker_config)\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dockerfile_list = high_client.images.get_all()\n", + "dockerfile_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: fix\n", + "# something is wrong here, sometimes it has the non prebuilt one\n", + "# other times it only has the one we built; in python there are multiple\n", + "# for now lets just use which ever one has worker-bigquery in its\n", + "# identifier so we can create a k8s worker pool" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "worker_image = next(\n", + " (\n", + " image\n", + " for image in dockerfile_list\n", + " if \"worker-bigquery\" in str(image.image_identifier)\n", + " ),\n", + " None,\n", + ")\n", + "worker_image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert worker_image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "worker_pool_name = \"bigquery-pool\"\n", + "custom_pool_pod_annotations = {\"bigquery-custom-pool\": \"Pod annotation for bigquery\"}\n", + "custom_pool_pod_labels = {\"bigquery-custom-pool\": \"Pod_label_for_bigquery\"}\n", + "\n", + "num_workers = int(os.environ.get(\"NUM_TEST_WORKERS\", 1))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Launch pool" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = high_client.api.services.worker_pool.launch(\n", + " pool_name=worker_pool_name,\n", + " image_uid=worker_image.id,\n", + " num_workers=1,\n", + " pod_annotations=custom_pool_pod_annotations,\n", + " pod_labels=custom_pool_pod_labels,\n", + ")\n", + "result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scale pool" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if environment == \"remote\":\n", + " result = high_client.worker_pools.scale(\n", + " number=num_workers, pool_name=worker_pool_name\n", + " )\n", + " print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(high_client.worker_pools.get_all()) == 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.settings.allow_guest_signup(enable=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(high_client.api.services.user.get_all()) == 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cleanup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "smtp_server.stop()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/011-users-emails-passwords.ipynb b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/011-users-emails-passwords.ipynb new file mode 100644 index 00000000000..4206222a490 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/011-users-emails-passwords.ipynb @@ -0,0 +1,680 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "# import os\n", + "# os.environ[\"ORCHESTRA_DEPLOYMENT_TYPE\"] 
= \"remote\"\n", + "# os.environ[\"DEV_MODE\"] = \"True\"\n", + "# os.environ[\"TEST_EXTERNAL_REGISTRY\"] = \"k3d-registry.localhost:5800\"\n", + "# # !pip install aiosmtpd\n", + "# # !uv pip install aiosmtpd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "# isort: off\n", + "# stdlib\n", + "import os\n", + "\n", + "import sys\n", + "\n", + "# syft absolute\n", + "import syft as sy\n", + "from syft.util.util import (\n", + " find_base_dir_with_tox_ini,\n", + " get_caller_file_path,\n", + " is_interpreter_jupyter,\n", + ")\n", + "\n", + "\n", + "def add_helper_path_to_python_path() -> None:\n", + " current_path = \".\"\n", + "\n", + " # jupyter uses \".\" which resolves to the notebook\n", + " if not is_interpreter_jupyter():\n", + " # python uses the file which has from syft import test_settings in it\n", + " import_path = get_caller_file_path()\n", + " if import_path:\n", + " current_path = import_path\n", + "\n", + " base_dir = find_base_dir_with_tox_ini(current_path)\n", + " notebook_helper_path = os.path.join(\n", + " base_dir, \"notebooks/scenarios/bigquery/upgradability/0.9.1_helpers\"\n", + " )\n", + " sys.path.append(notebook_helper_path)\n", + "\n", + "\n", + "add_helper_path_to_python_path()\n", + "# third party\n", + "from email_helpers import SENDER\n", + "from email_helpers import create_user\n", + "from email_helpers import get_email_server\n", + "from email_helpers import make_user\n", + "from email_helpers import save_users\n", + "# isort: on" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "ADMIN_EMAIL, ADMIN_PW = \"admin2@bigquery.org\", \"bqpw2\"\n", + "\n", + "environment = os.environ.get(\"ORCHESTRA_DEPLOYMENT_TYPE\", \"python\")\n", + "environment" + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "# Launch server & login" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "server = sy.orchestra.launch(\n", + " name=\"bigquery-high-migrations\",\n", + " dev_mode=True,\n", + " server_side_type=\"high\",\n", + " port=\"8080\",\n", + " n_consumers=1, # How many workers to be spawned\n", + " create_producer=True, # Can produce more workers\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "email_server, smtp_server = get_email_server()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "high_client = sy.login(\n", + " url=\"http://localhost:8080\", email=ADMIN_EMAIL, password=ADMIN_PW\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "high_client.users" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: this should show SyftSuccess?\n", + "high_client.api.services.settings.enable_notifications(\n", + " email_sender=SENDER,\n", + " email_server=\"localhost\",\n", + " email_port=\"9025\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "# # TODO: this should show SyftSuccess?\n", + "# high_client.api.services.settings.disable_notifications()" + ] + }, + { + "cell_type": "markdown", + "id": 
"10", + "metadata": {}, + "source": [ + "# Register users" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "num_users = int(os.environ.get(\"NUM_TEST_USERS\", 5))\n", + "print(f\"registering {num_users} users\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "users = []\n", + "email_disable_index = 0\n", + "reset_password_index = 1\n", + "for i in range(num_users):\n", + " user = make_user()\n", + " user._email_server = email_server\n", + " create_user(high_client, user)\n", + " user.client = high_client\n", + " if email_disable_index == i:\n", + " user.email_disabled = True\n", + " if reset_password_index == i:\n", + " user.reset_password = True\n", + " users.append(user)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "save_users(users)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "# stdlib\n", + "import asyncio\n", + "\n", + "await asyncio.sleep(5)" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "## Verify Emails are sent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "# everyone gets a welcome email\n", + "server_name = high_client.name\n", + "for user in users:\n", + " emails = user.emails\n", + " assert len(emails) == 1\n", + " welcome_email = user.emails[0]\n", + " assert welcome_email.email_from == SENDER\n", + " assert len(welcome_email.email_to) == 1\n", + " assert welcome_email.email_to[0] == user.email\n", + " assert f\"Welcome to {server_name}\" in welcome_email.email_content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "# one user disables notifications\n", + "# one user requests a password reset\n", + "no_email_user = None\n", + "reset_password_user = None\n", + "for user in users:\n", + " user.client = high_client # get user client\n", + " if user.email_disabled:\n", + " no_email_user = user\n", + " # disable for this user only\n", + " user.client.api.notifications.deactivate()\n", + "\n", + " if user.reset_password:\n", + " # ask admin for forgot password flow\n", + " user.client.guest().forgot_password(email=user.email)\n", + " assert \"Password Reset Requested\" in user.emails[1].email_content\n", + " reset_password_user = user" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "ds0 = users[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "ds0_user = ds0.client.account\n", + "ds0_user" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "# with sy.raises(\n", + "# sy.SyftException(public_message=\"*tried to update user*\"\n", + "# ), show=True): this is different on k8s no idea why\n", + "with sy.raises(sy.SyftException, show=True):\n", + " ds0.client.users.update(uid=ds0_user.id, role=\"admin\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "# with sy.raises(sy.SyftException(public_message=\"*tried to update user*\"), 
show=True):\n", + "with sy.raises(sy.SyftException, show=True):\n", + " ds0_user.update(role=\"admin\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: test disabling and re-enabling all notifications" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "# high_client.api.services.settings.disable_notifications()\n", + "# high_client.api.services.settings.enable_notifications()" + ] + }, + { + "cell_type": "markdown", + "id": "24", + "metadata": {}, + "source": [ + "## Test reset password" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25", + "metadata": {}, + "outputs": [], + "source": [ + "# # This is necessary as it sets the new token value in user.reset_token\n", + "token = reset_password_user.get_token()\n", + "token" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26", + "metadata": {}, + "outputs": [], + "source": [ + "# This is necessary as it sets the new password value in user.new_password\n", + "passw = reset_password_user.make_new_password()\n", + "passw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27", + "metadata": {}, + "outputs": [], + "source": [ + "assert token\n", + "assert passw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28", + "metadata": {}, + "outputs": [], + "source": [ + "output = reset_password_user.client.guest().reset_password(\n", + " token=reset_password_user.reset_token, new_password=reset_password_user.new_password\n", + ")\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29", + "metadata": {}, + "outputs": [], + "source": [ + "assert isinstance(output, sy.SyftSuccess)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30", + "metadata": {}, + "outputs": [], + "source": [ + "# relogin\n", + "reset_password_user.relogin()\n", + "# reset_password_user.client = reset_password_user.client" + ] + }, + { + "cell_type": "markdown", + "id": "31", + "metadata": {}, + "source": [ + "## Reset password second time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32", + "metadata": {}, + "outputs": [], + "source": [ + "reset_password_user.client.guest().forgot_password(email=reset_password_user.email)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33", + "metadata": {}, + "outputs": [], + "source": [ + "output = reset_password_user.client.guest().reset_password(\n", + " token=reset_password_user.get_token(),\n", + " new_password=reset_password_user.make_new_password(),\n", + ")\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34", + "metadata": {}, + "outputs": [], + "source": [ + "assert isinstance(output, sy.SyftSuccess)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35", + "metadata": {}, + "outputs": [], + "source": [ + "# print(f\"token:\\t\\t {reset_password_user.reset_token}\\n\\\n", + "# password:\\t {reset_password_user.password}\\n\\\n", + "# new password:\\t {reset_password_user.new_password}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36", + "metadata": {}, + "outputs": [], + "source": [ + "reset_password_user.update_password()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37", + "metadata": {}, + "outputs": [], + "source": [ + "# print(f\"token:\\t\\t 
{reset_password_user.reset_token}\\n\\\n",
+    "# password:\\t {reset_password_user.password}\\n\\\n",
+    "# new password:\\t {reset_password_user.new_password}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# relogin\n",
+    "reset_password_user.relogin()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "39",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "save_users(users)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "40",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "reset_password_user"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "41",
+   "metadata": {},
+   "source": [
+    "## Reduce token expiration and try resetting"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "42",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Variable is poorly named, token expiration time is in seconds and not minutes\n",
+    "high_client.api.services.settings.update(pwd_token_config={\"token_exp_min\": 3})\n",
+    "high_client.refresh()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "43",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reset_password_user.client.guest().forgot_password(email=reset_password_user.email)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "44",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Wait 3 seconds to ensure token expires\n",
+    "await asyncio.sleep(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "45",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This should throw a SyftError because we waited too long\n",
+    "# output = reset_password_user.client.guest().reset_password(\n",
+    "#     token=reset_password_user.get_token(),\n",
+    "#     new_password=reset_password_user.make_new_password(),\n",
+    "# )\n",
+    "# output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "46",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# assert isinstance(output, sy.SyftError)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "47",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # relogin\n",
+    "# with sy.raises(sy.SyftException, show=True):\n",
+    "#     reset_password_user.relogin()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "48",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set things back to the default settings\n",
+    "high_client.api.services.settings.update(pwd_token_config={\"token_exp_min\": 1800})\n",
+    "high_client.refresh()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "49",
+   "metadata": {},
+   "source": [
+    "# Cleanup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "50",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "smtp_server.stop()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "51",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "server.land()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "syft_3.12",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+
"nbformat_minor": 5 +} diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/020-configure-api.ipynb b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/020-configure-api.ipynb new file mode 100644 index 00000000000..4a7954cf5e7 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/020-configure-api.ipynb @@ -0,0 +1,663 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import os\n", + "# os.environ[\"ORCHESTRA_DEPLOYMENT_TYPE\"] = \"remote\"\n", + "# os.environ[\"DEV_MODE\"] = \"True\"\n", + "# os.environ[\"TEST_EXTERNAL_REGISTRY\"] = \"k3d-registry.localhost:5800\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# set to use the live APIs\n", + "# os.environ[\"TEST_BIGQUERY_APIS_LIVE\"] = \"True\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# isort: off\n", + "# stdlib\n", + "import os\n", + "import sys\n", + "\n", + "# syft absolute\n", + "import syft as sy\n", + "from syft.util.util import (\n", + " find_base_dir_with_tox_ini,\n", + " get_caller_file_path,\n", + " is_interpreter_jupyter,\n", + ")\n", + "\n", + "\n", + "def add_helper_path_to_python_path() -> None:\n", + " current_path = \".\"\n", + "\n", + " # jupyter uses \".\" which resolves to the notebook\n", + " if not is_interpreter_jupyter():\n", + " # python uses the file which has from syft import test_settings in it\n", + " import_path = get_caller_file_path()\n", + " if import_path:\n", + " current_path = import_path\n", + "\n", + " base_dir = find_base_dir_with_tox_ini(current_path)\n", + " notebook_helper_path = os.path.join(\n", + " base_dir, \"notebooks/scenarios/bigquery/upgradability/0.9.1_helpers\"\n", + " )\n", + " sys.path.append(notebook_helper_path)\n", + "\n", + "\n", + "add_helper_path_to_python_path()\n", + "from syft import test_settings\n", + "\n", + "# third party\n", + "from apis import make_schema\n", + "from apis import make_submit_query\n", + "from apis import make_test_query\n", + "\n", + "# run email server\n", + "from email_helpers import get_email_server\n", + "# isort: on" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ADMIN_EMAIL, ADMIN_PW = \"admin2@bigquery.org\", \"bqpw2\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Launch server & login" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server = sy.orchestra.launch(\n", + " name=\"bigquery-high-migrations\",\n", + " dev_mode=True,\n", + " server_side_type=\"high\",\n", + " port=\"8080\",\n", + " n_consumers=1, # How many workers to be spawned\n", + " create_producer=True, # Can produce more workers\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "email_server, smtp_server = get_email_server()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client = sy.login(\n", + " url=\"http://localhost:8080\", email=ADMIN_EMAIL, password=ADMIN_PW\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "this_worker_pool_name = \"bigquery-pool\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "assert len(high_client.worker_pools.get_all()) == 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create `test_query` endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip list | grep bigquery" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !uv pip install db-dtypes google-cloud-bigquery" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Look up the worker pools and identify the name of the one that has the required packages\n", + "# After, bind the endpoint to that workerpool\n", + "high_client.worker_pools" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mock_func = make_test_query(\n", + " settings={\n", + " \"rate_limiter_enabled\": True,\n", + " \"calls_per_min\": 10,\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "private_func = make_test_query(\n", + " settings={\n", + " \"rate_limiter_enabled\": False,\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "new_endpoint = sy.TwinAPIEndpoint(\n", + " path=\"bigquery.test_query\",\n", + " description=\"This endpoint allows to query Bigquery storage via SQL queries.\",\n", + " private_function=private_func,\n", + " mock_function=mock_func,\n", + " worker_pool=this_worker_pool_name,\n", + ")\n", + "\n", + "high_client.custom_api.add(endpoint=new_endpoint)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Update `test_query` endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Here, we update the endpoint to timeout after 100s (rather the default of 60s)\n", + "high_client.api.services.api.update(\n", + " endpoint_path=\"bigquery.test_query\", endpoint_timeout=120\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.api.services.api.update(\n", + " endpoint_path=\"bigquery.test_query\", hide_mock_definition=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test `test_query` endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset_1 = test_settings.get(\"dataset_1\", default=\"dataset_1\")\n", + "dataset_2 = test_settings.get(\"dataset_2\", default=\"dataset_2\")\n", + "table_1 = test_settings.get(\"table_1\", default=\"table_1\")\n", + "table_2 = test_settings.get(\"table_2\", default=\"table_2\")\n", + "table_2_col_id = test_settings.get(\"table_2_col_id\", default=\"table_id\")\n", + "table_2_col_score = test_settings.get(\"table_2_col_score\", default=\"colname\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test mock version\n", + "result = high_client.api.services.bigquery.test_query.mock(\n", + " sql_query=f\"SELECT * FROM {dataset_1}.{table_1} LIMIT 10\"\n", + ")\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(result) == 10" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test private version\n", + "result = high_client.api.services.bigquery.test_query.private(\n", + " sql_query=f\"SELECT * FROM {dataset_1}.{table_1} LIMIT 10\"\n", + ")\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(result) == 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test mock version for wrong queries\n", + "with sy.raises(\n", + " sy.SyftException(public_message=\"*must be qualified with a dataset*\"), show=True\n", + "):\n", + " _ = high_client.api.services.bigquery.test_query.mock(\n", + " sql_query=\"SELECT * FROM invalid_table LIMIT 1\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test private version\n", + "result = high_client.api.services.bigquery.test_query.private(\n", + " sql_query=f\"SELECT * FROM {dataset_1}.{table_1} LIMIT 1\"\n", + ")\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(result) == 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspect endpoint state" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Inspect the context state on an endpoint\n", + "state = high_client.api.services.bigquery.test_query.mock.context.state\n", + "state" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(state[ADMIN_EMAIL])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(state[ADMIN_EMAIL]) >= 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create `schema` endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "schema_function = make_schema(\n", + " settings={\n", + " \"calls_per_min\": 5,\n", + " },\n", + " worker_pool=this_worker_pool_name,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.custom_api.add(endpoint=schema_function)\n", + "high_client.refresh()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test `schema` endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Testing schema\n", + "result = high_client.api.services.bigquery.schema()\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(result) == 23" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create `submit_query` endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "submit_query_function = make_submit_query(\n", + " settings={}, worker_pool=this_worker_pool_name\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.custom_api.add(endpoint=submit_query_function)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"high_client.api.services.api.update(\n", + " endpoint_path=\"bigquery.submit_query\", hide_mock_definition=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.custom_api.api_endpoints()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(high_client.custom_api.api_endpoints()) == 3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.api.services.bigquery.test_query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.api.services.bigquery.submit_query" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test `submit_query` endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Testing submit query\n", + "result = high_client.api.services.bigquery.submit_query(\n", + " func_name=\"my_func\",\n", + " query=f\"SELECT * FROM {dataset_1}.{table_1} LIMIT 1\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test emails" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# assert (\n", + "# \"Job Failed\"\n", + "# in email_server.get_emails_for_user(user_email=ADMIN_EMAIL)[0].email_content\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# email_server.get_emails_for_user(user_email=\"admin@bigquery.org\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: change this to be all admins or configure which ones etc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# assert len(email_server.get_emails_for_user(user_email=\"admin@bigquery.org\")) > 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# assert \"Query submitted\" in result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cleanup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "smtp_server.stop()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "syft_3.12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/021-create-jobs.ipynb b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/021-create-jobs.ipynb new file mode 100644 index 00000000000..87a3033d9d8 --- /dev/null +++ 
b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/021-create-jobs.ipynb @@ -0,0 +1,515 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "# stdlib\n", + "# import os\n", + "\n", + "# os.environ[\"ORCHESTRA_DEPLOYMENT_TYPE\"] = \"remote\"\n", + "# os.environ[\"DEV_MODE\"] = \"True\"\n", + "# os.environ[\"TEST_EXTERNAL_REGISTRY\"] = \"k3d-registry.localhost:5800\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "# use_live_bigquery = False\n", + "# os.environ[\"TEST_BIGQUERY_APIS_LIVE\"] = str(use_live_bigquery)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "# isort: off\n", + "# stdlib\n", + "from collections import Counter\n", + "\n", + "import os\n", + "import sys\n", + "\n", + "# syft absolute\n", + "import syft as sy\n", + "from syft.util.util import (\n", + " find_base_dir_with_tox_ini,\n", + " get_caller_file_path,\n", + " is_interpreter_jupyter,\n", + ")\n", + "\n", + "\n", + "def add_helper_path_to_python_path() -> None:\n", + " current_path = \".\"\n", + "\n", + " # jupyter uses \".\" which resolves to the notebook\n", + " if not is_interpreter_jupyter():\n", + " # python uses the file which has from syft import test_settings in it\n", + " import_path = get_caller_file_path()\n", + " if import_path:\n", + " current_path = import_path\n", + "\n", + " base_dir = find_base_dir_with_tox_ini(current_path)\n", + " notebook_helper_path = os.path.join(\n", + " base_dir, \"notebooks/scenarios/bigquery/upgradability/0.9.1_helpers\"\n", + " )\n", + " sys.path.append(notebook_helper_path)\n", + "\n", + "\n", + "add_helper_path_to_python_path()\n", + "from syft.service.job.job_stash import JobStatus\n", + "\n", + "# third party\n", + "from email_helpers import get_email_server\n", + "\n", + "# isort: on\n", + "# third party\n", + "from job_helpers import TestJob\n", + "from job_helpers import create_jobs\n", + "from job_helpers import extract_code_path\n", + "from job_helpers import save_jobs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "environment = os.environ.get(\"ORCHESTRA_DEPLOYMENT_TYPE\", \"python\")\n", + "environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "SERVER_PORT = \"8080\"\n", + "SERVER_URL = f\"http://localhost:{SERVER_PORT}\"\n", + "ADMIN_EMAIL, ADMIN_PW = \"admin2@bigquery.org\", \"bqpw2\"" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "# Launch server & login" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "server = sy.orchestra.launch(\n", + " name=\"bigquery-high-migrations\",\n", + " dev_mode=True,\n", + " server_side_type=\"high\",\n", + " port=SERVER_PORT,\n", + " n_consumers=4, # How many workers to be spawned\n", + " create_producer=True, # Can produce more workers\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "email_server, smtp_server = get_email_server(reset=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "high_client = sy.login(\n", + " 
url=\"http://localhost:8080\", email=ADMIN_EMAIL, password=ADMIN_PW\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "# third party\n", + "from email_helpers import load_users" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "[user.email for user in high_client.users.get_all()]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "users = load_users(high_client)" + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, + "source": [ + "# Create jobs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": {}, + "source": [ + "# Inspect job data (requests for these jobs to be created)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "num_jobs = int(os.environ.get(\"NUM_TEST_JOBS\", 10))\n", + "\n", + "jobs_data = create_jobs(users, total_jobs=num_jobs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "counts = Counter([j.job_type for j in jobs_data])\n", + "for k, v in counts.most_common():\n", + " print(f\"{k}: #{v}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"{len(jobs_data)=}\")\n", + "\n", + "for job in jobs_data:\n", + " print(f\"{job.job_type=}, {job.should_succeed=}, {job.should_submit=}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "assert len(jobs_data) == num_jobs\n", + "assert all(isinstance(j, TestJob) for j in jobs_data)\n", + "assert all(job.client is not None for job in jobs_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "save_jobs(jobs_data)" + ] + }, + { + "cell_type": "markdown", + "id": "21", + "metadata": {}, + "source": [ + "# Submit jobs\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [ + "admin_emails_before = len(email_server.get_emails_for_user(\"admin@bigquery.org\"))\n", + "print(f\"{admin_emails_before=}\")" + ] + }, + { + "cell_type": "markdown", + "id": "23", + "metadata": {}, + "source": [ + "## Test Succesful jobs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24", + "metadata": {}, + "outputs": [], + "source": [ + "jobs_submit_should_succeed = [j for j in jobs_data if j.should_submit]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25", + "metadata": {}, + "outputs": [], + "source": [ + "for job in jobs_submit_should_succeed:\n", + " client = job.client\n", + " response = client.api.services.bigquery.submit_query(\n", + " func_name=job.func_name, query=job.query\n", + " )\n", + " job.code_path = extract_code_path(response)" + ] + }, + { + "cell_type": "markdown", + "id": "26", + "metadata": {}, + "source": [ + "## Test failures" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"27", + "metadata": {}, + "outputs": [], + "source": [ + "jobs_submit_should_fail = [j for j in jobs_data if not j.should_submit]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28", + "metadata": {}, + "outputs": [], + "source": [ + "for job in jobs_submit_should_fail:\n", + " client = job.client\n", + "\n", + " with sy.raises(sy.SyftException):\n", + " client.api.services.bigquery.submit_query(\n", + " func_name=job.func_name, query=job.query\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29", + "metadata": {}, + "outputs": [], + "source": [ + "for job in jobs_data:\n", + " print(f\"Job {job.func_name:.20} {job.should_submit=}, {job.is_submitted=}\")\n", + "\n", + "assert all(job.is_submitted == job.should_submit for job in jobs_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30", + "metadata": {}, + "outputs": [], + "source": [ + "save_jobs(jobs_data)" + ] + }, + { + "cell_type": "markdown", + "id": "31", + "metadata": {}, + "source": [ + "## Test: cannot execute submitted jobs yet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32", + "metadata": {}, + "outputs": [], + "source": [ + "submitted_jobs = [job for job in jobs_data if job.should_submit]\n", + "job_execution_fns = [getattr(job.client.code, job.code_path) for job in submitted_jobs]\n", + "assert len(submitted_jobs) # failsafe for next tests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33", + "metadata": {}, + "outputs": [], + "source": [ + "for fn in job_execution_fns:\n", + " # blocking\n", + " with sy.raises(\n", + " sy.SyftException(public_message=\"*Your code is waiting for approval*\")\n", + " ):\n", + " result = fn()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34", + "metadata": {}, + "outputs": [], + "source": [ + "for fn in job_execution_fns:\n", + " # nonblocking\n", + " result_job = fn(blocking=False)\n", + " result_job.wait()\n", + " assert isinstance(result_job.result, sy.SyftError)\n", + " assert result_job.status == JobStatus.ERRORED" + ] + }, + { + "cell_type": "markdown", + "id": "35", + "metadata": {}, + "source": [ + "# Verify that admin has emails for submitted requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36", + "metadata": {}, + "outputs": [], + "source": [ + "num_should_submit = sum(j.should_submit for j in jobs_data)\n", + "admin_emails_after = len(email_server.get_emails_for_user(\"admin@bigquery.org\"))\n", + "print(\"admin emails after\", admin_emails_after)\n", + "assert admin_emails_after >= admin_emails_before + num_should_submit\n", + "# assert len(users_emails) > after_number_of_emails\n", + "# assert len(users_emails) == after_number_of_emails + 1" + ] + }, + { + "cell_type": "markdown", + "id": "37", + "metadata": {}, + "source": [ + "# Cleanup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38", + "metadata": {}, + "outputs": [], + "source": [ + "smtp_server.stop()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39", + "metadata": {}, + "outputs": [], + "source": [ + "server.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "syft_3.12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", 
+ "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/040-do-review-requests.ipynb b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/040-do-review-requests.ipynb new file mode 100644 index 00000000000..da95c7e5431 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/040-do-review-requests.ipynb @@ -0,0 +1,342 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import os\n", + "# os.environ[\"ORCHESTRA_DEPLOYMENT_TYPE\"] = \"remote\"\n", + "# os.environ[\"DEV_MODE\"] = \"True\"\n", + "# os.environ[\"TEST_EXTERNAL_REGISTRY\"] = \"k3d-registry.localhost:5800\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# isort: off\n", + "# stdlib\n", + "import random\n", + "import os\n", + "import sys\n", + "\n", + "# syft absolute\n", + "import syft as sy\n", + "from syft.util.util import (\n", + " find_base_dir_with_tox_ini,\n", + " get_caller_file_path,\n", + " is_interpreter_jupyter,\n", + ")\n", + "\n", + "\n", + "def add_helper_path_to_python_path() -> None:\n", + " current_path = \".\"\n", + "\n", + " # jupyter uses \".\" which resolves to the notebook\n", + " if not is_interpreter_jupyter():\n", + " # python uses the file which has from syft import test_settings in it\n", + " import_path = get_caller_file_path()\n", + " if import_path:\n", + " current_path = import_path\n", + "\n", + " base_dir = find_base_dir_with_tox_ini(current_path)\n", + " notebook_helper_path = os.path.join(\n", + " base_dir, \"notebooks/scenarios/bigquery/upgradability/0.9.1_helpers\"\n", + " )\n", + " sys.path.append(notebook_helper_path)\n", + "\n", + "\n", + "add_helper_path_to_python_path()\n", + "from syft.service.job.job_stash import Job\n", + "\n", + "# third party\n", + "from email_helpers import get_email_server\n", + "from job_helpers import approve_by_running\n", + "from job_helpers import get_job_emails\n", + "from job_helpers import get_request_for_job_info\n", + "# isort: on" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ADMIN_EMAIL, ADMIN_PW = \"admin2@bigquery.org\", \"bqpw2\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start server & login" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server = sy.orchestra.launch(\n", + " name=\"bigquery-high-migrations\",\n", + " dev_mode=True,\n", + " server_side_type=\"high\",\n", + " port=\"8080\",\n", + " n_consumers=1, # How many workers to be spawned\n", + " create_producer=True, # Can produce more workers\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client = sy.login(\n", + " url=\"http://localhost:8080\", email=ADMIN_EMAIL, password=ADMIN_PW\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "email_server, smtp_server = get_email_server()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Review requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# third 
party\n", + "from email_helpers import load_users\n", + "from job_helpers import load_jobs\n", + "from job_helpers import save_jobs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.requests.get_all_pending()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "users = load_users(high_client)\n", + "jobs_data = load_jobs(users, high_client)\n", + "all_requests = high_client.requests\n", + "submitted_jobs_data = [job for job in jobs_data if job.is_submitted]\n", + "n_emails_per_job_user = {\n", + " k: len(v)\n", + " for k, v in get_job_emails(submitted_jobs_data, high_client, email_server).items()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO we should record whether it was approved or deposited\n", + "# and test doing both in either order as there might be a bug when\n", + "# force overwriting\n", + "# also changing deny to approve and back again" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run or deny" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "submitted_jobs_data_should_succeed = [\n", + " j for j in submitted_jobs_data if j.should_succeed\n", + "]\n", + "submitted_jobs_data_should_fail = [\n", + " j for j in submitted_jobs_data if not j.should_succeed\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for job in submitted_jobs_data_should_succeed:\n", + " request = get_request_for_job_info(all_requests, job)\n", + " if random.randrange(2):\n", + " choice = \"approved with deposit_result\"\n", + " response = approve_by_running(request)\n", + " assert isinstance(response, Job)\n", + " else:\n", + " choice = \"approved\"\n", + " response = request.approve()\n", + " assert isinstance(response, sy.SyftSuccess)\n", + " print(f\"Job {job.func_name} should succeed: {job.should_succeed} and was {choice}\")\n", + " job.admin_reviewed = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for job in submitted_jobs_data_should_fail:\n", + " request = get_request_for_job_info(all_requests, job)\n", + " response = request.deny(\n", + " reason=f\"Your request {job.func_name} looks wrong, try again.\"\n", + " )\n", + " assert isinstance(response, sy.SyftSuccess)\n", + " assert not job.should_succeed\n", + " job.admin_reviewed = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Verify that users have new emails" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "new_n_emails_per_job_user = {\n", + " k: len(v)\n", + " for k, v in get_job_emails(submitted_jobs_data, high_client, email_server).items()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for user_email, new_count in new_n_emails_per_job_user.items():\n", + "# old_count = n_emails_per_job_user[user_email]\n", + "# assert new_count > old_count" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Save state" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "save_jobs(jobs_data)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.requests.get_all_approved()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.requests.get_all_rejected()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cleanup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "smtp_server.stop()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/050-ds-get-results.ipynb b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/050-ds-get-results.ipynb new file mode 100644 index 00000000000..81e0e288a43 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/050-ds-get-results.ipynb @@ -0,0 +1,263 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import os\n", + "# os.environ[\"ORCHESTRA_DEPLOYMENT_TYPE\"] = \"remote\"\n", + "# os.environ[\"DEV_MODE\"] = \"True\"\n", + "# os.environ[\"TEST_EXTERNAL_REGISTRY\"] = \"k3d-registry.localhost:5800\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# isort: off\n", + "# syft absolute\n", + "import syft as sy\n", + "from syft.util.util import (\n", + " find_base_dir_with_tox_ini,\n", + " get_caller_file_path,\n", + " is_interpreter_jupyter,\n", + ")\n", + "\n", + "import os\n", + "import sys\n", + "\n", + "\n", + "def add_helper_path_to_python_path() -> None:\n", + " current_path = \".\"\n", + "\n", + " # jupyter uses \".\" which resolves to the notebook\n", + " if not is_interpreter_jupyter():\n", + " # python uses the file which has from syft import test_settings in it\n", + " import_path = get_caller_file_path()\n", + " if import_path:\n", + " current_path = import_path\n", + "\n", + " base_dir = find_base_dir_with_tox_ini(current_path)\n", + " notebook_helper_path = os.path.join(\n", + " base_dir, \"notebooks/scenarios/bigquery/upgradability/0.9.1_helpers\"\n", + " )\n", + " sys.path.append(notebook_helper_path)\n", + "\n", + "\n", + "add_helper_path_to_python_path()\n", + "# third party\n", + "from email_helpers import get_email_server\n", + "from email_helpers import load_users\n", + "from job_helpers import load_jobs\n", + "from job_helpers import save_jobs\n", + "# isort: on" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ADMIN_EMAIL, ADMIN_PW = \"admin2@bigquery.org\", \"bqpw2\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Launch server & login" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server = sy.orchestra.launch(\n", + " name=\"bigquery-high-migrations\",\n", + " dev_mode=True,\n", + " server_side_type=\"high\",\n", + " 
port=\"8080\",\n", + " n_consumers=1, # How many workers to be spawned\n", + " create_producer=True, # Can produce more workers\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client = sy.login(\n", + " url=\"http://localhost:8080\", email=ADMIN_EMAIL, password=ADMIN_PW\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "email_server, smtp_server = get_email_server()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "users = load_users(high_client)\n", + "jobs = load_jobs(users, high_client)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# submitted_jobs = [job for job in jobs if job.is_submitted]\n", + "reviewed_jobs = [job for job in jobs if job.admin_reviewed]\n", + "reviewed_jobs_should_succeed = [j for j in reviewed_jobs if j.should_succeed]\n", + "reviewed_jobs_should_fail = [j for j in reviewed_jobs if not j.should_succeed]\n", + "\n", + "print(\n", + " f\"{len(reviewed_jobs)=}, {len(reviewed_jobs_should_succeed)=}, {len(reviewed_jobs_should_fail)=}\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: test jobs that were never approved\n", + "# they seem to give weird errors like\n", + "# \"You uploaded an ActionObject that is not yet in the blob storage\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for job in reviewed_jobs_should_succeed:\n", + " print(f\"> Checking job: {job.job_type} {job.func_name} for user {job.user_email}\")\n", + " api_method = job.code_method\n", + " j = api_method(blocking=False)\n", + " res = j.wait()\n", + "\n", + " if isinstance(res, sy.SyftError):\n", + " raise sy.SyftException(public_message=\"Expected success, got error\")\n", + "\n", + " result = res.get()\n", + " job.result_as_expected = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for job in reviewed_jobs_should_fail:\n", + " print(f\"> Checking job: {job.job_type} {job.func_name} for user {job.user_email}\")\n", + " api_method = job.code_method\n", + "\n", + " j = api_method(blocking=False)\n", + " res = j.wait()\n", + " if isinstance(res, sy.SyftError):\n", + " job.result_as_expected = True\n", + " else:\n", + " raise sy.SyftException(public_message=f\"failed, job didnt raise {type(j)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "save_jobs(jobs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO fix\n", + "expected_jobs = [job for job in jobs if job.result_as_expected]\n", + "print(f\"got expected_jobs: {len(expected_jobs)} == reviewed_jobs: {len(reviewed_jobs)}\")\n", + "assert len(reviewed_jobs) == len(expected_jobs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cleanup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "smtp_server.stop()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"server.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/emails.json b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/emails.json new file mode 100644 index 00000000000..dc22a271960 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/emails.json @@ -0,0 +1,107 @@ +{ + "admin@bigquery.org": [ + { + "email_from": "noreply@openmined.org", + "email_to": ["admin@bigquery.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============5231718646336112954==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: admin@bigquery.org\r\nSubject: Datasite bigquery-high-migrations: A New Request (6091) has been received!\r\n\r\n--===============5231718646336112954==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n A new request has been submitted and requires your attention. Please review the details below: \r\n Request Details \r\n ID: 60916f4d58504b7f91508b9e11e2fe03 \r\n Submitted By: Michael Adams \r\n Date: 2024-09-23 10:22:57 \r\n Status: PENDING \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"60916f4d58504b7f91508b9e11e2fe03\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============5231718646336112954==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["admin@bigquery.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============2108625785401698392==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: admin@bigquery.org\r\nSubject: Datasite bigquery-high-migrations: A New Request (a850) has been received!\r\n\r\n--===============2108625785401698392==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n A new request has been submitted and requires your attention. Please review the details below: \r\n Request Details \r\n ID: a850407831c64b928c4733db4393bb84 \r\n Submitted By: Lisa Clark \r\n Date: 2024-09-23 10:23:08 \r\n Status: PENDING \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"a850407831c64b928c4733db4393bb84\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============2108625785401698392==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["admin@bigquery.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============3190702502919456718==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: admin@bigquery.org\r\nSubject: Datasite bigquery-high-migrations: A New Request (e0cf) has been received!\r\n\r\n--===============3190702502919456718==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n A new request has been submitted and requires your attention. Please review the details below: \r\n Request Details \r\n ID: e0cf4aec1e80443fb49b54ab1ee460c3 \r\n Submitted By: Anthony Simpson \r\n Date: 2024-09-23 10:23:18 \r\n Status: PENDING \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"e0cf4aec1e80443fb49b54ab1ee460c3\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============3190702502919456718==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["admin@bigquery.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============6538833477960096742==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: admin@bigquery.org\r\nSubject: Datasite bigquery-high-migrations: A New Request (291e) has been received!\r\n\r\n--===============6538833477960096742==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n A new request has been submitted and requires your attention. Please review the details below: \r\n Request Details \r\n ID: 291ec8c54d594443ba5cdd39176c506d \r\n Submitted By: Ian Ray \r\n Date: 2024-09-23 10:23:28 \r\n Status: PENDING \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"291ec8c54d594443ba5cdd39176c506d\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============6538833477960096742==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["admin@bigquery.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============4270422105925677111==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: admin@bigquery.org\r\nSubject: Datasite bigquery-high-migrations: A New Request (3573) has been received!\r\n\r\n--===============4270422105925677111==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n A new request has been submitted and requires your attention. Please review the details below: \r\n Request Details \r\n ID: 35736f1adf34471a8d805e53d29883ed \r\n Submitted By: Marie Russo \r\n Date: 2024-09-23 10:23:39 \r\n Status: PENDING \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"35736f1adf34471a8d805e53d29883ed\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============4270422105925677111==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["admin@bigquery.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============1927382439090520103==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: admin@bigquery.org\r\nSubject: Datasite bigquery-high-migrations: A New Request (14f6) has been received!\r\n\r\n--===============1927382439090520103==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n A new request has been submitted and requires your attention. Please review the details below: \r\n Request Details \r\n ID: 14f66d6a120e4c3aadb123b2e8dd8805 \r\n Submitted By: Michael Adams \r\n Date: 2024-09-23 10:23:49 \r\n Status: PENDING \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"14f66d6a120e4c3aadb123b2e8dd8805\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============1927382439090520103==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["admin@bigquery.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============1564302604072025353==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: admin@bigquery.org\r\nSubject: Datasite bigquery-high-migrations: A New Request (1418) has been received!\r\n\r\n--===============1564302604072025353==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n A new request has been submitted and requires your attention. Please review the details below: \r\n Request Details \r\n ID: 141875e5343a48c8aef17f642a8c91e7 \r\n Submitted By: Lisa Clark \r\n Date: 2024-09-23 10:23:59 \r\n Status: PENDING \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"141875e5343a48c8aef17f642a8c91e7\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============1564302604072025353==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["admin@bigquery.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============5221322304721055502==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: admin@bigquery.org\r\nSubject: Datasite bigquery-high-migrations: A New Request (8fae) has been received!\r\n\r\n--===============5221322304721055502==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n A new request has been submitted and requires your attention. Please review the details below: \r\n Request Details \r\n ID: 8fae4312f60742b093c89a968d2a1453 \r\n Submitted By: Anthony Simpson \r\n Date: 2024-09-23 10:24:09 \r\n Status: PENDING \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"8fae4312f60742b093c89a968d2a1453\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============5221322304721055502==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["admin@bigquery.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============7638781143527035342==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: admin@bigquery.org\r\nSubject: Datasite bigquery-high-migrations: A New Request (b910) has been received!\r\n\r\n--===============7638781143527035342==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n A new request has been submitted and requires your attention. Please review the details below: \r\n Request Details \r\n ID: b91086795fda4b2aa8978ae1d4449f78 \r\n Submitted By: Ian Ray \r\n Date: 2024-09-23 10:24:20 \r\n Status: PENDING \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"b91086795fda4b2aa8978ae1d4449f78\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============7638781143527035342==--\r\n" + } + ], + "lisa-clark-fake@openmined.org": [ + { + "email_from": "noreply@openmined.org", + "email_to": ["lisa-clark-fake@openmined.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============0070695112275164508==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: lisa-clark-fake@openmined.org\r\nSubject: Datasite bigquery-high-migrations: Your request (a850) has been approved. \r\n\r\n--===============0070695112275164508==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n The status of your recent request has been updated. Below is the latest information regarding it: \r\n Request Details \r\n ID: a850407831c64b928c4733db4393bb84 \r\n Submitted By: Lisa Clark lisa-clark-fake@openmined.org \r\n Date: 2024-09-23 10:23:08 \r\n Status: APPROVED \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"a850407831c64b928c4733db4393bb84\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============0070695112275164508==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["lisa-clark-fake@openmined.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============6956594247645420946==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: lisa-clark-fake@openmined.org\r\nSubject: Datasite bigquery-high-migrations: Your request (a850) has been approved. \r\n\r\n--===============6956594247645420946==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n The status of your recent request has been updated. Below is the latest information regarding it: \r\n Request Details \r\n ID: a850407831c64b928c4733db4393bb84 \r\n Submitted By: Lisa Clark lisa-clark-fake@openmined.org \r\n Date: 2024-09-23 10:23:08 \r\n Status: APPROVED \r\n Changes: UserCodeStatusChange,ActionStoreChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"a850407831c64b928c4733db4393bb84\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============6956594247645420946==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["lisa-clark-fake@openmined.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============4955025587982972474==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: lisa-clark-fake@openmined.org\r\nSubject: Datasite bigquery-high-migrations: Your request (1418) has been denied. \r\n\r\n--===============4955025587982972474==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n The status of your recent request has been updated. Below is the latest information regarding it: \r\n Request Details \r\n ID: 141875e5343a48c8aef17f642a8c91e7 \r\n Submitted By: Lisa Clark lisa-clark-fake@openmined.org \r\n Date: 2024-09-23 10:23:59 \r\n Status: REJECTED \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"141875e5343a48c8aef17f642a8c91e7\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============4955025587982972474==--\r\n" + } + ], + "anthony-simpson-fake@openmined.org": [ + { + "email_from": "noreply@openmined.org", + "email_to": ["anthony-simpson-fake@openmined.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============4618684281710170390==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: anthony-simpson-fake@openmined.org\r\nSubject: Datasite bigquery-high-migrations: Your request (e0cf) has been approved. \r\n\r\n--===============4618684281710170390==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n The status of your recent request has been updated. Below is the latest information regarding it: \r\n Request Details \r\n ID: e0cf4aec1e80443fb49b54ab1ee460c3 \r\n Submitted By: Anthony Simpson anthony-simpson-fake@openmined.org \r\n Date: 2024-09-23 10:23:18 \r\n Status: APPROVED \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"e0cf4aec1e80443fb49b54ab1ee460c3\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============4618684281710170390==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["anthony-simpson-fake@openmined.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============4796926491134373799==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: anthony-simpson-fake@openmined.org\r\nSubject: Datasite bigquery-high-migrations: Your request (8fae) has been denied. \r\n\r\n--===============4796926491134373799==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n The status of your recent request has been updated. Below is the latest information regarding it: \r\n Request Details \r\n ID: 8fae4312f60742b093c89a968d2a1453 \r\n Submitted By: Anthony Simpson anthony-simpson-fake@openmined.org \r\n Date: 2024-09-23 10:24:09 \r\n Status: REJECTED \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"8fae4312f60742b093c89a968d2a1453\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============4796926491134373799==--\r\n" + } + ], + "ian-ray-fake@openmined.org": [ + { + "email_from": "noreply@openmined.org", + "email_to": ["ian-ray-fake@openmined.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============3263288618826687943==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: ian-ray-fake@openmined.org\r\nSubject: Datasite bigquery-high-migrations: Your request (291e) has been approved. \r\n\r\n--===============3263288618826687943==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n The status of your recent request has been updated. Below is the latest information regarding it: \r\n Request Details \r\n ID: 291ec8c54d594443ba5cdd39176c506d \r\n Submitted By: Ian Ray ian-ray-fake@openmined.org \r\n Date: 2024-09-23 10:23:28 \r\n Status: APPROVED \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"291ec8c54d594443ba5cdd39176c506d\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============3263288618826687943==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["ian-ray-fake@openmined.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============9068082627312456525==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: ian-ray-fake@openmined.org\r\nSubject: Datasite bigquery-high-migrations: Your request (291e) has been approved. \r\n\r\n--===============9068082627312456525==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n The status of your recent request has been updated. Below is the latest information regarding it: \r\n Request Details \r\n ID: 291ec8c54d594443ba5cdd39176c506d \r\n Submitted By: Ian Ray ian-ray-fake@openmined.org \r\n Date: 2024-09-23 10:23:28 \r\n Status: APPROVED \r\n Changes: UserCodeStatusChange,ActionStoreChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"291ec8c54d594443ba5cdd39176c506d\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============9068082627312456525==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["ian-ray-fake@openmined.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============5570947561616756360==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: ian-ray-fake@openmined.org\r\nSubject: Datasite bigquery-high-migrations: Your request (b910) has been approved. \r\n\r\n--===============5570947561616756360==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n The status of your recent request has been updated. Below is the latest information regarding it: \r\n Request Details \r\n ID: b91086795fda4b2aa8978ae1d4449f78 \r\n Submitted By: Ian Ray ian-ray-fake@openmined.org \r\n Date: 2024-09-23 10:24:20 \r\n Status: APPROVED \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"b91086795fda4b2aa8978ae1d4449f78\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============5570947561616756360==--\r\n" + }, + { + "email_from": "noreply@openmined.org", + "email_to": ["ian-ray-fake@openmined.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============2122230739167057876==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: ian-ray-fake@openmined.org\r\nSubject: Datasite bigquery-high-migrations: Your request (b910) has been approved. \r\n\r\n--===============2122230739167057876==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n The status of your recent request has been updated. Below is the latest information regarding it: \r\n Request Details \r\n ID: b91086795fda4b2aa8978ae1d4449f78 \r\n Submitted By: Ian Ray ian-ray-fake@openmined.org \r\n Date: 2024-09-23 10:24:20 \r\n Status: APPROVED \r\n Changes: UserCodeStatusChange,ActionStoreChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"b91086795fda4b2aa8978ae1d4449f78\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============2122230739167057876==--\r\n" + } + ], + "marie-russo-fake@openmined.org": [ + { + "email_from": "noreply@openmined.org", + "email_to": ["marie-russo-fake@openmined.org"], + "email_content": "Content-Type: multipart/alternative; boundary=\"===============6066415345440146385==\"\r\nMIME-Version: 1.0\r\nFrom: noreply@openmined.org\r\nTo: marie-russo-fake@openmined.org\r\nSubject: Datasite bigquery-high-migrations: Your request (3573) has been approved. \r\n\r\n--===============6066415345440146385==\r\nContent-Type: text/html; charset=\"us-ascii\"\r\nMIME-Version: 1.0\r\nContent-Transfer-Encoding: 7bit\r\n\r\n\r\n \r\n Access Request Notification\r\n \r\n \r\n \r\n
Request Notification \r\n Hello, \r\n The status of your recent request has been updated. Below is the latest information regarding it: \r\n Request Details \r\n ID: 35736f1adf34471a8d805e53d29883ed \r\n Submitted By: Marie Russo marie-russo-fake@openmined.org \r\n Date: 2024-09-23 10:23:39 \r\n Status: APPROVED \r\n Changes: UserCodeStatusChange \r\n Use: request = client.api.services.request.get_by_uid(uid=sy.UID(\"35736f1adf34471a8d805e53d29883ed\")) to get this specific request. \r\n Or you can view all requests with: client.requests \r\n If you did not expect this request or have concerns about it, please contact our support team immediately. \r\n This is an automated message, please do not reply directly to this email. For assistance, please contact our support team.
\r\n \r\n \r\n--===============6066415345440146385==--\r\n" + } + ] +} diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/jobs.json b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/jobs.json new file mode 100644 index 00000000000..4f9d247146a --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/jobs.json @@ -0,0 +1,180 @@ +{ + "michael-adams-fake@openmined.org": [ + { + "user_email": "michael-adams-fake@openmined.org", + "func_name": "simple_query_e92e5a", + "query": "SELECT table_id, AVG(colname) AS average_score\n FROM dataset_2.table_1\n GROUP BY table_id\n LIMIT 717808", + "job_type": "simple_query", + "settings": { + "dataset": "dataset_2", + "table": "table_1", + "groupby_col": "table_id", + "score_col": "colname", + "limit": 717808 + }, + "should_succeed": true, + "should_submit": true, + "code_path": "simple_query_e92e5a", + "admin_reviewed": true, + "result_as_expected": true + }, + { + "user_email": "michael-adams-fake@openmined.org", + "func_name": "simple_query_4e10ab", + "query": "SELECT table_id, AVG(colname) AS average_score\n FROM dataset_1.table_1\n GROUP BY table_id\n LIMIT 512806", + "job_type": "simple_query", + "settings": { + "dataset": "dataset_1", + "table": "table_1", + "groupby_col": "table_id", + "score_col": "colname", + "limit": 512806 + }, + "should_succeed": true, + "should_submit": true, + "code_path": "simple_query_4e10ab", + "admin_reviewed": true, + "result_as_expected": true + } + ], + "lisa-clark-fake@openmined.org": [ + { + "user_email": "lisa-clark-fake@openmined.org", + "func_name": "simple_query_5374dd", + "query": "SELECT table_id, AVG(colname) AS average_score\n FROM dataset_1.table_1\n GROUP BY table_id\n LIMIT 55958", + "job_type": "simple_query", + "settings": { + "dataset": "dataset_1", + "table": "table_1", + "groupby_col": "table_id", + "score_col": "colname", + "limit": 55958 + }, + "should_succeed": true, + "should_submit": true, + "code_path": "simple_query_5374dd", + "admin_reviewed": true, + "result_as_expected": true + }, + { + "user_email": "lisa-clark-fake@openmined.org", + "func_name": "wrong_syntax_query_cf08ee", + "query": "SELECT * FROM table INCORRECT SYNTAX", + "job_type": "wrong_syntax_query", + "settings": {}, + "should_succeed": false, + "should_submit": true, + "code_path": "wrong_syntax_query_cf08ee", + "admin_reviewed": true, + "result_as_expected": true + } + ], + "anthony-simpson-fake@openmined.org": [ + { + "user_email": "anthony-simpson-fake@openmined.org", + "func_name": "simple_query_e46828", + "query": "SELECT table_id, AVG(colname) AS average_score\n FROM dataset_2.table_2\n GROUP BY table_id\n LIMIT 346093", + "job_type": "simple_query", + "settings": { + "dataset": "dataset_2", + "table": "table_2", + "groupby_col": "table_id", + "score_col": "colname", + "limit": 346093 + }, + "should_succeed": true, + "should_submit": true, + "code_path": "simple_query_e46828", + "admin_reviewed": true, + "result_as_expected": true + }, + { + "user_email": "anthony-simpson-fake@openmined.org", + "func_name": "job_too_much_text_47c92e", + "query": 
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "job_type": "job_too_much_text", + "settings": {}, + "should_succeed": false, + "should_submit": true, + "code_path": "job_too_much_text_47c92e", + "admin_reviewed": true, + "result_as_expected": true + } + ], + "ian-ray-fake@openmined.org": [ + { + "user_email": "ian-ray-fake@openmined.org", + "func_name": "simple_query_57f511", + "query": "SELECT table_id, AVG(colname) AS average_score\n FROM dataset_1.table_1\n GROUP BY table_id\n LIMIT 199607", + "job_type": "simple_query", + "settings": { + "dataset": "dataset_1", + "table": "table_1", + "groupby_col": "table_id", + "score_col": "colname", + "limit": 199607 + }, + "should_succeed": true, + "should_submit": true, + "code_path": "simple_query_57f511", + "admin_reviewed": true, + "result_as_expected": true + }, + { + "user_email": "ian-ray-fake@openmined.org", + "func_name": "job_long_name_6a5b10aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "query": "SELECT table_id, AVG(colname) AS average_score\n FROM dataset_2.table_1\n GROUP BY table_id\n LIMIT 138346", + "job_type": "job_long_name", + "settings": { + "dataset": "dataset_2", + "table": "table_1", + "groupby_col": "table_id", + "score_col": "colname", + "limit": 138346 + }, + "should_succeed": true, + "should_submit": true, + "code_path": 
"job_long_name_6a5b10aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "admin_reviewed": true, + "result_as_expected": true + } + ], + "marie-russo-fake@openmined.org": [ + { + "user_email": "marie-russo-fake@openmined.org", + "func_name": "simple_query_079c08", + "query": "SELECT table_id, AVG(colname) AS average_score\n FROM dataset_1.table_1\n GROUP BY table_id\n LIMIT 381248", + "job_type": "simple_query", + "settings": { + "dataset": "dataset_1", + "table": "table_1", + "groupby_col": "table_id", + "score_col": "colname", + "limit": 381248 + }, + "should_succeed": true, + "should_submit": true, + "code_path": "simple_query_079c08", + "admin_reviewed": true, + "result_as_expected": true + }, + { + "user_email": "marie-russo-fake@openmined.org", + "func_name": "job_funcname_xss_d77c1a", + "query": "SELECT table_id, AVG(colname) AS average_score\n FROM dataset_1.table_1\n GROUP BY table_id\n LIMIT 640290", + "job_type": "job_funcname_xss", + "settings": { + "dataset": "dataset_1", + "table": "table_1", + "groupby_col": "table_id", + "score_col": "colname", + "limit": 640290 + }, + "should_succeed": true, + "should_submit": false, + "code_path": null, + "admin_reviewed": false, + "result_as_expected": null + } + ] +} diff --git a/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/users.json b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/users.json new file mode 100644 index 00000000000..c2ef3885a77 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks/users.json @@ -0,0 +1,52 @@ +[ + { + "name": "Michael Adams", + "email": "michael-adams-fake@openmined.org", + "password": "qRPt9Ua0_6", + "role": "ServiceRole.DATA_SCIENTIST", + "new_password": null, + "email_disabled": true, + "reset_password": false, + "reset_token": null + }, + { + "name": "Lisa Clark", + "email": "lisa-clark-fake@openmined.org", + "password": "tC9fZrku_o", + "role": "ServiceRole.DATA_SCIENTIST", + "new_password": null, + "email_disabled": false, + "reset_password": true, + "reset_token": "D9893xYYM1HO" + }, + { + "name": "Anthony Simpson", + "email": "anthony-simpson-fake@openmined.org", + "password": ")@28JBEb4c", + "role": "ServiceRole.DATA_SCIENTIST", + "new_password": null, + "email_disabled": false, + "reset_password": false, + "reset_token": null + }, + { + "name": "Ian Ray", + "email": "ian-ray-fake@openmined.org", + "password": "rA9miV^sX$", + "role": "ServiceRole.DATA_SCIENTIST", + "new_password": null, + "email_disabled": false, + "reset_password": false, + "reset_token": null + }, + { + "name": "Marie Russo", + "email": 
"marie-russo-fake@openmined.org", + "password": "$4Asq)sSVb", + "role": "ServiceRole.DATA_SCIENTIST", + "new_password": null, + "email_disabled": false, + "reset_password": false, + "reset_token": null + } +] diff --git a/notebooks/scenarios/bigquery/upgradability/1-dump-database-to-file.ipynb b/notebooks/scenarios/bigquery/upgradability/1-dump-database-to-file.ipynb new file mode 100644 index 00000000000..4ed6ee40e26 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/1-dump-database-to-file.ipynb @@ -0,0 +1,162 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "# stdlib\n", + "from pathlib import Path\n", + "\n", + "# syft absolute\n", + "import syft as sy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "ADMIN_EMAIL, ADMIN_PW = \"admin2@bigquery.org\", \"bqpw2\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "server = sy.orchestra.launch(\n", + " name=\"bigquery-high-migrations\",\n", + " dev_mode=True,\n", + " server_side_type=\"high\",\n", + " port=\"8080\",\n", + " n_consumers=1, # How many workers to be spawned\n", + " create_producer=True, # Can produce more workers\n", + ")\n", + "client = sy.login(url=\"http://localhost:8080\", email=ADMIN_EMAIL, password=ADMIN_PW)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "# client = sy.login(email=\"info@openmined.org\", password=\"changethis\", port=8080)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "# Check if this server has data on it\n", + "# assert len(client.users.get_all()) == 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "migration_data = client.get_migration_data(include_blobs=True)\n", + "migration_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "assert migration_data.includes_blobs\n", + "assert migration_data.num_action_objects > 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "# migration_data_dir = Path(os.getenv(\"MIGRATION_DATA_DIR\", \".\"))\n", + "migration_data_dir = Path(\".\")\n", + "migration_data_dir.mkdir(exist_ok=True)\n", + "\n", + "blob_path = migration_data_dir / \"migration.blob\"\n", + "yaml_path = migration_data_dir / \"migration.yaml\"\n", + "\n", + "blob_path.unlink(missing_ok=True)\n", + "yaml_path.unlink(missing_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "migration_data.save(blob_path, yaml_path=yaml_path)\n", + "\n", + "assert blob_path.exists()\n", + "assert yaml_path.exists()\n", + "\n", + "print(f\"Saved migration data to {str(blob_path.resolve())}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "if server.server_type.value == \"python\":\n", + " server.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "syft_3.12", + 
"language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/scenarios/bigquery/upgradability/2-migrate-for-scenarios.ipynb b/notebooks/scenarios/bigquery/upgradability/2-migrate-for-scenarios.ipynb new file mode 100644 index 00000000000..bd4f5cae2f5 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/2-migrate-for-scenarios.ipynb @@ -0,0 +1,508 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# stdlib\n", + "import os\n", + "from os import environ as env\n", + "from pathlib import Path\n", + "import sys\n", + "\n", + "# syft absolute\n", + "import syft as sy\n", + "from syft.service.action.action_object import AnyActionObject\n", + "from syft.service.user.user_roles import ServiceRole\n", + "from syft.util.test_helpers.email_helpers import load_users\n", + "from syft.util.test_helpers.job_helpers import create_simple_query_job\n", + "from syft.util.test_helpers.job_helpers import create_wrong_syntax_query\n", + "from syft.util.test_helpers.job_helpers import load_jobs\n", + "from syft.util.util import find_base_dir_with_tox_ini\n", + "from syft.util.util import get_caller_file_path\n", + "from syft.util.util import is_interpreter_jupyter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def add_helper_path_to_python_path() -> None:\n", + " current_path = \".\"\n", + "\n", + " # jupyter uses \".\" which resolves to the notebook\n", + " if not is_interpreter_jupyter():\n", + " # python uses the file which has from syft import test_settings in it\n", + " import_path = get_caller_file_path()\n", + " if import_path:\n", + " current_path = import_path\n", + "\n", + " base_dir = find_base_dir_with_tox_ini(current_path)\n", + " notebook_helper_path = os.path.join(base_dir, \"test_helpers\")\n", + " sys.path.append(notebook_helper_path)\n", + "\n", + "\n", + "add_helper_path_to_python_path()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# when in k8s these are the default values\n", + "ROOT_EMAIL = \"admin@bigquery.org\"\n", + "ROOT_PASSWORD = \"bqpw\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# in case we are not in k8s we set them here for orchestra to use\n", + "env[\"DEFAULT_ROOT_EMAIL\"] = ROOT_EMAIL\n", + "env[\"DEFAULT_ROOT_PASSWORD\"] = ROOT_PASSWORD" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Login" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server = sy.orchestra.launch(\n", + " name=\"bigquery-high-migrations\",\n", + " dev_mode=True,\n", + " server_side_type=\"high\",\n", + " reset=True,\n", + " port=\"8080\",\n", + " n_consumers=1, # How many workers to be spawned\n", + " create_producer=True, # Can produce more workers\n", + ")\n", + "\n", + "client = sy.login(url=\"http://localhost:8080\", email=ROOT_EMAIL, password=ROOT_PASSWORD)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if this is a new server\n", + 
"migration_data = client.get_migration_data()\n", + "\n", + "# assert len(migration_data.store_objects[User]) == 1\n", + "# assert UserCode not in migration_data.store_objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "migration_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load migration data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pwd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "migration_data_dir = Path(os.getenv(\"MIGRATION_DATA_DIR\", \".\"))\n", + "blob_path = migration_data_dir / \"migration.blob\"\n", + "yaml_path = migration_data_dir / \"migration.yaml\"\n", + "\n", + "print(f\"Loading migration data from {str(blob_path.resolve())}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "res = client.load_migration_data(blob_path)\n", + "assert isinstance(res, sy.SyftSuccess), res.message" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "res" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sy.upgrade_custom_workerpools(client, blob_path, mode=\"auto\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Post migration tests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "users = load_users(client, path=\"0.9.1_notebooks/users.json\")\n", + "jobs = load_jobs(users, client, filepath=\"0.9.1_notebooks/jobs.json\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check users" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server_users = client.users" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server_users[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server_user_names = [\n", + " user.name for user in server_users if user.role == ServiceRole.DATA_SCIENTIST\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "user_names = [user.name for user in users]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert server_user_names == user_names" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Old jobs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# submitted_jobs = [job for job in jobs if job.is_submitted]\n", + "reviewed_jobs = [job for job in jobs if job.admin_reviewed]\n", + "reviewed_jobs_should_succeed = [j for j in reviewed_jobs if j.should_succeed]\n", + "reviewed_jobs_should_fail = [j for j in reviewed_jobs if not j.should_succeed]\n", + "\n", + "print(\n", + " f\"{len(reviewed_jobs)=}, {len(reviewed_jobs_should_succeed)=}, {len(reviewed_jobs_should_fail)=}\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for job in reviewed_jobs_should_succeed:\n", + " print(f\"> Checking job: {job.job_type} 
{job.func_name} for user {job.user_email}\")\n", + "    api_method = job.code_method\n", + "    j = api_method(blocking=False)\n", + "    res = j.wait()\n", + "\n", + "    if isinstance(res, sy.SyftError):\n", + "        raise sy.SyftException(public_message=res.message)\n", + "\n", + "    result = res.get()\n", + "    job.result_as_expected = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for job in reviewed_jobs_should_fail:\n", + "    print(f\"> Checking job: {job.job_type} {job.func_name} for user {job.user_email}\")\n", + "    api_method = job.code_method\n", + "\n", + "    j = api_method(blocking=False)\n", + "    res = j.wait()\n", + "    if isinstance(res, sy.SyftError):\n", + "        job.result_as_expected = True\n", + "    else:\n", + "        raise sy.SyftException(public_message=f\"failed, job didn't raise, got {type(j)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "expected_jobs = [job for job in jobs if job.result_as_expected]\n", + "print(f\"got expected_jobs: {len(expected_jobs)} == reviewed_jobs: {len(reviewed_jobs)}\")\n", + "assert len(reviewed_jobs) == len(expected_jobs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use old DS to go through the flow again" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds_client = users[0].client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(ds_client.api.services.api.api_endpoints()) == 3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job = create_simple_query_job(users[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = ds_client.api.services.bigquery.submit_query(\n", + "    func_name=job.func_name, query=job.query\n", + ")\n", + "response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert isinstance(response, AnyActionObject)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for request in client.requests:\n", + "    if request.code.service_func_name == job.func_name:\n", + "        request.approve()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job_res = getattr(ds_client.code, job.func_name)(blocking=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job_res.wait()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# third party\n", + "from pandas import DataFrame\n", + "\n", + "assert isinstance(job_res.result.get(), DataFrame)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "wrong_syntax_job = create_wrong_syntax_query(users[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = ds_client.api.services.bigquery.submit_query(\n", + "    func_name=wrong_syntax_job.func_name, query=wrong_syntax_job.query\n", + ")\n", + "response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert
isinstance(response, AnyActionObject)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for request in client.requests:\n", + " if request.code.service_func_name == wrong_syntax_job.func_name:\n", + " request.approve()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job_res = getattr(ds_client.code, wrong_syntax_job.func_name)(blocking=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert isinstance(job_res.wait(), sy.SyftError)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if server.server_type.value == \"python\":\n", + " server.land()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "syft_3.12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/scenarios/bigquery/upgradability/migration.blob b/notebooks/scenarios/bigquery/upgradability/migration.blob new file mode 100644 index 00000000000..3e0d4bfe6b0 Binary files /dev/null and b/notebooks/scenarios/bigquery/upgradability/migration.blob differ diff --git a/notebooks/scenarios/bigquery/upgradability/migration.yaml b/notebooks/scenarios/bigquery/upgradability/migration.yaml new file mode 100644 index 00000000000..179e57a3135 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/migration.yaml @@ -0,0 +1,6 @@ +server: + env: + - name: SERVER_UID + value: c1bdbee427ff4068ad8a9ebc151c75a2 + - name: SERVER_PRIVATE_KEY + value: 3d9252d25b8c78ddd64915e580016b0a88c5d6372fa138b88a5aaf61bfbb806e diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/01-setup-high-low-datasites.ipynb b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/01-setup-high-low-datasites.ipynb new file mode 100644 index 00000000000..65a947371db --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/01-setup-high-low-datasites.ipynb @@ -0,0 +1,241 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# syft absolute\n", + "import syft as sy\n", + "from syft import test_settings\n", + "\n", + "print(f\"syft version: {sy.__version__}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server_low = sy.orchestra.launch(\n", + " name=\"bigquery-low\",\n", + " server_side_type=\"low\",\n", + " dev_mode=True,\n", + " reset=True,\n", + " n_consumers=1,\n", + " create_producer=True,\n", + " port=\"auto\",\n", + ")\n", + "\n", + "server_high = sy.orchestra.launch(\n", + " name=\"bigquery-high\",\n", + " server_side_type=\"high\",\n", + " dev_mode=True,\n", + " reset=True,\n", + " n_consumers=1,\n", + " create_producer=True,\n", + " port=\"auto\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "low_client = server_low.login(email=\"info@openmined.org\", password=\"changethis\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
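"# Log in to the high-side datasite with the same default admin credentials.\n", +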
"high_client = server_high.login(email=\"info@openmined.org\", password=\"changethis\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "low_client.worker_pools.get_all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(high_client.worker_pools.get_all()) == 1\n", + "assert len(low_client.worker_pools.get_all()) == 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def launch_worker_pool(client, pool_name):\n", + " if pool_name not in [x.name for x in client.worker_pools]:\n", + " external_registry = test_settings.get(\"external_registry\", default=\"docker.io\")\n", + " worker_docker_tag = f\"openmined/bigquery:{sy.__version__}\"\n", + " result = client.api.services.worker_image.submit(\n", + " worker_config=sy.PrebuiltWorkerConfig(\n", + " tag=f\"{external_registry}/{worker_docker_tag}\"\n", + " )\n", + " )\n", + " worker_image = client.images.get_all()[1]\n", + " result = client.api.services.image_registry.add(external_registry)\n", + " result = client.api.services.worker_pool.launch(\n", + " pool_name=pool_name,\n", + " image_uid=worker_image.id,\n", + " num_workers=1,\n", + " )\n", + " return result\n", + " else:\n", + " print(\"Pool already exists\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pool_name = \"bigquery-pool\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "launch_worker_pool(high_client, pool_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "launch_worker_pool(low_client, pool_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# result = high_client.worker_pools.scale(number=5, pool_name=pool_name)\n", + "# result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(high_client.worker_pools.get_all()) == 2\n", + "assert len(low_client.worker_pools.get_all()) == 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_worker_image = high_client.images.get_all()[0]\n", + "base_worker_image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "low_client.register(\n", + " email=\"data_scientist@openmined.org\",\n", + " password=\"verysecurepassword\",\n", + " password_verify=\"verysecurepassword\",\n", + " name=\"John Doe\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.settings.allow_guest_signup(enable=False)\n", + "low_client.settings.allow_guest_signup(enable=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert (\n", + " len(low_client.api.services.user.get_all()) == 2\n", + "), \"Only DS and Admin should be at low side\"\n", + "assert (\n", + " len(high_client.api.services.user.get_all()) == 1\n", + "), \"Only Admin should be at high side\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server_high.land()\n", + "server_low.land()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/02-configure-api-and-sync.ipynb b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/02-configure-api-and-sync.ipynb new file mode 100644 index 00000000000..ad7bf7a823a --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/02-configure-api-and-sync.ipynb @@ -0,0 +1,611 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install db-dtypes google-cloud-bigquery" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "# stdlib\n", + "\n", + "# third party\n", + "# set to use the live APIs\n", + "# import os\n", + "# os.environ[\"TEST_BIGQUERY_APIS_LIVE\"] = \"True\"\n", + "from apis import make_schema\n", + "from apis import make_submit_query\n", + "from apis import make_test_query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "# syft absolute\n", + "import syft as sy\n", + "from syft import test_settings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "server_low = sy.orchestra.launch(\n", + " name=\"bigquery-low\",\n", + " server_side_type=\"low\",\n", + " dev_mode=True,\n", + " n_consumers=1,\n", + " create_producer=True,\n", + " port=\"auto\",\n", + ")\n", + "\n", + "server_high = sy.orchestra.launch(\n", + " name=\"bigquery-high\",\n", + " server_side_type=\"high\",\n", + " dev_mode=True,\n", + " n_consumers=1,\n", + " create_producer=True,\n", + " port=\"auto\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "low_client = server_low.login(email=\"info@openmined.org\", password=\"changethis\")\n", + "high_client = server_high.login(email=\"info@openmined.org\", password=\"changethis\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "assert len(high_client.worker_pools.get_all()) == 2\n", + "assert len(low_client.worker_pools.get_all()) == 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "this_worker_pool_name = \"bigquery-pool\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "# !pip list | grep bigquery" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "# !pip install db-dtypes google-cloud-bigquery" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Twin endpoints" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"metadata": {} + }, + "outputs": [], + "source": [ + "mock_func = make_test_query(\n", + " settings={\n", + " \"rate_limiter_enabled\": True,\n", + " \"calls_per_min\": 10,\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "private_func = make_test_query(\n", + " settings={\n", + " \"rate_limiter_enabled\": False,\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "new_endpoint = sy.TwinAPIEndpoint(\n", + " path=\"bigquery.test_query\",\n", + " description=\"This endpoint allows to query Bigquery storage via SQL queries.\",\n", + " private_function=private_func,\n", + " mock_function=mock_func,\n", + " worker_pool=this_worker_pool_name,\n", + ")\n", + "\n", + "high_client.custom_api.add(endpoint=new_endpoint)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "# Here, we update the endpoint to timeout after 100s (rather the default of 60s)\n", + "high_client.api.services.api.update(\n", + " endpoint_path=\"bigquery.test_query\", endpoint_timeout=120\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "high_client.api.services.api.update(\n", + " endpoint_path=\"bigquery.test_query\", hide_mock_definition=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "schema_function = make_schema(\n", + " settings={\n", + " \"calls_per_min\": 5,\n", + " },\n", + " worker_pool=this_worker_pool_name,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "high_client.custom_api.add(endpoint=schema_function)\n", + "high_client.refresh()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "metadata": {} + }, + "outputs": [], + "source": [ + "dataset_1 = test_settings.get(\"dataset_1\", default=\"dataset_1\")\n", + "dataset_2 = test_settings.get(\"dataset_2\", default=\"dataset_2\")\n", + "table_1 = test_settings.get(\"table_1\", default=\"table_1\")\n", + "table_2 = test_settings.get(\"table_2\", default=\"table_2\")\n", + "table_2_col_id = test_settings.get(\"table_2_col_id\", default=\"table_id\")\n", + "table_2_col_score = test_settings.get(\"table_2_col_score\", default=\"colname\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test mock version\n", + "result = high_client.api.services.bigquery.test_query.mock(\n", + " sql_query=f\"SELECT * FROM {dataset_1}.{table_1} LIMIT 10\"\n", + ")\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.api.services.bigquery.schema()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "submit_query_function = make_submit_query(\n", + " settings={}, worker_pool=this_worker_pool_name\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.custom_api.add(endpoint=submit_query_function)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": 
[], + "source": [ + "high_client.api.services.api.update(\n", + " endpoint_path=\"bigquery.submit_query\", hide_mock_definition=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "high_client.custom_api.api_endpoints()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(high_client.custom_api.api_endpoints()) == 3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert (\n", + " high_client.api.services.bigquery.test_query\n", + " and high_client.api.services.bigquery.submit_query\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test mock version\n", + "result = high_client.api.services.bigquery.test_query.mock(\n", + " sql_query=f\"SELECT * FROM {dataset_1}.{table_1} LIMIT 10\"\n", + ")\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Bug with the new Error PR: message printed multiple times. TODO clean up the duplicate exception messages.\n", + "\n", + "# Test mock version for wrong queries\n", + "with sy.raises(\n", + " sy.SyftException(public_message=\"*must be qualified with a dataset*\"), show=True\n", + "):\n", + " high_client.api.services.bigquery.test_query.mock(\n", + " sql_query=\"SELECT * FROM invalid_table LIMIT 1\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test private version\n", + "result = high_client.api.services.bigquery.test_query.private(\n", + " sql_query=f\"SELECT * FROM {dataset_1}.{table_1} LIMIT 10\"\n", + ")\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Testing submit query\n", + "result = high_client.api.services.bigquery.submit_query(\n", + " func_name=\"my_func\",\n", + " query=f\"SELECT * FROM {dataset_1}.{table_1} LIMIT 1\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert \"Query submitted\" in result\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job = high_client.code.my_func(blocking=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job.result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job.wait()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# syft absolute\n", + "from syft.client.syncing import compare_clients\n", + "from syft.service.job.job_stash import Job\n", + "from syft.service.job.job_stash import JobStatus" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def is_job_to_sync(batch):\n", + " if batch.status != \"NEW\":\n", + " return False\n", + " if not isinstance(batch.root.high_obj, Job):\n", + " return False\n", + " job = batch.root.high_obj\n", + " return job.status in (JobStatus.ERRORED, JobStatus.COMPLETED)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def sync_new_objects(\n", + " 
from_client, to_client, dry_run: bool = True, private_data: bool = False\n", + "):\n", + " sim = \"Simulating \" if dry_run else \"\"\n", + " priv = \"WITH PRIVATE DATA\" if private_data else \"\"\n", + " print(f\"{sim}Syncing from {from_client.name} to {to_client.name} {priv}\")\n", + " changes = []\n", + " diff = compare_clients(\n", + " from_client=from_client, to_client=to_client, hide_usercode=False\n", + " )\n", + " if isinstance(diff, sy.SyftError):\n", + " return diff\n", + "\n", + " for batch in diff.batches:\n", + " try:\n", + " if is_job_to_sync(batch) or batch.status == \"NEW\":\n", + " w = batch.resolve(build_state=False)\n", + " if private_data:\n", + " w.click_share_all_private_data()\n", + " if not dry_run:\n", + " w.click_sync()\n", + " change_text = f\"Synced {batch.status} {batch.root_type.__name__}\"\n", + " if not dry_run:\n", + " changes.append(change_text)\n", + " else:\n", + " print(f\"Would have run: {change_text}\")\n", + " except Exception as e:\n", + " print(\"sync_new_objects\", e)\n", + " raise e\n", + " return changes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = sync_new_objects(high_client, low_client)\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = sync_new_objects(high_client, low_client, dry_run=False)\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert [\n", + " \"Synced NEW TwinAPIEndpoint\",\n", + " \"Synced NEW TwinAPIEndpoint\",\n", + " \"Synced NEW TwinAPIEndpoint\",\n", + "] == result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# widget = sy.sync(from_client=high_client, to_client=low_client, hide_usercode=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # TODO: ignore private function from high side in diff\n", + "# widget" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# widget.click_sync(0)\n", + "# widget.click_sync(1)\n", + "# widget.click_sync(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Some internal helper methods\n", + "\n", + "# widget._share_all()\n", + "# widget._sync_all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server_high.land()\n", + "server_low.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/03-ds-submit-request.ipynb b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/03-ds-submit-request.ipynb new file mode 100644 index 00000000000..8d6c0665dc7 --- /dev/null +++ 
b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/03-ds-submit-request.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# syft absolute\n", + "import syft as sy\n", + "from syft import test_settings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Only low side server and login as DS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server_low = sy.orchestra.launch(\n", + " name=\"bigquery-low\",\n", + " server_side_type=\"low\",\n", + " dev_mode=True,\n", + " n_consumers=1,\n", + " create_producer=True,\n", + " port=\"auto\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds_client = server_low.login(\n", + " email=\"data_scientist@openmined.org\", password=\"verysecurepassword\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Low side research" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(ds_client.custom_api.api_endpoints()) == 3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset_1 = test_settings.get(\"dataset_1\", default=\"dataset_1\")\n", + "dataset_2 = test_settings.get(\"dataset_2\", default=\"dataset_2\")\n", + "table_1 = test_settings.get(\"table_1\", default=\"table_1\")\n", + "table_2 = test_settings.get(\"table_2\", default=\"table_2\")\n", + "table_2_col_id = test_settings.get(\"table_2_col_id\", default=\"table_id\")\n", + "table_2_col_score = test_settings.get(\"table_2_col_score\", default=\"colname\")\n", + "query_limit_size = test_settings.get(\"query_limit_size\", default=10000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = ds_client.api.services.bigquery.test_query.mock(\n", + " sql_query=f\"SELECT * from {dataset_2}.{table_2} limit 10\"\n", + ")\n", + "assert len(result) == 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with sy.raises(sy.SyftException, show=True):\n", + " ds_client.api.services.bigquery.test_query.private(\n", + " sql_query=f\"SELECT * from {dataset_2}.{table_2} limit 10\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "res = ds_client.api.services.bigquery.schema()\n", + "# third party\n", + "import pandas as pd\n", + "\n", + "assert isinstance(res.get(), pd.DataFrame)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FUNC_NAME = \"large_sample\"\n", + "LARGE_SAMPLE_QUERY = f\"SELECT * FROM {dataset_2}.{table_2} LIMIT {query_limit_size}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mock_res = ds_client.api.services.bigquery.test_query(sql_query=LARGE_SAMPLE_QUERY)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "submission = ds_client.api.services.bigquery.submit_query(\n", + " func_name=FUNC_NAME, query=LARGE_SAMPLE_QUERY\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def 
extract_code_path(response):\n", + "    # stdlib\n", + "    import re\n", + "\n", + "    pattern = r\"client\\.code\\.(\\w+)\\(\\)\"\n", + "    match = re.search(pattern, str(response))\n", + "    if match:\n", + "        extracted_code = match.group(1)\n", + "        return extracted_code\n", + "    return None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# submitted func names carry a random suffix, so recover the generated code path from the response\n", + "func_name = extract_code_path(submission)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "api_method = getattr(ds_client.code, func_name, None)\n", + "api_method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# todo: this is very noisy, but it actually passes\n", + "with sy.raises(\n", + "    sy.SyftException(\n", + "        public_message=\"*Please wait for the admin to allow the execution of this code*\"\n", + "    ),\n", + "    show=True,\n", + "):\n", + "    result = api_method(blocking=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert \"large_sample\" in func_name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "api_method_2 = getattr(ds_client.code, func_name, None)\n", + "api_method_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with sy.raises(\n", + "    sy.SyftException(public_message=\"*Your code is waiting for approval*\"), show=True\n", + "):\n", + "    result = api_method_2()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server_low.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/04-do-review-requests.ipynb b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/04-do-review-requests.ipynb new file mode 100644 index 00000000000..a4a632e2f13 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/04-do-review-requests.ipynb @@ -0,0 +1,422 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# syft absolute\n", + "import syft as sy\n", + "from syft.service.code.user_code import UserCode\n", + "from syft.service.request.request import Request" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server_low = sy.orchestra.launch(\n", + "    name=\"bigquery-low\",\n", + "    server_side_type=\"low\",\n", + "    dev_mode=True,\n", + "    n_consumers=1,\n", + "    create_producer=True,\n", + "    port=\"auto\",\n", + ")\n", + "\n", + "server_high = sy.orchestra.launch(\n", + "    name=\"bigquery-high\",\n", + "    server_side_type=\"high\",\n", + "    dev_mode=True,\n", + "    n_consumers=1,\n", + "    create_producer=True,\n", + "    
port=\"auto\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "low_client = server_low.login(email=\"info@openmined.org\", password=\"changethis\")\n", + "high_client = server_high.login(email=\"info@openmined.org\", password=\"changethis\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # todo: this is way too noisy\n", + "# widget = sy.sync(from_client=low_client, to_client=high_client)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# widget" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# sync the users new request" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# syft absolute\n", + "from syft.client.syncing import compare_clients" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# syft absolute\n", + "from syft.service.job.job_stash import Job\n", + "from syft.service.job.job_stash import JobStatus" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def is_job_to_sync(batch):\n", + " if batch.status != \"NEW\":\n", + " return False\n", + " if not isinstance(batch.root.high_obj, Job):\n", + " return False\n", + " job = batch.root.high_obj\n", + " return job.status in (JobStatus.ERRORED, JobStatus.COMPLETED)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def sync_new_objects(\n", + " from_client, to_client, dry_run: bool = True, private_data: bool = False\n", + "):\n", + " sim = \"Simulating \" if dry_run else \"\"\n", + " priv = \"WITH PRIVATE DATA\" if private_data else \"\"\n", + " print(f\"{sim}Syncing from {from_client.name} to {to_client.name} {priv}\")\n", + " changes = []\n", + " diff = compare_clients(\n", + " from_client=from_client, to_client=to_client, hide_usercode=False\n", + " )\n", + " if isinstance(diff, sy.SyftError):\n", + " return diff\n", + "\n", + " for batch in diff.batches:\n", + " try:\n", + " if is_job_to_sync(batch) or batch.status == \"NEW\":\n", + " w = batch.resolve(build_state=False)\n", + " if private_data:\n", + " w.click_share_all_private_data()\n", + " if not dry_run:\n", + " w.click_sync()\n", + " change_text = f\"Synced {batch.status} {batch.root_type.__name__}\"\n", + " if not dry_run:\n", + " changes.append(change_text)\n", + " else:\n", + " print(f\"Would have run: {change_text}\")\n", + " except Exception as e:\n", + " print(\"sync_new_objects\", e)\n", + " raise e\n", + " return changes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sync_new_objects(low_client, high_client)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = sync_new_objects(low_client, high_client, dry_run=False, private_data=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert \"Synced NEW UserCode\" in result\n", + "assert \"Synced NEW Request\" in result" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(high_client.code.get_all()) == 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "requests = high_client.requests\n", + "requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "user_request = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for request in requests:\n", + "    if \"large_sample\" in getattr(\n", + "        getattr(request, \"code\", None), \"service_func_name\", \"\"\n", + "    ):\n", + "        user_request = request" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert user_request" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def execute_request(client, request):\n", + "    if not isinstance(request, Request):\n", + "        return \"This is not a request\"\n", + "\n", + "    code = request.code\n", + "    if not isinstance(code, UserCode):\n", + "        return \"No usercode found\"\n", + "\n", + "    func_name = request.code.service_func_name\n", + "    api_func = getattr(client.code, func_name, None)\n", + "    if api_func is None:\n", + "        return \"Code name was not found on the client.\"\n", + "\n", + "    job = api_func(blocking=False)\n", + "    return job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job = execute_request(high_client, user_request)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job.wait()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # todo: this is way too noisy\n", + "# widget = sy.sync(from_client=high_client, to_client=low_client)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# widget" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sync_new_objects(high_client, low_client)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = sync_new_objects(high_client, low_client, dry_run=False, private_data=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert \"Synced NEW Job\" in result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "requests = low_client.requests\n", + "requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "user_request = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for request in requests:\n", + "    if \"large_sample\" in getattr(\n", + "        getattr(request, \"code\", None), \"service_func_name\", \"\"\n", + "    ):\n", + "        user_request = request" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "user_request.status" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {},
"outputs": [], + "source": [ + "assert \"approved\" in str(user_request.status).lower()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server_high.land()\n", + "server_low.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/05-ds-get-results.ipynb b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/05-ds-get-results.ipynb new file mode 100644 index 00000000000..6981f1e7105 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/05-ds-get-results.ipynb @@ -0,0 +1,131 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# third party\n", + "import pandas as pd\n", + "\n", + "# syft absolute\n", + "import syft as sy\n", + "from syft import test_settings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server_low = sy.orchestra.launch(\n", + " name=\"bigquery-low\",\n", + " server_side_type=\"low\",\n", + " dev_mode=True,\n", + " n_consumers=1,\n", + " create_producer=True,\n", + " port=\"auto\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds_client = server_low.login(\n", + " email=\"data_scientist@openmined.org\", password=\"verysecurepassword\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "api_method = None\n", + "for code in ds_client.code:\n", + " if \"large_sample\" in code.service_func_name:\n", + " api_method = getattr(ds_client.code, code.service_func_name, None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "job = api_method(blocking=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "res = job.wait().get()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert isinstance(res, pd.DataFrame)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query_limit_size = test_settings.get(\"query_limit_size\", default=10000)\n", + "assert len(res) == query_limit_size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server_low.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/__init__.py b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/__init__.py b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/__init__.py new file mode 100644 index 00000000000..7231b580696 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/__init__.py @@ -0,0 +1,23 @@ +# stdlib +import os + +# syft absolute +from syft.util.util import str_to_bool + +# relative +from .submit_query import make_submit_query + +env_var = "TEST_BIGQUERY_APIS_LIVE" +use_live = str_to_bool(str(os.environ.get(env_var, "False"))) +env_name = "Live" if use_live else "Mock" +print(f"Using {env_name} API Code, this will query BigQuery. ${env_var}=={use_live}") + + +if use_live: + # relative + from .live.schema import make_schema + from .live.test_query import make_test_query +else: + # relative + from .mock.schema import make_schema + from .mock.test_query import make_test_query diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/live/__init__.py b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/live/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/live/schema.py b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/live/schema.py new file mode 100644 index 00000000000..5b39d9d9066 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/live/schema.py @@ -0,0 +1,108 @@ +# stdlib +from collections.abc import Callable + +# syft absolute +import syft as sy +from syft import test_settings + +# relative +from ..rate_limiter import is_within_rate_limit + + +def make_schema(settings: dict, worker_pool: str) -> Callable: + updated_settings = { + "calls_per_min": 5, + "rate_limiter_enabled": True, + "credentials": test_settings.gce_service_account.to_dict(), + "region": test_settings.gce_region, + "project_id": test_settings.gce_project_id, + "dataset_1": test_settings.dataset_1, + "table_1": test_settings.table_1, + "table_2": test_settings.table_2, + } | settings + + @sy.api_endpoint( + path="bigquery.schema", + description="This endpoint allows for visualising the metadata of tables available in BigQuery.", + settings=updated_settings, + helper_functions=[ + is_within_rate_limit + ], # Adds ratelimit as this is also a method available to data scientists + worker_pool=worker_pool, + ) + def live_schema( + context, + ) -> str: + # stdlib + import datetime + + # third party + from google.cloud import bigquery # noqa: F811 + from google.oauth2 import service_account + import pandas as pd + + # syft absolute + from syft import SyftException + + # Auth for Bigquer based on the workload identity + credentials = service_account.Credentials.from_service_account_info( + context.settings["credentials"] + ) + scoped_credentials = credentials.with_scopes( + ["https://www.googleapis.com/auth/cloud-platform"] + ) + + client = bigquery.Client( + credentials=scoped_credentials, + location=context.settings["region"], + ) + + # Store a dict with the calltimes for each user, via the email. 
+ if context.settings["rate_limiter_enabled"]: + if context.user.email not in context.state.keys(): + context.state[context.user.email] = [] + + if not context.code.is_within_rate_limit(context): + raise SyftException( + public_message="Rate limit of calls per minute has been reached." + ) + context.state[context.user.email].append(datetime.datetime.now()) + + try: + # Formats the data schema in a data frame format + # Warning: the only supported format types are primitives, np.ndarrays and pd.DataFrames + + data_schema = [] + for table_id in [ + f"{context.settings['dataset_1']}.{context.settings['table_1']}", + f"{context.settings['dataset_1']}.{context.settings['table_2']}", + ]: + table = client.get_table(table_id) + for schema in table.schema: + data_schema.append( + { + "project": str(table.project), + "dataset_id": str(table.dataset_id), + "table_id": str(table.table_id), + "schema_name": str(schema.name), + "schema_field": str(schema.field_type), + "description": str(table.description), + "num_rows": str(table.num_rows), + } + ) + return pd.DataFrame(data_schema) + + except Exception as e: + # not a bigquery exception + if not hasattr(e, "_errors"): + output = f"got exception e: {type(e)} {str(e)}" + raise SyftException( + public_message=f"An error occurred executing the API call {output}" + ) + + # Should add appropriate error handling for what should be exposed to the data scientists. + raise SyftException( + public_message="An error occurred executing the API call, please contact the domain owner." + ) + + return live_schema diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/live/test_query.py b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/live/test_query.py new file mode 100644 index 00000000000..344879dcb62 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/live/test_query.py @@ -0,0 +1,113 @@ +# stdlib +from collections.abc import Callable + +# syft absolute +import syft as sy +from syft import test_settings + +# relative +from ..rate_limiter import is_within_rate_limit + + +def make_test_query(settings) -> Callable: + updated_settings = { + "calls_per_min": 10, + "rate_limiter_enabled": True, + "credentials": test_settings.gce_service_account.to_dict(), + "region": test_settings.gce_region, + "project_id": test_settings.gce_project_id, + } | settings + + # these are the same if you allow the rate limiter to be turned on and off + @sy.api_endpoint_method( + settings=updated_settings, + helper_functions=[is_within_rate_limit], + ) + def live_test_query( + context, + sql_query: str, + ) -> str: + # stdlib + import datetime + + # third party + from google.cloud import bigquery # noqa: F811 + from google.oauth2 import service_account + + # syft absolute + from syft import SyftException + + # Auth for BigQuery based on the workload identity + credentials = service_account.Credentials.from_service_account_info( + context.settings["credentials"] + ) + scoped_credentials = credentials.with_scopes( + ["https://www.googleapis.com/auth/cloud-platform"] + ) + + client = bigquery.Client( + credentials=scoped_credentials, + location=context.settings["region"], + ) + + # Store a dict with the calltimes for each user, via the email.
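+ # Same per-user call log as in schema.py: the rate limit check runs before the query is issued, so a throttled user never reaches BigQuery.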
+ if context.settings["rate_limiter_enabled"]: + if context.user.email not in context.state.keys(): + context.state[context.user.email] = [] + + if not context.code.is_within_rate_limit(context): + raise SyftException( + public_message="Rate limit of calls per minute has been reached." + ) + context.state[context.user.email].append(datetime.datetime.now()) + + try: + rows = client.query_and_wait( + sql_query, + project=context.settings["project_id"], + ) + + if rows.total_rows > 1_000_000: + raise SyftException( + public_message="Please only write queries that gather aggregate statistics" + ) + + return rows.to_dataframe() + + except Exception as e: + # not a bigquery exception + if not hasattr(e, "_errors"): + output = f"got exception e: {type(e)} {str(e)}" + raise SyftException( + public_message=f"An error occurred executing the API call {output}" + ) + + # Forward only the error reasons listed below to the data scientists. + # By default, any exception is only visible to the data owner. + + if e._errors[0]["reason"] in [ + "badRequest", + "blocked", + "duplicate", + "invalidQuery", + "invalid", + "jobBackendError", + "jobInternalError", + "notFound", + "notImplemented", + "rateLimitExceeded", + "resourceInUse", + "resourcesExceeded", + "tableUnavailable", + "timeout", + ]: + raise SyftException( + public_message="Error occurred during the call: " + + e._errors[0]["message"] + ) + else: + raise SyftException( + public_message="An error occurred executing the API call, please contact the domain owner." + ) + + return live_test_query diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/mock/__init__.py b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/mock/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/mock/data.py b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/mock/data.py new file mode 100644 index 00000000000..82262bf7a01 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/mock/data.py @@ -0,0 +1,268 @@ +# stdlib +from math import nan + +schema_dict = { + "project": { + 0: "example-project", + 1: "example-project", + 2: "example-project", + 3: "example-project", + 4: "example-project", + 5: "example-project", + 6: "example-project", + 7: "example-project", + 8: "example-project", + 9: "example-project", + 10: "example-project", + 11: "example-project", + 12: "example-project", + 13: "example-project", + 14: "example-project", + 15: "example-project", + 16: "example-project", + 17: "example-project", + 18: "example-project", + 19: "example-project", + 20: "example-project", + 21: "example-project", + 22: "example-project", + }, + "dataset_id": { + 0: "test_1gb", + 1: "test_1gb", + 2: "test_1gb", + 3: "test_1gb", + 4: "test_1gb", + 5: "test_1gb", + 6: "test_1gb", + 7: "test_1gb", + 8: "test_1gb", + 9: "test_1gb", + 10: "test_1gb", + 11: "test_1gb", + 12: "test_1gb", + 13: "test_1gb", + 14: "test_1gb", + 15: "test_1gb", + 16: "test_1gb", + 17: "test_1gb", + 18: "test_1gb", + 19: "test_1gb", + 20: "test_1gb", + 21: "test_1gb", + 22: "test_1gb", + }, + "table_id": { + 0: "posts", + 1: "posts", + 2: "posts", + 3: "posts", + 4: "posts", + 5: "posts", + 6: "posts", + 7: "comments", + 8: "comments", + 9: "comments", + 10: "comments", + 11: "comments", + 12: "comments", + 13: "comments", + 14: "comments", + 15: "comments", + 16: "comments", + 17: "comments", + 18: "comments", + 19:
"comments", + 20: "comments", + 21: "comments", + 22: "comments", + }, + "schema_name": { + 0: "int64_field_0", + 1: "id", + 2: "name", + 3: "subscribers_count", + 4: "permalink", + 5: "nsfw", + 6: "spam", + 7: "int64_field_0", + 8: "id", + 9: "body", + 10: "parent_id", + 11: "created_at", + 12: "last_modified_at", + 13: "gilded", + 14: "permalink", + 15: "score", + 16: "comment_id", + 17: "post_id", + 18: "author_id", + 19: "spam", + 20: "deleted", + 21: "upvote_raio", + 22: "collapsed_in_crowd_control", + }, + "schema_field": { + 0: "INTEGER", + 1: "STRING", + 2: "STRING", + 3: "INTEGER", + 4: "STRING", + 5: "FLOAT", + 6: "BOOLEAN", + 7: "INTEGER", + 8: "STRING", + 9: "STRING", + 10: "STRING", + 11: "INTEGER", + 12: "INTEGER", + 13: "BOOLEAN", + 14: "STRING", + 15: "INTEGER", + 16: "STRING", + 17: "STRING", + 18: "STRING", + 19: "BOOLEAN", + 20: "BOOLEAN", + 21: "FLOAT", + 22: "BOOLEAN", + }, + "description": { + 0: "None", + 1: "None", + 2: "None", + 3: "None", + 4: "None", + 5: "None", + 6: "None", + 7: "None", + 8: "None", + 9: "None", + 10: "None", + 11: "None", + 12: "None", + 13: "None", + 14: "None", + 15: "None", + 16: "None", + 17: "None", + 18: "None", + 19: "None", + 20: "None", + 21: "None", + 22: "None", + }, + "num_rows": { + 0: "2000000", + 1: "2000000", + 2: "2000000", + 3: "2000000", + 4: "2000000", + 5: "2000000", + 6: "2000000", + 7: "2000000", + 8: "2000000", + 9: "2000000", + 10: "2000000", + 11: "2000000", + 12: "2000000", + 13: "2000000", + 14: "2000000", + 15: "2000000", + 16: "2000000", + 17: "2000000", + 18: "2000000", + 19: "2000000", + 20: "2000000", + 21: "2000000", + 22: "2000000", + }, +} + + +query_dict = { + "int64_field_0": { + 0: 4, + 1: 5, + 2: 10, + 3: 16, + 4: 17, + 5: 23, + 6: 24, + 7: 25, + 8: 27, + 9: 40, + }, + "id": { + 0: "t5_via1x", + 1: "t5_cv9gn", + 2: "t5_8p2tq", + 3: "t5_8fcro", + 4: "t5_td5of", + 5: "t5_z01fv", + 6: "t5_hmqjk", + 7: "t5_1flyj", + 8: "t5_5rwej", + 9: "t5_uurcv", + }, + "name": { + 0: "/channel/mylittlepony", + 1: "/channel/polyamory", + 2: "/channel/Catholicism", + 3: "/channel/cordcutters", + 4: "/channel/stevenuniverse", + 5: "/channel/entitledbitch", + 6: "/channel/engineering", + 7: "/channel/nottheonion", + 8: "/channel/FoodPorn", + 9: "/channel/puppysmiles", + }, + "subscribers_count": { + 0: 4323081, + 1: 2425929, + 2: 4062607, + 3: 7543226, + 4: 2692168, + 5: 2709080, + 6: 8766144, + 7: 2580984, + 8: 7784809, + 9: 3715991, + }, + "permalink": { + 0: "/channel//channel/mylittlepony", + 1: "/channel//channel/polyamory", + 2: "/channel//channel/Catholicism", + 3: "/channel//channel/cordcutters", + 4: "/channel//channel/stevenuniverse", + 5: "/channel//channel/entitledbitch", + 6: "/channel//channel/engineering", + 7: "/channel//channel/nottheonion", + 8: "/channel//channel/FoodPorn", + 9: "/channel//channel/puppysmiles", + }, + "nsfw": { + 0: nan, + 1: nan, + 2: nan, + 3: nan, + 4: nan, + 5: nan, + 6: nan, + 7: nan, + 8: nan, + 9: nan, + }, + "spam": { + 0: False, + 1: False, + 2: False, + 3: False, + 4: False, + 5: False, + 6: False, + 7: False, + 8: False, + 9: False, + }, +} diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/mock/schema.py b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/mock/schema.py new file mode 100644 index 00000000000..a95e04f2f1d --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/mock/schema.py @@ -0,0 +1,52 @@ +# stdlib +from collections.abc import Callable + +# syft absolute +import syft as sy + +# 
relative +from ..rate_limiter import is_within_rate_limit +from .data import schema_dict + + +def make_schema(settings, worker_pool) -> Callable: + updated_settings = { + "calls_per_min": 5, + "rate_limiter_enabled": True, + "schema_dict": schema_dict, + } | settings + + @sy.api_endpoint( + path="bigquery.schema", + description="This endpoint allows for visualising the metadata of tables available in BigQuery.", + settings=updated_settings, + helper_functions=[is_within_rate_limit], + worker_pool=worker_pool, + ) + def mock_schema( + context, + ) -> str: + # syft absolute + from syft import SyftException + + # Store a dict with the call times for each user, keyed by email. + if context.settings["rate_limiter_enabled"]: + # stdlib + import datetime + + if context.user.email not in context.state.keys(): + context.state[context.user.email] = [] + + if not context.code.is_within_rate_limit(context): + raise SyftException( + public_message="Rate limit of calls per minute has been reached." + ) + context.state[context.user.email].append(datetime.datetime.now()) + + # third party + import pandas as pd + + df = pd.DataFrame(context.settings["schema_dict"]) + return df + + return mock_schema diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/mock/test_query.py b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/mock/test_query.py new file mode 100644 index 00000000000..ae028a8cf36 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/mock/test_query.py @@ -0,0 +1,138 @@ +# stdlib +from collections.abc import Callable + +# syft absolute +import syft as sy + +# relative +from ..rate_limiter import is_within_rate_limit +from .data import query_dict + + +def extract_limit_value(sql_query: str) -> int | None: + # stdlib + import re + + limit_pattern = re.compile(r"\bLIMIT\s+(\d+)\b", re.IGNORECASE) + match = limit_pattern.search(sql_query) + if match: + return int(match.group(1)) + return None + + +def is_valid_sql(query: str) -> bool: + # stdlib + import sqlite3 + + # Prepare an in-memory SQLite database + conn = sqlite3.connect(":memory:") + cursor = conn.cursor() + + try: + # Use the EXPLAIN QUERY PLAN command to get the query plan + cursor.execute(f"EXPLAIN QUERY PLAN {query}") + return True + except sqlite3.Error as e: + if "no such table" in str(e).lower(): + return True + return False + finally: + conn.close() + + +def adjust_dataframe_rows(df, target_rows: int): + # third party + import pandas as pd + + current_rows = len(df) + + if target_rows > current_rows: + # Repeat rows to match target_rows + repeat_times = (target_rows + current_rows - 1) // current_rows + df_expanded = pd.concat([df] * repeat_times, ignore_index=True).head( + target_rows + ) + else: + # Truncate rows to match target_rows + df_expanded = df.head(target_rows) + + return df_expanded + + +def make_test_query(settings: dict) -> Callable: + updated_settings = { + "calls_per_min": 10, + "rate_limiter_enabled": True, + "query_dict": query_dict, + } | settings + + # the live and mock versions are identical if the rate limiter can be toggled on and off + @sy.api_endpoint_method( + settings=updated_settings, + helper_functions=[ + is_within_rate_limit, + extract_limit_value, + is_valid_sql, + adjust_dataframe_rows, + ], + ) + def mock_test_query( + context, + sql_query: str, + ) -> str: + # stdlib + import datetime + + # third party + from google.api_core.exceptions import BadRequest + + # syft absolute + from syft import SyftException + + # Store a dict with the call times for each
user, keyed by email. + if context.settings["rate_limiter_enabled"]: + if context.user.email not in context.state.keys(): + context.state[context.user.email] = [] + + if not context.code.is_within_rate_limit(context): + raise SyftException( + public_message="Rate limit of calls per minute has been reached." + ) + context.state[context.user.email].append(datetime.datetime.now()) + + bad_table = "invalid_table" + bad_post = ( + "BadRequest: 400 POST " + "https://bigquery.googleapis.com/bigquery/v2/projects/project-id/" + "queries?prettyPrint=false: " + ) + if bad_table in sql_query: + try: + raise BadRequest( + f'{bad_post} Table "{bad_table}" must be qualified ' + "with a dataset (e.g. dataset.table)." + ) + except Exception as e: + raise SyftException( + public_message=f"*must be qualified with a dataset*. {e}" + ) + + if not context.code.is_valid_sql(sql_query): + raise BadRequest( + f'{bad_post} Syntax error: Unexpected identifier "{sql_query}" at [1:1]' + ) + + # third party + import pandas as pd + + limit = context.code.extract_limit_value(sql_query) + if limit is not None and limit > 1_000_000: + raise SyftException( + public_message="Please only write queries that gather aggregate statistics" + ) + + base_df = pd.DataFrame(context.settings["query_dict"]) + + # without a LIMIT clause, return the base mock frame unchanged + df = base_df if limit is None else context.code.adjust_dataframe_rows(base_df, limit) + return df + + return mock_test_query diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/rate_limiter.py b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/rate_limiter.py new file mode 100644 index 00000000000..8ce319b61f4 --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/rate_limiter.py @@ -0,0 +1,16 @@ +def is_within_rate_limit(context) -> bool: + """Rate limiter for custom API calls made by users.""" + # stdlib + import datetime + + state = context.state + settings = context.settings + email = context.user.email + + current_time = datetime.datetime.now() + calls_last_min = [ + 1 if (current_time - call_time).seconds < 60 else 0 + for call_time in state[email] + ] + + return sum(calls_last_min) < settings.get("calls_per_min", 5) diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/submit_query.py b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/submit_query.py new file mode 100644 index 00000000000..a0125ee009b --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/apis/submit_query.py @@ -0,0 +1,42 @@ +# syft absolute +import syft as sy + + +def make_submit_query(settings, worker_pool): + updated_settings = {"user_code_worker": worker_pool} | settings + + @sy.api_endpoint( + path="bigquery.submit_query", + description="API endpoint that allows you to submit SQL queries to run on the private data.", + worker_pool=worker_pool, + settings=updated_settings, + ) + def submit_query( + context, + func_name: str, + query: str, + ) -> str: + # syft absolute + import syft as sy + + @sy.syft_function( + name=func_name, + input_policy=sy.MixedInputPolicy( + endpoint=sy.Constant( + val=context.admin_client.api.services.bigquery.test_query + ), + query=sy.Constant(val=query), + client=context.admin_client, + ), + worker_pool_name=context.settings["user_code_worker"], + ) + def execute_query(query: str, endpoint): + res = endpoint(sql_query=query) + return res + + request = context.user_client.code.request_code_execution(execute_query) + context.admin_client.requests.set_tags(request, ["autosync"]) + + return f"Query submitted {request}. 
Use `client.code.{func_name}()` to run your query" + + return submit_query diff --git a/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/sync_helpers.py b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/sync_helpers.py new file mode 100644 index 00000000000..e1d558016ba --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/0.9.1_notebooks/sync_helpers.py @@ -0,0 +1,190 @@ +# third party +from tqdm import tqdm + +# syft absolute +import syft as sy +from syft.client.datasite_client import DatasiteClient +from syft.client.syncing import compare_clients +from syft.service.code.user_code import UserCode +from syft.service.job.job_stash import Job +from syft.service.job.job_stash import JobStatus +from syft.service.request.request import Request +from syft.service.request.request import RequestStatus +from syft.service.sync.diff_state import ObjectDiffBatch +from syft.types.result import Err + + +def deny_requests_without_autosync_tag(client_low: DatasiteClient): + # Deny all requests that are not autosync + requests = client_low.requests.get_all() + if isinstance(requests, sy.SyftError): + print(requests) + return + + denied_requests = [] + for request in tqdm(requests): + if request.status != RequestStatus.PENDING: + continue + if "autosync" not in request.tags: + request.deny( + reason="This request has been denied automatically. " + "Please use the designated API to submit your request." + ) + denied_requests.append(request.id) + print(f"Denied {len(denied_requests)} requests without autosync tag") + + +def is_request_to_sync(batch: ObjectDiffBatch) -> bool: + # True if this is a new low-side request + # TODO add condition for sql requests/usercodes + low_request = batch.root.low_obj + return ( + isinstance(low_request, Request) + and batch.status == "NEW" + and "autosync" in low_request.tags + ) + + +def is_job_to_sync(batch: ObjectDiffBatch): + # True if this is a new high-side job that is either COMPLETED or ERRORED + if batch.status != "NEW": + return False + if not isinstance(batch.root.high_obj, Job): + return False + job = batch.root.high_obj + return job.status in (JobStatus.ERRORED, JobStatus.COMPLETED) + + +def execute_requests( + client_high: DatasiteClient, request_ids: list[sy.UID] +) -> dict[sy.UID, Job]: + jobs_by_request_id = {} + for request_id in request_ids: + request = client_high.requests.get_by_uid(request_id) + if not isinstance(request, Request): + continue + + code = request.code + if not isinstance(code, UserCode): + continue + + func_name = request.code.service_func_name + api_func = getattr(client_high.code, func_name, None) + if api_func is None: + continue + + job = api_func(blocking=False) + jobs_by_request_id[request_id] = job + + return jobs_by_request_id + + +def deny_failed_jobs( + client_low: DatasiteClient, + jobs: list[Job], +) -> None: + # NOTE no syncing is needed, requests are denied on the low side + denied_requests = [] + + for job in jobs: + if job.status != JobStatus.ERRORED: + continue + + error_result = job.result + if isinstance(error_result, Err): + error_msg = error_result.err_value + else: + error_msg = "An unknown error occurred, please check the Job logs for more information." 
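+ # Map the failed job back to its originating low-side request via the job's user_code_id, then deny that request with the message above.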
+ + code_id = job.user_code_id + if code_id is None: + continue + requests = client_low.requests.get_by_usercode_id(code_id) + if isinstance(requests, list) and len(requests) > 0: + request = requests[0] + request.deny(reason=f"Execution failed: {error_msg}") + denied_requests.append(request.id) + else: + print(f"Failed to deny request for job {job.id}") + + print(f"Denied {len(denied_requests)} failed requests") + + +def sync_finished_jobs( + client_low: DatasiteClient, + client_high: DatasiteClient, +) -> dict[sy.UID, sy.SyftError | sy.SyftSuccess] | sy.SyftError: + sync_job_results = {} + synced_jobs = [] + diff = compare_clients( + from_client=client_high, to_client=client_low, include_types=["job"] + ) + if isinstance(diff, sy.SyftError): + print(diff) + return diff + + for batch in diff.batches: + if is_job_to_sync(batch): + job = batch.root.high_obj + + w = batch.resolve(build_state=False) + share_result = w.click_share_all_private_data() + if isinstance(share_result, sy.SyftError): + sync_job_results[job.id] = share_result + continue + sync_result = w.click_sync() + + synced_jobs.append(job) + sync_job_results[job.id] = sync_result + + print(f"Sharing {len(sync_job_results)} new results") + deny_failed_jobs(client_low, synced_jobs) + return sync_job_results + + +def sync_new_requests( + client_low: DatasiteClient, + client_high: DatasiteClient, +) -> dict[sy.UID, sy.SyftSuccess | sy.SyftError] | sy.SyftError: + sync_request_results = {} + diff = compare_clients( + from_client=client_low, to_client=client_high, include_types=["request"] + ) + if isinstance(diff, sy.SyftError): + print(diff) + return diff + print(f"{len(diff.batches)} request batches found") + for batch in tqdm(diff.batches): + if is_request_to_sync(batch): + request_id = batch.root.low_obj.id + w = batch.resolve(build_state=False) + result = w.click_sync() + sync_request_results[request_id] = result + return sync_request_results + + +def sync_and_execute_new_requests( + client_low: DatasiteClient, client_high: DatasiteClient +) -> None: + sync_results = sync_new_requests(client_low, client_high) + if isinstance(sync_results, sy.SyftError): + print(sync_results) + return + + request_ids = [ + uid for uid, res in sync_results.items() if isinstance(res, sy.SyftSuccess) + ] + print(f"Synced {len(request_ids)} new requests") + + jobs_by_request = execute_requests(client_high, request_ids) + print(f"Started {len(jobs_by_request)} new jobs") + + +def auto_sync(client_low: DatasiteClient, client_high: DatasiteClient) -> None: + print("Starting auto sync") + print("Denying requests without the autosync tag") + deny_requests_without_autosync_tag(client_low) + print("Syncing and executing") + sync_and_execute_new_requests(client_low, client_high) + sync_finished_jobs(client_low, client_high) + print("Finished auto sync") diff --git a/notebooks/scenarios/bigquery/upgradability/sync/1-dump-database-to-file.ipynb b/notebooks/scenarios/bigquery/upgradability/sync/1-dump-database-to-file.ipynb new file mode 100644 index 00000000000..09b02ee5ddd --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/1-dump-database-to-file.ipynb @@ -0,0 +1,201 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "# stdlib\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "# syft absolute\n", + "import syft as sy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [
"server_low = sy.orchestra.launch(\n", + " name=\"bigquery-low\",\n", + " server_side_type=\"low\",\n", + " dev_mode=True,\n", + " n_consumers=1,\n", + " create_producer=True,\n", + " port=\"auto\",\n", + ")\n", + "\n", + "server_high = sy.orchestra.launch(\n", + " name=\"bigquery-high\",\n", + " server_side_type=\"high\",\n", + " dev_mode=True,\n", + " n_consumers=1,\n", + " create_producer=True,\n", + " port=\"auto\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "low_client = server_low.login(email=\"info@openmined.org\", password=\"changethis\")\n", + "high_client = server_high.login(email=\"info@openmined.org\", password=\"changethis\")" + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "# Dump low side" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "migration_data_dir = Path(os.getenv(\"MIGRATION_DATA_DIR\", \".\"))\n", + "migration_data_dir.mkdir(exist_ok=True)\n", + "\n", + "low_blob_path = migration_data_dir / \"migration_low.blob\"\n", + "low_yaml_path = migration_data_dir / \"migration_low.yaml\"\n", + "\n", + "low_blob_path.unlink(missing_ok=True)\n", + "low_yaml_path.unlink(missing_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "# Dump low state\n", + "\n", + "low_migration_data = low_client.get_migration_data(include_blobs=True)\n", + "low_migration_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "low_migration_data.save(path=low_blob_path, yaml_path=low_yaml_path)" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "# Dump high side" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "high_blob_path = migration_data_dir / \"migration_high.blob\"\n", + "high_yaml_path = migration_data_dir / \"migration_high.yaml\"\n", + "\n", + "high_blob_path.unlink(missing_ok=True)\n", + "high_yaml_path.unlink(missing_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "high_migration_data = high_client.get_migration_data(include_blobs=True)\n", + "high_migration_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "high_migration_data.save(path=high_blob_path, yaml_path=high_yaml_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "for path in (low_blob_path, low_yaml_path, high_blob_path, high_yaml_path):\n", + " assert path.exists(), f\"Migration file {path} does not exist\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "if server_high.server_type.value == \"python\":\n", + " server_high.land()\n", + "\n", + "if server_low.server_type.value == \"python\":\n", + " server_low.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/scenarios/bigquery/upgradability/sync/2-migrate-for-scenarios.ipynb b/notebooks/scenarios/bigquery/upgradability/sync/2-migrate-for-scenarios.ipynb new file mode 100644 index 00000000000..326eb8c62cd --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/2-migrate-for-scenarios.ipynb @@ -0,0 +1,403 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# stdlib\n", + "import os\n", + "from os import environ as env\n", + "from pathlib import Path\n", + "\n", + "# syft absolute\n", + "import syft as sy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# stdlib\n", + "import sys\n", + "\n", + "# syft absolute\n", + "from syft.util.util import find_base_dir_with_tox_ini\n", + "from syft.util.util import get_caller_file_path\n", + "from syft.util.util import is_interpreter_jupyter\n", + "\n", + "\n", + "def add_helper_path_to_python_path() -> None:\n", + " current_path = \".\"\n", + "\n", + " # jupyter uses \".\" which resolves to the notebook\n", + " if not is_interpreter_jupyter():\n", + " # python uses the file which has from syft import test_settings in it\n", + " import_path = get_caller_file_path()\n", + " if import_path:\n", + " current_path = import_path\n", + "\n", + " base_dir = find_base_dir_with_tox_ini(current_path)\n", + " notebook_helper_path = os.path.join(base_dir, \"notebooks/notebook_helpers\")\n", + " sys.path.append(notebook_helper_path)\n", + "\n", + "\n", + "add_helper_path_to_python_path()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# third party\n", + "from email_helpers import load_users\n", + "from job_helpers import load_jobs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# when in k8s these are the default values\n", + "ROOT_EMAIL = \"admin@bigquery.org\"\n", + "ROOT_PASSWORD = \"bqpw\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# in case we are not in k8s we set them here for orchestra to use\n", + "env[\"DEFAULT_ROOT_EMAIL\"] = ROOT_EMAIL\n", + "env[\"DEFAULT_ROOT_PASSWORD\"] = ROOT_PASSWORD" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Login" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "server = sy.orchestra.launch(\n", + " name=\"bigquery-high\",\n", + " dev_mode=True,\n", + " server_side_type=\"high\",\n", + " reset=True,\n", + " port=\"8080\",\n", + " n_consumers=1, # How many workers to be spawned\n", + " create_producer=True, # Can produce more workers\n", + ")\n", + "\n", + "client = sy.login(url=\"http://localhost:8080\", email=ROOT_EMAIL, password=ROOT_PASSWORD)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check if this is a new server\n", + "migration_data = client.get_migration_data()\n", + "\n", + "# assert len(migration_data.store_objects[User]) == 1\n", + "# assert UserCode not in migration_data.store_objects" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "migration_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load migration data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pwd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# migration_data_dir = Path(os.getenv(\"MIGRATION_DATA_DIR\", \".\"))\n", + "migration_data_dir = Path(\"/home/teo/OpenMined/PySyft/.tox/.tmp/migration\")\n", + "blob_path = migration_data_dir / \"migration.blob\"\n", + "yaml_path = migration_data_dir / \"migration.yaml\"\n", + "\n", + "print(f\"Loading migration data from {str(blob_path.resolve())}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "res = client.load_migration_data(blob_path)\n", + "assert isinstance(res, sy.SyftSuccess), res.message" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "res" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sy.upgrade_custom_workerpools(client, blob_path, mode=\"auto\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Post migration tests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "TODO:\n", + " * verify users\n", + " * login\n", + " * check every role\n", + " * mostly check on lengths\n", + " * can a DS see the results of their old jobs/logs\n", + " * still use the api schema both mock \n", + " * still submit a new query via submit_query\n", + " * can admin still approve and approve_by_running, deny\n", + " * check on old broken queries\n", + " * create a new broken query\n", + " * can ds get the results of the new queries\n", + " * emails should work now\n", + " * test in k8s (both L2 and L0)\n", + " * test in L0 (migrate both nodes?)\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "users = load_users(client, path=\"0.9.1_notebooks/users.json\")\n", + "jobs = load_jobs(users, client, filepath=\"0.9.1_notebooks/jobs.json\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check users" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO fix users??" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.users" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "users" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Old jobs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# submitted_jobs = [job for job in jobs if job.is_submitted]\n", + "reviewed_jobs = [job for job in jobs if job.admin_reviewed]\n", + "reviewed_jobs_should_succeed = [j for j in reviewed_jobs if j.should_succeed]\n", + "reviewed_jobs_should_fail = [j for j in reviewed_jobs if not j.should_succeed]\n", + "\n", + "print(\n", + "    f\"{len(reviewed_jobs)=}, {len(reviewed_jobs_should_succeed)=}, {len(reviewed_jobs_should_fail)=}\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for job in reviewed_jobs_should_succeed:\n", + "    print(f\"> Checking job: {job.job_type} {job.func_name} for user {job.user_email}\")\n", + "    api_method = job.code_method\n", + "    j = api_method(blocking=False)\n", + "    res = j.wait()\n", + "\n", + "    if isinstance(res, sy.SyftError):\n", + "        raise sy.SyftException(public_message=\"Expected success, got error\")\n", + "\n", + "    result = res.get()\n", + "    job.result_as_expected = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for job in reviewed_jobs_should_fail:\n", + "    print(f\"> Checking job: {job.job_type} {job.func_name} for user {job.user_email}\")\n", + "    api_method = job.code_method\n", + "\n", + "    j = api_method(blocking=False)\n", + "    res = j.wait()\n", + "    if isinstance(res, sy.SyftError):\n", + "        job.result_as_expected = True\n", + "    else:\n", + "        raise sy.SyftException(public_message=f\"Expected failure, but job didn't raise; got {type(j)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "expected_jobs = [job for job in jobs if job.result_as_expected]\n", + "print(f\"got expected_jobs: {len(expected_jobs)} == reviewed_jobs: {len(reviewed_jobs)}\")\n", + "assert len(reviewed_jobs) == len(expected_jobs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use old DS to go through the flow again" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds_client = users[0].client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check on emails now?"
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "syft_3.12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/scenarios/bigquery/upgradability/sync/migration_high.blob b/notebooks/scenarios/bigquery/upgradability/sync/migration_high.blob new file mode 100644 index 00000000000..4143e59c7af Binary files /dev/null and b/notebooks/scenarios/bigquery/upgradability/sync/migration_high.blob differ diff --git a/notebooks/scenarios/bigquery/upgradability/sync/migration_high.yaml b/notebooks/scenarios/bigquery/upgradability/sync/migration_high.yaml new file mode 100644 index 00000000000..c3819a0c46a --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/migration_high.yaml @@ -0,0 +1,6 @@ +server: + env: + - name: SERVER_UID + value: fbdf5a287e58454cbbd3fac4ad744d37 + - name: SERVER_PRIVATE_KEY + value: fcfd09deed32e3574558b6719fed46e0b8fd957d59608e9d8b42ef07c6080d3e diff --git a/notebooks/scenarios/bigquery/upgradability/sync/migration_low.blob b/notebooks/scenarios/bigquery/upgradability/sync/migration_low.blob new file mode 100644 index 00000000000..6abeef2b057 Binary files /dev/null and b/notebooks/scenarios/bigquery/upgradability/sync/migration_low.blob differ diff --git a/notebooks/scenarios/bigquery/upgradability/sync/migration_low.yaml b/notebooks/scenarios/bigquery/upgradability/sync/migration_low.yaml new file mode 100644 index 00000000000..c950671118c --- /dev/null +++ b/notebooks/scenarios/bigquery/upgradability/sync/migration_low.yaml @@ -0,0 +1,6 @@ +server: + env: + - name: SERVER_UID + value: 4a471a09f56b4a1d809c0a7614074283 + - name: SERVER_PRIVATE_KEY + value: 3c095c07c94d7f7aec863d61641c71c467cee08cf9a44120a9cb7a493def22cc diff --git a/packages/syft/src/syft/service/worker/worker_image.py b/packages/syft/src/syft/service/worker/worker_image.py index 17a42ead8bc..10a581439fa 100644 --- a/packages/syft/src/syft/service/worker/worker_image.py +++ b/packages/syft/src/syft/service/worker/worker_image.py @@ -41,6 +41,10 @@ class SyftWorkerImageV1(SyftObject): image_hash: str | None = None built_at: DateTime | None = None + @property + def is_prebuilt(self) -> bool: + return isinstance(self.config, PrebuiltWorkerConfig) + @serializable() class SyftWorkerImage(SyftObject): diff --git a/tox.ini b/tox.ini index 7a0bbd89d12..eecfd6712ae 100644 --- a/tox.ini +++ b/tox.ini @@ -371,7 +371,7 @@ commands = bash -c "echo Running with ORCHESTRA_DEPLOYMENT_TYPE=$ORCHESTRA_DEPLOYMENT_TYPE DEV_MODE=$DEV_MODE TEST_NOTEBOOK_PATHS=$TEST_NOTEBOOK_PATHS; date" bash -c "for subfolder in $(echo ${TEST_NOTEBOOK_PATHS} | tr ',' ' ');\ do \ - pytest -s -x --nbmake --nbmake-timeout=1000 "$subfolder" --ignore=scenarios/bigquery/sync -p no:randomly -vvvv --log-cli-level=DEBUG --capture=no;\ + pytest -s -x --nbmake --nbmake-timeout=1000 "$subfolder" --ignore=scenarios/bigquery/sync --ignore=scenarios/bigquery/upgradability -p no:randomly -vvvv --log-cli-level=DEBUG --capture=no;\ done" # This is testing BQ with syncing and with in-memory python @@ -481,7 +481,7 @@ commands = # Checking logs generated & startup of bigquery-high bash -c '(kubectl logs service/backend --context k3d-${DATASITE_CLUSTER_NAME} --namespace syft -f &) | grep -q "Application startup complete" 
|| true' - bash -c "pytest -s -x --nbmake notebooks/scenarios/bigquery -p no:randomly --ignore=notebooks/scenarios/bigquery/sync -vvvv --nbmake-timeout=1000 --log-cli-level=DEBUG --capture=no;" + bash -c "pytest -s -x --nbmake notebooks/scenarios/bigquery -p no:randomly --ignore=notebooks/scenarios/bigquery/sync --ignore=notebooks/scenarios/bigquery/upgradability -vvvv --nbmake-timeout=1000 --log-cli-level=DEBUG --capture=no;" # deleting clusters created bash -c "CLUSTER_NAME=${DATASITE_CLUSTER_NAME} tox -e dev.k8s.destroy || true" @@ -1566,3 +1566,71 @@ commands_post = bash -c "CLUSTER_NAME=${DATASITE_CLUSTER_NAME} tox -e dev.k8s.destroy || true" bash -c 'rm -f ${MIGRATION_DATA_DIR}/migration.blob' bash -c 'rm -f ${MIGRATION_DATA_DIR}/migration.yaml' + +[testenv:migration.scenarios.prepare] +description = Prepare Migration Data +pip_pre = True +setenv = + MIGRATION_DATA_DIR = {env:MIGRATION_DATA_DIR:notebooks/scenarios/bigquery/upgradability} + ORCHESTRA_DEPLOYMENT_TYPE = {env:ORCHESTRA_DEPLOYMENT_TYPE:python} + DEV_MODE = {env:DEV_MODE:True} + TEST_NOTEBOOK_PATHS = {env:TEST_NOTEBOOK_PATHS:notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks} + TEST_query_limit_size={env:test_query_limit_size:500000} + SERVER_URL = {env:SERVER_URL:http://localhost} + SERVER_PORT = {env:SERVER_PORT:8080} + NUM_TEST_USERS = {env:NUM_TEST_USERS:5} + NUM_TEST_JOBS = {env:NUM_TEST_JOBS:10} +deps = + nbmake + requests + syft[dev,datascience]==0.9.1 + ; {[testenv:syft]deps} + db-dtypes + google-cloud-bigquery + aiosmtpd +allowlist_externals = + bash + python +commands = + ; Run notebooks to prepare migration data + bash -c 'pwd' + bash -c 'python -c "import syft as sy; print(\"Migrating from syft version:\", sy.__version__)"' + bash -c "echo Running with ORCHESTRA_DEPLOYMENT_TYPE=$ORCHESTRA_DEPLOYMENT_TYPE DEV_MODE=$DEV_MODE TEST_NOTEBOOK_PATHS=$TEST_NOTEBOOK_PATHS; date" + + bash -c "for subfolder in $(echo ${TEST_NOTEBOOK_PATHS} | tr ',' ' ');\ + do \ + pytest -s -x --nbmake --nbmake-timeout=1000 "$subfolder" --ignore=notebooks/scenarios/bigquery/sync -p no:randomly -vvvv --log-cli-level=DEBUG --capture=no;\ + done" + + pytest -x --nbmake --nbmake-timeout=1000 notebooks/scenarios/bigquery/upgradability/1-dump-database-to-file.ipynb -vvvv + bash -c 'ls -l ${MIGRATION_DATA_DIR}/migration.blob' + bash -c 'ls -l ${MIGRATION_DATA_DIR}/migration.yaml' + bash -c "echo 'Migration data prepared in ${MIGRATION_DATA_DIR}'" + +[testenv:migration.scenarios.test] +description = Migration Test +setenv = + MIGRATION_DATA_DIR = {env:MIGRATION_DATA_DIR:.} + ORCHESTRA_DEPLOYMENT_TYPE = {env:ORCHESTRA_DEPLOYMENT_TYPE:python} + DEV_MODE = {env:DEV_MODE:True} + TEST_NOTEBOOK_PATHS = {env:TEST_NOTEBOOK_PATHS:notebooks/scenarios/bigquery/upgradability/0.9.1_notebooks} + TEST_query_limit_size={env:test_query_limit_size:500000} + SERVER_URL = {env:SERVER_URL:http://localhost} + SERVER_PORT = {env:SERVER_PORT:8080} + NUM_TEST_USERS = {env:NUM_TEST_USERS:5} + NUM_TEST_JOBS = {env:NUM_TEST_JOBS:10} +deps = + -e{toxinidir}/packages/syft[dev] + nbmake + db-dtypes + google-cloud-bigquery + aiosmtpd +changedir = {toxinidir}/notebooks +allowlist_externals = + bash + tox + pytest +commands = + ; tox -e migration.prepare + bash -c 'python -c "import syft as sy; print(\"Migrating to syft version:\", sy.__version__)"' + pytest -x --nbmake --nbmake-timeout=1000 scenarios/bigquery/upgradability/2-migrate-for-scenarios.ipynb -vvvv --log-cli-level=DEBUG
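
Note on the rate limiting used throughout this diff: every custom endpoint follows the same pattern. The endpoint appends a timestamp to context.state[user.email] on each successful call, and is_within_rate_limit (apis/rate_limiter.py above) counts the timestamps from the last 60 seconds against settings["calls_per_min"]. Below is a minimal, self-contained sketch of that flow; the SimpleNamespace context is a hypothetical stand-in for syft's endpoint context, and only the state/settings/user.email attributes the helper actually reads are assumed.

# Sketch of the rate-limiting flow used by the endpoints in this diff.
# stdlib
import datetime
from types import SimpleNamespace


def is_within_rate_limit(context) -> bool:
    """Count calls in the last 60 seconds; condensed from apis/rate_limiter.py above."""
    current_time = datetime.datetime.now()
    calls_last_min = [
        1 if (current_time - call_time).seconds < 60 else 0
        for call_time in context.state[context.user.email]
    ]
    return sum(calls_last_min) < context.settings.get("calls_per_min", 5)


# Hypothetical stand-in for the endpoint context.
context = SimpleNamespace(
    state={"ds@example.com": []},  # per-user call times, keyed by email
    settings={"calls_per_min": 2},
    user=SimpleNamespace(email="ds@example.com"),
)

for call in range(3):
    if is_within_rate_limit(context):
        # the endpoints record the call time only after the check passes
        context.state[context.user.email].append(datetime.datetime.now())
        print(f"call {call}: allowed")
    else:
        print(f"call {call}: rejected, rate limit reached")
# Expected: calls 0 and 1 are allowed, call 2 is rejected.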
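Similarly, the mock bigquery.test_query endpoint fakes realistic result sizes by reading the LIMIT clause out of the SQL string and resizing the small query_dict frame to match. A runnable sketch of that mechanism, with extract_limit_value and adjust_dataframe_rows condensed from apis/mock/test_query.py above; the sample frame and query here are illustrative only.

# Sketch of how the mock endpoint sizes its fake results.
# stdlib
import re

# third party
import pandas as pd


def extract_limit_value(sql_query: str) -> int | None:
    """Pull the LIMIT value out of the query, or None if there is none."""
    match = re.search(r"\bLIMIT\s+(\d+)\b", sql_query, re.IGNORECASE)
    return int(match.group(1)) if match else None


def adjust_dataframe_rows(df: pd.DataFrame, target_rows: int) -> pd.DataFrame:
    """Repeat or truncate rows until the frame has exactly target_rows rows."""
    current_rows = len(df)
    if target_rows > current_rows:
        repeat_times = (target_rows + current_rows - 1) // current_rows
        return pd.concat([df] * repeat_times, ignore_index=True).head(target_rows)
    return df.head(target_rows)


base_df = pd.DataFrame({"id": ["t5_via1x", "t5_cv9gn"], "score": [1, 2]})
limit = extract_limit_value("SELECT * FROM test_1gb.posts LIMIT 5")  # -> 5
df = base_df if limit is None else adjust_dataframe_rows(base_df, limit)
print(len(df))  # 5: the two mock rows are repeated, then truncated, to match LIMIT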