From 6cf4e5f3db4cc1cccfadf62cbdf9c763ba4f340e Mon Sep 17 00:00:00 2001 From: Max Chis Date: Sat, 18 Oct 2025 14:22:18 -0400 Subject: [PATCH 1/6] Begin draft --- src/api/endpoints/submit/url/queries/core.py | 3 +- .../endpoints/suggest}/__init__.py | 0 src/api/endpoints/suggest/routes.py | 19 +++++ src/api/endpoints/suggest/url/__init__.py | 0 .../endpoints/suggest/url/models/__init__.py | 0 .../endpoints/suggest/url/models/request.py | 13 ++++ .../suggest/url/models/response/__init__.py | 0 .../suggest/url/models/response/enums.py | 7 ++ .../suggest/url/models/response/model.py | 9 +++ .../endpoints/suggest/url/queries/__init__.py | 0 src/api/endpoints/suggest/url/queries/core.py | 73 +++++++++++++++++++ src/api/endpoints/suggest/url/wrapper.py | 24 ++++++ .../probe/queries/insert_redirects/query.py | 2 +- .../insert_redirects/request_manager.py | 4 +- src/db/queries/urls_exist/__init__.py | 0 .../exist => db/queries/urls_exist}/model.py | 0 .../exist => db/queries/urls_exist}/query.py | 5 +- src/db/queries/urls_exist/requester.py | 41 +++++++++++ src/db/utils/validate.py | 15 ---- src/util/models/full_url.py | 4 + src/util/url.py | 16 +++- 21 files changed, 211 insertions(+), 24 deletions(-) rename src/{core/tasks/url/operators/probe/queries/urls/exist => api/endpoints/suggest}/__init__.py (100%) create mode 100644 src/api/endpoints/suggest/routes.py create mode 100644 src/api/endpoints/suggest/url/__init__.py create mode 100644 src/api/endpoints/suggest/url/models/__init__.py create mode 100644 src/api/endpoints/suggest/url/models/request.py create mode 100644 src/api/endpoints/suggest/url/models/response/__init__.py create mode 100644 src/api/endpoints/suggest/url/models/response/enums.py create mode 100644 src/api/endpoints/suggest/url/models/response/model.py create mode 100644 src/api/endpoints/suggest/url/queries/__init__.py create mode 100644 src/api/endpoints/suggest/url/queries/core.py create mode 100644 src/api/endpoints/suggest/url/wrapper.py create 
mode 100644 src/db/queries/urls_exist/__init__.py rename src/{core/tasks/url/operators/probe/queries/urls/exist => db/queries/urls_exist}/model.py (100%) rename src/{core/tasks/url/operators/probe/queries/urls/exist => db/queries/urls_exist}/query.py (91%) create mode 100644 src/db/queries/urls_exist/requester.py diff --git a/src/api/endpoints/submit/url/queries/core.py b/src/api/endpoints/submit/url/queries/core.py index 513d26ad..f65f81d0 100644 --- a/src/api/endpoints/submit/url/queries/core.py +++ b/src/api/endpoints/submit/url/queries/core.py @@ -18,9 +18,8 @@ from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.queries.base.builder import QueryBuilderBase -from src.db.utils.validate import is_valid_url from src.util.models.url_and_scheme import URLAndScheme -from src.util.url import clean_url, get_url_and_scheme +from src.util.url import clean_url, get_url_and_scheme, is_valid_url class SubmitURLQueryBuilder(QueryBuilderBase): diff --git a/src/core/tasks/url/operators/probe/queries/urls/exist/__init__.py b/src/api/endpoints/suggest/__init__.py similarity index 100% rename from src/core/tasks/url/operators/probe/queries/urls/exist/__init__.py rename to src/api/endpoints/suggest/__init__.py diff --git a/src/api/endpoints/suggest/routes.py b/src/api/endpoints/suggest/routes.py new file mode 100644 index 00000000..8caeb8ac --- /dev/null +++ b/src/api/endpoints/suggest/routes.py @@ -0,0 +1,19 @@ +from fastapi import APIRouter, Depends + +from src.api.dependencies import get_async_core +from src.api.endpoints.suggest.url.models.request import URLSuggestionRequest +from src.api.endpoints.suggest.url.models.response.model import URLSuggestResponse +from src.api.endpoints.suggest.url.wrapper import suggest_url_wrapper +from src.core.core import AsyncCore + +suggest_router = APIRouter(prefix="/suggest", tags=["suggest"]) + 
+@suggest_router.post("/url") +async def suggest_url( + request: URLSuggestionRequest, + async_core: AsyncCore = Depends(get_async_core), +) -> URLSuggestResponse: + return await suggest_url_wrapper( + request=request, + adb_client=async_core.adb_client, + ) diff --git a/src/api/endpoints/suggest/url/__init__.py b/src/api/endpoints/suggest/url/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/suggest/url/models/__init__.py b/src/api/endpoints/suggest/url/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/suggest/url/models/request.py b/src/api/endpoints/suggest/url/models/request.py new file mode 100644 index 00000000..4deec1d5 --- /dev/null +++ b/src/api/endpoints/suggest/url/models/request.py @@ -0,0 +1,13 @@ +from pydantic import BaseModel + +from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.enums import URLType + + +class URLSuggestionRequest(BaseModel): + url: str + url_type: URLType | None = None + record_type: RecordType | None = None + agency_ids: list[int] = [] + location_ids: list[int] = [] + name: str | None = None \ No newline at end of file diff --git a/src/api/endpoints/suggest/url/models/response/__init__.py b/src/api/endpoints/suggest/url/models/response/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/suggest/url/models/response/enums.py b/src/api/endpoints/suggest/url/models/response/enums.py new file mode 100644 index 00000000..337d759a --- /dev/null +++ b/src/api/endpoints/suggest/url/models/response/enums.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class URLSuggestResultEnum(Enum): + ACCEPTED = "accepted" + ACCEPTED_WITH_ERRORS = "accepted_with_errors" + DUPLICATE = "duplicate" diff --git a/src/api/endpoints/suggest/url/models/response/model.py b/src/api/endpoints/suggest/url/models/response/model.py new file mode 100644 index 00000000..091734bb --- /dev/null +++ 
b/src/api/endpoints/suggest/url/models/response/model.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel + +from src.api.endpoints.suggest.url.models.response.enums import URLSuggestResultEnum + + +class URLSuggestResponse(BaseModel): + result: URLSuggestResultEnum + url_id: int | None + msg: str \ No newline at end of file diff --git a/src/api/endpoints/suggest/url/queries/__init__.py b/src/api/endpoints/suggest/url/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/suggest/url/queries/core.py b/src/api/endpoints/suggest/url/queries/core.py new file mode 100644 index 00000000..77b90128 --- /dev/null +++ b/src/api/endpoints/suggest/url/queries/core.py @@ -0,0 +1,73 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.suggest.url.models.request import URLSuggestionRequest +from src.api.endpoints.suggest.url.models.response.enums import URLSuggestResultEnum +from src.api.endpoints.suggest.url.models.response.model import URLSuggestResponse +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.base.builder import QueryBuilderBase +from src.db.queries.urls_exist.model import URLExistsResult +from src.db.queries.urls_exist.query import URLsExistInDBQueryBuilder +from src.db.queries.urls_exist.requester import URLSuggestRequester +from src.util.models.full_url import FullURL + + +class URLSuggestQueryBuilder(QueryBuilderBase): + + def __init__( + self, + request: URLSuggestionRequest + ): + super().__init__() + self.request = request + + async def run(self, session: AsyncSession) -> URLSuggestResponse: + # Clean URL + full_url = FullURL(self.request.url) + + # Check if already exists in database + url_exists_result: URLExistsResult = (await URLsExistInDBQueryBuilder( + [full_url] + ).run(session))[0] + if url_exists_result.url_id is not None: + return URLSuggestResponse( + url_id=url_exists_result.url_id, + result=URLSuggestResultEnum.DUPLICATE, + msg=f"URL Already Exists In 
Database with ID {url_exists_result.url_id}" + ) + + # Add URL + url = URL( + scheme=full_url.scheme, + url=full_url.id_form, + trailing_slash=full_url.has_trailing_slash, + ) + session.add(url) + await session.flush() + url_id: int = url.id + + try: + requester = URLSuggestRequester(session=session, url_id=url_id) + + # Optionally add other annotations + await requester.optionally_add_url_type_suggestion(self.request.url_type) + + await requester.optionally_add_record_type_suggestion(self.request.record_type) + + await requester.optionally_add_agency_id_suggestions(self.request.agency_ids) + + await requester.optionally_add_name_suggestion(self.request.name) + + # If cleaned URL matches original URL, return as ACCEPTED + return URLSuggestResponse( + url_id=url_id, + result=URLSuggestResultEnum.ACCEPTED, + msg="URL was accepted" + ) + + except Exception as e: + return URLSuggestResponse( + url_id=url_id, + result=URLSuggestResultEnum.ACCEPTED_WITH_ERRORS, + msg=f"The URL was accepted, but there were errors in adding provided annotations: {e}" + ) + diff --git a/src/api/endpoints/suggest/url/wrapper.py b/src/api/endpoints/suggest/url/wrapper.py new file mode 100644 index 00000000..7927db25 --- /dev/null +++ b/src/api/endpoints/suggest/url/wrapper.py @@ -0,0 +1,24 @@ +from http import HTTPStatus + +from fastapi import HTTPException + +from src.api.endpoints.suggest.url.models.request import URLSuggestionRequest +from src.api.endpoints.suggest.url.models.response.model import URLSuggestResponse +from src.api.endpoints.suggest.url.queries.core import URLSuggestQueryBuilder +from src.db.client.async_ import AsyncDatabaseClient +from src.util.url import is_valid_url + + +async def suggest_url_wrapper( + request: URLSuggestionRequest, + adb_client: AsyncDatabaseClient, +) -> URLSuggestResponse: + if not is_valid_url(request.url): + raise HTTPException( + status_code=HTTPStatus.BAD_REQUEST, + detail="Invalid URL" + ) + + return await adb_client.run_query_builder( + 
URLSuggestQueryBuilder(request) + ) \ No newline at end of file diff --git a/src/core/tasks/url/operators/probe/queries/insert_redirects/query.py b/src/core/tasks/url/operators/probe/queries/insert_redirects/query.py index 8dd4f693..79dd7d9a 100644 --- a/src/core/tasks/url/operators/probe/queries/insert_redirects/query.py +++ b/src/core/tasks/url/operators/probe/queries/insert_redirects/query.py @@ -2,7 +2,7 @@ from src.core.tasks.url.operators.probe.queries.insert_redirects.extract import extract_response_pairs from src.core.tasks.url.operators.probe.queries.insert_redirects.request_manager import InsertRedirectsRequestManager -from src.core.tasks.url.operators.probe.queries.urls.exist.model import URLExistsResult +from src.db.queries.urls_exist.model import URLExistsResult from src.core.tasks.url.operators.probe.tdo import URLProbeTDO from src.db.dtos.url.mapping_.full import FullURLMapping from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/core/tasks/url/operators/probe/queries/insert_redirects/request_manager.py b/src/core/tasks/url/operators/probe/queries/insert_redirects/request_manager.py index 45eaa8e3..614a8bca 100644 --- a/src/core/tasks/url/operators/probe/queries/insert_redirects/request_manager.py +++ b/src/core/tasks/url/operators/probe/queries/insert_redirects/request_manager.py @@ -8,8 +8,8 @@ convert_url_response_mapping_to_web_metadata_list from src.core.tasks.url.operators.probe.queries.insert_redirects.map import map_url_mappings_to_probe_responses from src.core.tasks.url.operators.probe.queries.insert_redirects.models.url_response_map import URLResponseMapping -from src.core.tasks.url.operators.probe.queries.urls.exist.model import URLExistsResult -from src.core.tasks.url.operators.probe.queries.urls.exist.query import URLsExistInDBQueryBuilder +from src.db.queries.urls_exist.model import URLExistsResult +from src.db.queries.urls_exist import URLsExistInDBQueryBuilder from src.core.tasks.url.operators.probe.tdo import 
URLProbeTDO from src.db.dtos.url.mapping_.full import FullURLMapping from src.db.helpers.session import session_helper as sh diff --git a/src/db/queries/urls_exist/__init__.py b/src/db/queries/urls_exist/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/url/operators/probe/queries/urls/exist/model.py b/src/db/queries/urls_exist/model.py similarity index 100% rename from src/core/tasks/url/operators/probe/queries/urls/exist/model.py rename to src/db/queries/urls_exist/model.py diff --git a/src/core/tasks/url/operators/probe/queries/urls/exist/query.py b/src/db/queries/urls_exist/query.py similarity index 91% rename from src/core/tasks/url/operators/probe/queries/urls/exist/query.py rename to src/db/queries/urls_exist/query.py index 4e9d3173..510cf78f 100644 --- a/src/core/tasks/url/operators/probe/queries/urls/exist/query.py +++ b/src/db/queries/urls_exist/query.py @@ -1,11 +1,10 @@ from sqlalchemy import select, func from sqlalchemy.ext.asyncio import AsyncSession -from src.core.tasks.url.operators.probe.queries.urls.exist.model import URLExistsResult -from src.db.helpers.session.session_helper import results_exist +from src.db.queries.urls_exist.model import URLExistsResult +from src.db.helpers.session import session_helper as sh from src.db.models.impl.url.core.sqlalchemy import URL from src.db.queries.base.builder import QueryBuilderBase -from src.db.helpers.session import session_helper as sh from src.util.models.full_url import FullURL diff --git a/src/db/queries/urls_exist/requester.py b/src/db/queries/urls_exist/requester.py new file mode 100644 index 00000000..45335b87 --- /dev/null +++ b/src/db/queries/urls_exist/requester.py @@ -0,0 +1,41 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.enums import RecordType +from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.templates.requester import RequesterBase + + +class URLSuggestRequester(RequesterBase): + + def __init__( + self, + 
session: AsyncSession, + url_id: int + ): + super().__init__(session=session) + self.url_id = url_id + + async def optionally_add_url_type_suggestion( + self, + url_type: URLType | None + ) -> None: + if url_type is None: + return + # TODO + + async def optionally_add_record_type_suggestion(self, record_type: RecordType | None): + if record_type is None: + return + # TODO + + async def optionally_add_agency_id_suggestions(self, agency_ids: list[int]): + if len(agency_ids) == 0: + return + # TODO + + async def optionally_add_name_suggestion(self, name: str | None): + if name is None: + return + # TODO + + diff --git a/src/db/utils/validate.py b/src/db/utils/validate.py index 4837e12c..9d77f910 100644 --- a/src/db/utils/validate.py +++ b/src/db/utils/validate.py @@ -1,7 +1,4 @@ from typing import Protocol -from urllib.parse import urlparse - -from pydantic import BaseModel def validate_has_protocol(obj: object, protocol: type[Protocol]): @@ -13,15 +10,3 @@ def validate_all_models_of_same_type(objects: list[object]): if not all(isinstance(model, type(first_model)) for model in objects): raise TypeError("Models must be of the same type") -def is_valid_url(url: str) -> bool: - try: - result = urlparse(url) - # If scheme is missing, `netloc` will be empty, so we check path too - if result.scheme in ("http", "https") and result.netloc: - return True - if not result.scheme and result.path: - # no scheme, treat path as potential domain - return "." 
in result.path - return False - except ValueError: - return False diff --git a/src/util/models/full_url.py b/src/util/models/full_url.py index 1118040e..9b3fc694 100644 --- a/src/util/models/full_url.py +++ b/src/util/models/full_url.py @@ -79,6 +79,10 @@ def id_form(self) -> str: clean: str = clean_url(no_trailing_slash) return clean + @property + def has_trailing_slash(self) -> bool: + return self.full_url.endswith("/") + def clean(self) -> str: return clean_url(self.full_url) diff --git a/src/util/url.py b/src/util/url.py index 0fdf7d0b..88c8959d 100644 --- a/src/util/url.py +++ b/src/util/url.py @@ -31,4 +31,18 @@ def remove_url_scheme(url: str) -> str: parsed = urlparse(url) if parsed.scheme: return url.replace(f"{parsed.scheme}://", "", 1) - return url \ No newline at end of file + return url + + +def is_valid_url(url: str) -> bool: + try: + result = urlparse(url) + # If scheme is missing, `netloc` will be empty, so we check path too + if result.scheme in ("http", "https") and result.netloc: + return True + if not result.scheme and result.path: + # no scheme, treat path as potential domain + return "." 
in result.path + return False + except ValueError: + return False From d93d90a193a9c864ba79b8ea6b09ca71688eb37a Mon Sep 17 00:00:00 2001 From: Max Chis Date: Mon, 20 Oct 2025 16:40:53 -0400 Subject: [PATCH 2/6] Add agency endpoints --- ...adf9d894180_add_autogenerated_agency_id.py | 32 ++++++ pytest.ini | 1 + .../{batch/dtos/post => agencies}/__init__.py | 0 src/api/endpoints/agencies/by_id/__init__.py | 0 .../agencies/by_id/delete/__init__.py | 0 .../endpoints/agencies/by_id/delete/query.py | 21 ++++ .../agencies/by_id/delete/request.py | 0 .../agencies/by_id/locations/__init__.py | 0 .../by_id/locations/delete/__init__.py | 0 .../agencies/by_id/locations/delete/query.py | 29 +++++ .../agencies/by_id/locations/get/__init__.py | 0 .../agencies/by_id/locations/get/query.py | 37 ++++++ .../agencies/by_id/locations/get/response.py | 6 + .../agencies/by_id/locations/post/__init__.py | 0 .../agencies/by_id/locations/post/query.py | 23 ++++ .../endpoints/agencies/by_id/put/__init__.py | 0 src/api/endpoints/agencies/by_id/put/query.py | 42 +++++++ .../endpoints/agencies/by_id/put/request.py | 9 ++ src/api/endpoints/agencies/root/__init__.py | 0 .../endpoints/agencies/root/get/__init__.py | 0 src/api/endpoints/agencies/root/get/query.py | 52 +++++++++ .../endpoints/agencies/root/get/response.py | 12 ++ .../endpoints/agencies/root/post/__init__.py | 0 src/api/endpoints/agencies/root/post/query.py | 44 +++++++ .../endpoints/agencies/root/post/request.py | 10 ++ .../endpoints/agencies/root/post/response.py | 5 + src/api/endpoints/agencies/routes.py | 107 ++++++++++++++++++ src/api/endpoints/batch/routes.py | 5 +- src/api/main.py | 4 +- src/api/shared/__init__.py | 0 src/api/shared/models/__init__.py | 0 .../models/message_response.py} | 0 src/core/core.py | 10 +- .../insert_redirects/request_manager.py | 2 +- src/db/client/async_.py | 8 ++ src/db/models/impl/agency/sqlalchemy.py | 20 +++- src/db/models/views/location_expanded.py | 1 - src/db/queries/base/builder.py | 3 +- 
.../api/_helpers/RequestValidator.py | 80 ++++++++++++- .../integration/api/agencies/__init__.py | 0 .../integration/api/agencies/test_core.py | 75 ++++++++++++ tests/automated/integration/api/conftest.py | 69 ----------- tests/automated/integration/conftest.py | 69 ++++++++++- .../automated/integration/readonly/README.md | 1 + .../integration/readonly/__init__.py | 0 .../integration/readonly/api/__init__.py | 0 .../readonly/api/agencies/__init__.py | 0 .../readonly/api/agencies/get/__init__.py | 0 .../api/agencies/get/test_locations.py | 16 +++ .../readonly/api/agencies/get/test_root.py | 20 ++++ .../integration/readonly/conftest.py | 101 +++++++++++++++++ tests/helpers/data_creator/core.py | 11 +- 52 files changed, 829 insertions(+), 96 deletions(-) create mode 100644 alembic/versions/2025_10_20_1620-6adf9d894180_add_autogenerated_agency_id.py rename src/api/endpoints/{batch/dtos/post => agencies}/__init__.py (100%) create mode 100644 src/api/endpoints/agencies/by_id/__init__.py create mode 100644 src/api/endpoints/agencies/by_id/delete/__init__.py create mode 100644 src/api/endpoints/agencies/by_id/delete/query.py create mode 100644 src/api/endpoints/agencies/by_id/delete/request.py create mode 100644 src/api/endpoints/agencies/by_id/locations/__init__.py create mode 100644 src/api/endpoints/agencies/by_id/locations/delete/__init__.py create mode 100644 src/api/endpoints/agencies/by_id/locations/delete/query.py create mode 100644 src/api/endpoints/agencies/by_id/locations/get/__init__.py create mode 100644 src/api/endpoints/agencies/by_id/locations/get/query.py create mode 100644 src/api/endpoints/agencies/by_id/locations/get/response.py create mode 100644 src/api/endpoints/agencies/by_id/locations/post/__init__.py create mode 100644 src/api/endpoints/agencies/by_id/locations/post/query.py create mode 100644 src/api/endpoints/agencies/by_id/put/__init__.py create mode 100644 src/api/endpoints/agencies/by_id/put/query.py create mode 100644 
src/api/endpoints/agencies/by_id/put/request.py
 create mode 100644 src/api/endpoints/agencies/root/__init__.py
 create mode 100644 src/api/endpoints/agencies/root/get/__init__.py
 create mode 100644 src/api/endpoints/agencies/root/get/query.py
 create mode 100644 src/api/endpoints/agencies/root/get/response.py
 create mode 100644 src/api/endpoints/agencies/root/post/__init__.py
 create mode 100644 src/api/endpoints/agencies/root/post/query.py
 create mode 100644 src/api/endpoints/agencies/root/post/request.py
 create mode 100644 src/api/endpoints/agencies/root/post/response.py
 create mode 100644 src/api/endpoints/agencies/routes.py
 create mode 100644 src/api/shared/__init__.py
 create mode 100644 src/api/shared/models/__init__.py
 rename src/api/{endpoints/batch/dtos/post/abort.py => shared/models/message_response.py} (100%)
 create mode 100644 tests/automated/integration/api/agencies/__init__.py
 create mode 100644 tests/automated/integration/api/agencies/test_core.py
 delete mode 100644 tests/automated/integration/api/conftest.py
 create mode 100644 tests/automated/integration/readonly/README.md
 create mode 100644 tests/automated/integration/readonly/__init__.py
 create mode 100644 tests/automated/integration/readonly/api/__init__.py
 create mode 100644 tests/automated/integration/readonly/api/agencies/__init__.py
 create mode 100644 tests/automated/integration/readonly/api/agencies/get/__init__.py
 create mode 100644 tests/automated/integration/readonly/api/agencies/get/test_locations.py
 create mode 100644 tests/automated/integration/readonly/api/agencies/get/test_root.py
 create mode 100644 tests/automated/integration/readonly/conftest.py

diff --git a/alembic/versions/2025_10_20_1620-6adf9d894180_add_autogenerated_agency_id.py b/alembic/versions/2025_10_20_1620-6adf9d894180_add_autogenerated_agency_id.py
new file mode 100644
index 00000000..37fed1aa
--- /dev/null
+++ b/alembic/versions/2025_10_20_1620-6adf9d894180_add_autogenerated_agency_id.py
@@ -0,0 +1,42 @@
+"""Add autogenerated agency id
+
+Revision ID: 6adf9d894180
+Revises: 7fc6502f1fa3
+Create Date: 2025-10-20 16:20:44.081736
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '6adf9d894180'
+down_revision: Union[str, None] = '7fc6502f1fa3'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Create the ``agencies_agency_id`` sequence and make it the default of ``agencies.agency_id``."""
+    op.execute("""
+    CREATE SEQUENCE agencies_agency_id START WITH 23191;
+    """)
+
+    op.execute("""
+    ALTER TABLE agencies ALTER COLUMN agency_id SET DEFAULT nextval('agencies_agency_id');
+    """)
+
+
+def downgrade() -> None:
+    """Undo ``upgrade``: remove the column default, then drop the sequence."""
+    # Order matters: the default references the sequence, so it has to be
+    # removed before the sequence itself can be dropped.
+    op.execute("""
+    ALTER TABLE agencies ALTER COLUMN agency_id DROP DEFAULT;
+    """)
+
+    op.execute("""
+    DROP SEQUENCE agencies_agency_id;
+    """)
diff --git a/pytest.ini b/pytest.ini
index ceaa093c..5c39d47c 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,3 +3,4 @@ timeout = 300
 asyncio_default_fixture_loop_scope=function
 markers =
     manual: mark test as manual-only (excluded from default test runs)
+asyncio_mode = auto
\ No newline at end of file
diff --git a/src/api/endpoints/batch/dtos/post/__init__.py b/src/api/endpoints/agencies/__init__.py
similarity index 100%
rename from src/api/endpoints/batch/dtos/post/__init__.py
rename to src/api/endpoints/agencies/__init__.py
diff --git a/src/api/endpoints/agencies/by_id/__init__.py b/src/api/endpoints/agencies/by_id/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/api/endpoints/agencies/by_id/delete/__init__.py b/src/api/endpoints/agencies/by_id/delete/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/api/endpoints/agencies/by_id/delete/query.py b/src/api/endpoints/agencies/by_id/delete/query.py
new file mode 100644
index 00000000..800b7cde
--- /dev/null
+++ b/src/api/endpoints/agencies/by_id/delete/query.py
@@ -0,0 +1,21 @@
+from sqlalchemy import delete
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from src.db.models.impl.agency.sqlalchemy import Agency
+from src.db.queries.base.builder import QueryBuilderBase
+
+
+class DeleteAgencyQueryBuilder(QueryBuilderBase): + def __init__( + self, + agency_id: int, + ): + super().__init__() + self.agency_id = agency_id + + async def run(self, session: AsyncSession) -> None: + statement = ( + delete(Agency) + .where(Agency.agency_id == self.agency_id) + ) + await session.execute(statement) \ No newline at end of file diff --git a/src/api/endpoints/agencies/by_id/delete/request.py b/src/api/endpoints/agencies/by_id/delete/request.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/agencies/by_id/locations/__init__.py b/src/api/endpoints/agencies/by_id/locations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/agencies/by_id/locations/delete/__init__.py b/src/api/endpoints/agencies/by_id/locations/delete/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/agencies/by_id/locations/delete/query.py b/src/api/endpoints/agencies/by_id/locations/delete/query.py new file mode 100644 index 00000000..9c96c65b --- /dev/null +++ b/src/api/endpoints/agencies/by_id/locations/delete/query.py @@ -0,0 +1,29 @@ +from sqlalchemy import delete +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.queries.base.builder import QueryBuilderBase + + +class DeleteAgencyLocationQueryBuilder(QueryBuilderBase): + + def __init__( + self, + agency_id: int, + location_id: int, + ): + super().__init__() + self.agency_id = agency_id + self.location_id = location_id + + async def run(self, session: AsyncSession) -> None: + statement = ( + delete(LinkAgencyLocation) + .where( + (LinkAgencyLocation.agency_id == self.agency_id) + & (LinkAgencyLocation.location_id == self.location_id) + ) + ) + + await session.execute(statement) + diff --git a/src/api/endpoints/agencies/by_id/locations/get/__init__.py b/src/api/endpoints/agencies/by_id/locations/get/__init__.py new file mode 
100644 index 00000000..e69de29b diff --git a/src/api/endpoints/agencies/by_id/locations/get/query.py b/src/api/endpoints/agencies/by_id/locations/get/query.py new file mode 100644 index 00000000..e7ad22d5 --- /dev/null +++ b/src/api/endpoints/agencies/by_id/locations/get/query.py @@ -0,0 +1,37 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.models.views.location_expanded import LocationExpandedView +from src.db.queries.base.builder import QueryBuilderBase + + +class GetAgencyLocationsQueryBuilder(QueryBuilderBase): + + def __init__( + self, + agency_id: int, + ): + super().__init__() + self.agency_id = agency_id + + async def run(self, session: AsyncSession) -> list[AgencyGetLocationsResponse]: + query = ( + select( + LinkAgencyLocation.location_id, + LocationExpandedView.full_display_name + ) + .where( + LinkAgencyLocation.agency_id == self.agency_id + ) + .join( + LocationExpandedView, + LocationExpandedView.id == LinkAgencyLocation.location_id + ) + ) + + result: Sequence[RowMapping] = await self.sh.mappings(session, query=query) + return [AgencyGetLocationsResponse(**row) for row in result] \ No newline at end of file diff --git a/src/api/endpoints/agencies/by_id/locations/get/response.py b/src/api/endpoints/agencies/by_id/locations/get/response.py new file mode 100644 index 00000000..1e4a3078 --- /dev/null +++ b/src/api/endpoints/agencies/by_id/locations/get/response.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class AgencyGetLocationsResponse(BaseModel): + location_id: int + full_display_name: str diff --git a/src/api/endpoints/agencies/by_id/locations/post/__init__.py b/src/api/endpoints/agencies/by_id/locations/post/__init__.py new file mode 100644 index 00000000..e69de29b diff 
--git a/src/api/endpoints/agencies/by_id/locations/post/query.py b/src/api/endpoints/agencies/by_id/locations/post/query.py
new file mode 100644
index 00000000..fd1bdf2f
--- /dev/null
+++ b/src/api/endpoints/agencies/by_id/locations/post/query.py
@@ -0,0 +1,23 @@
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation
+from src.db.queries.base.builder import QueryBuilderBase
+
+
+class AddAgencyLocationQueryBuilder(QueryBuilderBase):
+
+    def __init__(
+        self,
+        agency_id: int,
+        location_id: int
+    ):
+        super().__init__()
+        self.agency_id = agency_id
+        self.location_id = location_id
+
+    async def run(self, session: AsyncSession) -> None:
+        lal = LinkAgencyLocation(
+            agency_id=self.agency_id,
+            location_id=self.location_id,
+        )
+        session.add(lal)
\ No newline at end of file
diff --git a/src/api/endpoints/agencies/by_id/put/__init__.py b/src/api/endpoints/agencies/by_id/put/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/api/endpoints/agencies/by_id/put/query.py b/src/api/endpoints/agencies/by_id/put/query.py
new file mode 100644
index 00000000..0f58a7db
--- /dev/null
+++ b/src/api/endpoints/agencies/by_id/put/query.py
@@ -0,0 +1,43 @@
+from fastapi import HTTPException
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from src.api.endpoints.agencies.by_id.put.request import AgencyPutRequest
+from src.db.models.impl.agency.sqlalchemy import Agency
+from src.db.queries.base.builder import QueryBuilderBase
+
+
+class UpdateAgencyQueryBuilder(QueryBuilderBase):
+    """Apply the non-null fields of an ``AgencyPutRequest`` to one agency."""
+
+    def __init__(
+        self,
+        agency_id: int,
+        request: AgencyPutRequest,
+    ):
+        super().__init__()
+        self.agency_id = agency_id
+        self.request = request
+
+    async def run(self, session: AsyncSession) -> None:
+        """Load the agency and overwrite every field the request supplies.
+
+        Raises:
+            HTTPException: status 400 when no agency matches ``agency_id``.
+        """
+        query = select(Agency).where(Agency.agency_id == self.agency_id)
+
+        agency = await self.sh.one_or_none(session, query=query)
+        if not agency:
+            raise HTTPException(status_code=400, detail="Agency not found")
+
+        if self.request.name is not None:
+            agency.name = self.request.name
+        if self.request.type is not None:
+            # The model attribute used by the add and list agency queries is
+            # `agency_type`; assigning `agency.type` would only set a plain
+            # Python attribute, so the new type would not be persisted.
+            agency.agency_type = self.request.type
+        if self.request.jurisdiction_type is not None:
+            agency.jurisdiction_type = self.request.jurisdiction_type
+
diff --git a/src/api/endpoints/agencies/by_id/put/request.py b/src/api/endpoints/agencies/by_id/put/request.py
new file mode 100644
index 00000000..b485e43c
--- /dev/null
+++ b/src/api/endpoints/agencies/by_id/put/request.py
@@ -0,0 +1,9 @@
+from pydantic import BaseModel
+
+from src.db.models.impl.agency.enums import AgencyType, JurisdictionType
+
+
+class AgencyPutRequest(BaseModel):
+    name: str | None = None
+    type: AgencyType | None = None
+    jurisdiction_type: JurisdictionType | None = None
diff --git a/src/api/endpoints/agencies/root/__init__.py b/src/api/endpoints/agencies/root/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/api/endpoints/agencies/root/get/__init__.py b/src/api/endpoints/agencies/root/get/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/api/endpoints/agencies/root/get/query.py b/src/api/endpoints/agencies/root/get/query.py
new file mode 100644
index 00000000..9452f12e
--- /dev/null
+++ b/src/api/endpoints/agencies/root/get/query.py
@@ -0,0 +1,52 @@
+from sqlalchemy import select, Result
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import joinedload, selectinload
+
+from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse
+from src.api.endpoints.agencies.root.get.response import AgencyGetResponse
+from src.db.models.impl.agency.sqlalchemy import Agency
+from src.db.queries.base.builder import QueryBuilderBase
+
+
+class GetAgenciesQueryBuilder(QueryBuilderBase):
+
+    def __init__(
+        self,
+        page: int,
+    ):
+        super().__init__()
+        self.page = page
+
+    async def run(self, session: AsyncSession) -> list[AgencyGetResponse]:
+ + query = ( + select( + Agency + ) + .options( + selectinload(Agency.locations) + ) + .offset((self.page - 1) * 100) + .limit(100) + ) + + results: Result[tuple[Agency]] = await session.execute(query) + responses: list[AgencyGetResponse] = [] + for result in results: + agency: Agency = result[0] + locations: list[AgencyGetLocationsResponse] = [ + AgencyGetLocationsResponse( + location_id=location.id, + full_display_name=location.full_display_name, + ) + for location in agency.locations + ] + responses.append(AgencyGetResponse( + id=agency.agency_id, + name=agency.name, + type=agency.agency_type, + jurisdiction_type=agency.jurisdiction_type, + locations=locations, + )) + + return responses diff --git a/src/api/endpoints/agencies/root/get/response.py b/src/api/endpoints/agencies/root/get/response.py new file mode 100644 index 00000000..b9d374eb --- /dev/null +++ b/src/api/endpoints/agencies/root/get/response.py @@ -0,0 +1,12 @@ +from pydantic import BaseModel + +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType + + +class AgencyGetResponse(BaseModel): + id: int + name: str + type: AgencyType + jurisdiction_type: JurisdictionType + locations: list[AgencyGetLocationsResponse] \ No newline at end of file diff --git a/src/api/endpoints/agencies/root/post/__init__.py b/src/api/endpoints/agencies/root/post/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/agencies/root/post/query.py b/src/api/endpoints/agencies/root/post/query.py new file mode 100644 index 00000000..29ff9823 --- /dev/null +++ b/src/api/endpoints/agencies/root/post/query.py @@ -0,0 +1,44 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.agencies.root.post.request import AgencyPostRequest +from src.api.endpoints.agencies.root.post.response import AgencyPostResponse +from src.db.models.impl.agency.sqlalchemy import Agency 
+from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.queries.base.builder import QueryBuilderBase + + +class AddAgencyQueryBuilder(QueryBuilderBase): + + def __init__( + self, + request: AgencyPostRequest, + ): + super().__init__() + self.request = request + + async def run(self, session: AsyncSession) -> AgencyPostResponse: + agency = Agency( + name=self.request.name, + agency_type=self.request.type, + jurisdiction_type=self.request.jurisdiction_type, + ) + + session.add(agency) + await session.flush() + await session.refresh(agency) + agency_id: int = agency.agency_id + + try: + + for location_id in self.request.location_ids: + lal = LinkAgencyLocation( + agency_id=agency_id, + location_id=location_id, + ) + session.add(lal) + + except Exception as e: + await session.rollback() + raise e + + return AgencyPostResponse(agency_id=agency_id) \ No newline at end of file diff --git a/src/api/endpoints/agencies/root/post/request.py b/src/api/endpoints/agencies/root/post/request.py new file mode 100644 index 00000000..6d95eaf2 --- /dev/null +++ b/src/api/endpoints/agencies/root/post/request.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel + +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType + + +class AgencyPostRequest(BaseModel): + name: str + type: AgencyType + jurisdiction_type: JurisdictionType + location_ids: list[int] \ No newline at end of file diff --git a/src/api/endpoints/agencies/root/post/response.py b/src/api/endpoints/agencies/root/post/response.py new file mode 100644 index 00000000..dfba5261 --- /dev/null +++ b/src/api/endpoints/agencies/root/post/response.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class AgencyPostResponse(BaseModel): + agency_id: int \ No newline at end of file diff --git a/src/api/endpoints/agencies/routes.py b/src/api/endpoints/agencies/routes.py new file mode 100644 index 00000000..6edfdf03 --- /dev/null +++ b/src/api/endpoints/agencies/routes.py @@ -0,0 
+1,107 @@ +from fastapi import APIRouter +from fastapi.params import Query, Depends, Path + +from src.api.dependencies import get_async_core +from src.api.endpoints.agencies.by_id.delete.query import DeleteAgencyQueryBuilder +from src.api.endpoints.agencies.by_id.locations.delete.query import DeleteAgencyLocationQueryBuilder +from src.api.endpoints.agencies.by_id.locations.get.query import GetAgencyLocationsQueryBuilder +from src.api.endpoints.agencies.by_id.locations.get.response import AgencyGetLocationsResponse +from src.api.endpoints.agencies.by_id.locations.post.query import AddAgencyLocationQueryBuilder +from src.api.endpoints.agencies.by_id.put.query import UpdateAgencyQueryBuilder +from src.api.endpoints.agencies.by_id.put.request import AgencyPutRequest +from src.api.endpoints.agencies.root.get.query import GetAgenciesQueryBuilder +from src.api.endpoints.agencies.root.get.response import AgencyGetResponse +from src.api.endpoints.agencies.root.post.query import AddAgencyQueryBuilder +from src.api.endpoints.agencies.root.post.request import AgencyPostRequest +from src.api.endpoints.agencies.root.post.response import AgencyPostResponse +from src.api.shared.models.message_response import MessageResponse +from src.core.core import AsyncCore + +agencies_router = APIRouter(prefix="/agencies", tags=["agencies"]) + +@agencies_router.get("") +async def get_agencies( + async_core: AsyncCore = Depends(get_async_core), + page: int = Query( + description="Page number", + default=1 + ), +) -> list[AgencyGetResponse]: + return await async_core.adb_client.run_query_builder( + GetAgenciesQueryBuilder(page=page) + ) + +@agencies_router.post("") +async def create_agency( + request: AgencyPostRequest, + async_core: AsyncCore = Depends(get_async_core), +) -> AgencyPostResponse: + return await async_core.adb_client.run_query_builder( + AddAgencyQueryBuilder(request=request) + ) + +@agencies_router.delete("/{agency_id}") +async def delete_agency( + agency_id: int = Path( + 
description="Agency ID to delete" + ), + async_core: AsyncCore = Depends(get_async_core), +) -> MessageResponse: + await async_core.adb_client.run_query_builder( + DeleteAgencyQueryBuilder(agency_id=agency_id) + ) + return MessageResponse(message="Agency deleted.") + +@agencies_router.put("/{agency_id}") +async def update_agency( + request: AgencyPutRequest, + agency_id: int = Path( + description="Agency ID to update" + ), + async_core: AsyncCore = Depends(get_async_core), +) -> MessageResponse: + await async_core.adb_client.run_query_builder( + UpdateAgencyQueryBuilder(agency_id=agency_id, request=request) + ) + return MessageResponse(message="Agency updated.") + +@agencies_router.get("/{agency_id}/locations") +async def get_agency_locations( + agency_id: int = Path( + description="Agency ID to get locations for" + ), + async_core: AsyncCore = Depends(get_async_core), +) -> list[AgencyGetLocationsResponse]: + return await async_core.adb_client.run_query_builder( + GetAgencyLocationsQueryBuilder(agency_id=agency_id) + ) + +@agencies_router.post("/{agency_id}/locations/{location_id}") +async def add_location_to_agency( + agency_id: int = Path( + description="Agency ID to add location to" + ), + location_id: int = Path( + description="Location ID to add" + ), + async_core: AsyncCore = Depends(get_async_core), +) -> MessageResponse: + await async_core.adb_client.run_query_builder( + AddAgencyLocationQueryBuilder(agency_id=agency_id, location_id=location_id) + ) + return MessageResponse(message="Location added to agency.") + +@agencies_router.delete("/{agency_id}/locations/{location_id}") +async def remove_location_from_agency( + agency_id: int = Path( + description="Agency ID to remove location from" + ), + location_id: int = Path( + description="Location ID to remove" + ), + async_core: AsyncCore = Depends(get_async_core), +) -> MessageResponse: + await async_core.adb_client.run_query_builder( + DeleteAgencyLocationQueryBuilder(agency_id=agency_id, 
location_id=location_id) + ) + return MessageResponse(message="Location removed from agency.") diff --git a/src/api/endpoints/batch/routes.py b/src/api/endpoints/batch/routes.py index bd7bbf61..87839fb7 100644 --- a/src/api/endpoints/batch/routes.py +++ b/src/api/endpoints/batch/routes.py @@ -1,5 +1,3 @@ -from typing import Optional - from fastapi import Path, APIRouter from fastapi.params import Query, Depends @@ -7,12 +5,11 @@ from src.api.endpoints.batch.dtos.get.logs import GetBatchLogsResponse from src.api.endpoints.batch.dtos.get.summaries.response import GetBatchSummariesResponse from src.api.endpoints.batch.dtos.get.summaries.summary import BatchSummary -from src.api.endpoints.batch.dtos.post.abort import MessageResponse +from src.api.shared.models.message_response import MessageResponse from src.api.endpoints.batch.duplicates.dto import GetDuplicatesByBatchResponse from src.api.endpoints.batch.urls.dto import GetURLsByBatchResponse from src.collectors.enums import CollectorType from src.core.core import AsyncCore -from src.core.enums import BatchStatus from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum from src.security.dtos.access_info import AccessInfo from src.security.manager import get_access_info diff --git a/src/api/main.py b/src/api/main.py index 2d31dc1f..0026fda3 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -7,6 +7,7 @@ from pdap_access_manager import AccessManager from starlette.responses import RedirectResponse +from src.api.endpoints.agencies.routes import agencies_router from src.api.endpoints.annotate.routes import annotate_router from src.api.endpoints.batch.routes import batch_router from src.api.endpoints.collector.routes import collector_router @@ -177,7 +178,8 @@ async def redirect_docs(): search_router, metrics_router, submit_router, - contributions_router + contributions_router, + agencies_router ] for router in routers: diff --git a/src/api/shared/__init__.py b/src/api/shared/__init__.py new file mode 
100644 index 00000000..e69de29b diff --git a/src/api/shared/models/__init__.py b/src/api/shared/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/batch/dtos/post/abort.py b/src/api/shared/models/message_response.py similarity index 100% rename from src/api/endpoints/batch/dtos/post/abort.py rename to src/api/shared/models/message_response.py diff --git a/src/core/core.py b/src/core/core.py index 7d4ac083..ad2f20d5 100644 --- a/src/core/core.py +++ b/src/core/core.py @@ -1,16 +1,12 @@ from http import HTTPStatus -from typing import Optional from fastapi import HTTPException from pydantic import BaseModel -from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse -from src.api.endpoints.annotate.all.post.models.request import AllAnnotationPostInfo -from src.api.endpoints.annotate.all.post.query import AddAllAnnotationsToURLQueryBuilder from src.api.endpoints.batch.dtos.get.logs import GetBatchLogsResponse from src.api.endpoints.batch.dtos.get.summaries.response import GetBatchSummariesResponse from src.api.endpoints.batch.dtos.get.summaries.summary import BatchSummary -from src.api.endpoints.batch.dtos.post.abort import MessageResponse +from src.api.shared.models.message_response import MessageResponse from src.api.endpoints.batch.duplicates.dto import GetDuplicatesByBatchResponse from src.api.endpoints.batch.urls.dto import GetURLsByBatchResponse from src.api.endpoints.collector.dtos.collector_start import CollectorStartInfo @@ -23,9 +19,6 @@ from src.api.endpoints.metrics.dtos.get.urls.aggregated.pending import GetMetricsURLsAggregatedPendingResponseDTO from src.api.endpoints.metrics.dtos.get.urls.breakdown.pending import GetMetricsURLsBreakdownPendingResponseDTO from src.api.endpoints.metrics.dtos.get.urls.breakdown.submitted import GetMetricsURLsBreakdownSubmittedResponseDTO -from src.api.endpoints.review.approve.dto import FinalReviewApprovalInfo -from 
src.api.endpoints.review.enums import RejectionReason -from src.api.endpoints.review.next.dto import GetNextURLForFinalReviewOuterResponse from src.api.endpoints.search.dtos.response import SearchURLResponse from src.api.endpoints.task.by_id.dto import TaskInfo from src.api.endpoints.task.dtos.get.task_status import GetTaskStatusResponseInfo @@ -39,7 +32,6 @@ from src.db.enums import TaskType from src.db.models.impl.batch.pydantic.info import BatchInfo from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum -from src.security.dtos.access_info import AccessInfo class AsyncCore: diff --git a/src/core/tasks/url/operators/probe/queries/insert_redirects/request_manager.py b/src/core/tasks/url/operators/probe/queries/insert_redirects/request_manager.py index 614a8bca..64e6299a 100644 --- a/src/core/tasks/url/operators/probe/queries/insert_redirects/request_manager.py +++ b/src/core/tasks/url/operators/probe/queries/insert_redirects/request_manager.py @@ -9,13 +9,13 @@ from src.core.tasks.url.operators.probe.queries.insert_redirects.map import map_url_mappings_to_probe_responses from src.core.tasks.url.operators.probe.queries.insert_redirects.models.url_response_map import URLResponseMapping from src.db.queries.urls_exist.model import URLExistsResult -from src.db.queries.urls_exist import URLsExistInDBQueryBuilder from src.core.tasks.url.operators.probe.tdo import URLProbeTDO from src.db.dtos.url.mapping_.full import FullURLMapping from src.db.helpers.session import session_helper as sh from src.db.models.impl.link.url_redirect_url.pydantic import LinkURLRedirectURLPydantic from src.db.models.impl.link.url_redirect_url.sqlalchemy import LinkURLRedirectURL from src.db.models.impl.url.web_metadata.insert import URLWebMetadataPydantic +from src.db.queries.urls_exist.query import URLsExistInDBQueryBuilder from src.external.url_request.probe.models.redirect import URLProbeRedirectResponsePair from src.external.url_request.probe.models.response import 
URLProbeResponse from src.util.models.full_url import FullURL diff --git a/src/db/client/async_.py b/src/db/client/async_.py index d1d093a8..11e49472 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -437,6 +437,14 @@ async def has_urls_with_html_data_and_without_auto_record_type_suggestion(self, model=AutoRecordTypeSuggestion ) + @session_manager + async def one_or_none( + self, + session: AsyncSession, + model: Base + ) -> Row | None: + return await sh.one_or_none(session=session, query=select(model)) + @session_manager async def get_all( self, diff --git a/src/db/models/impl/agency/sqlalchemy.py b/src/db/models/impl/agency/sqlalchemy.py index 002b0255..28717bfd 100644 --- a/src/db/models/impl/agency/sqlalchemy.py +++ b/src/db/models/impl/agency/sqlalchemy.py @@ -2,8 +2,8 @@ References an agency in the data sources database. """ -from sqlalchemy import Column, Integer, String, DateTime -from sqlalchemy.orm import relationship +from sqlalchemy import Column, Integer, String, DateTime, Sequence +from sqlalchemy.orm import relationship, Mapped from src.db.models.helpers import enum_column from src.db.models.impl.agency.enums import AgencyType, JurisdictionType @@ -20,10 +20,13 @@ class Agency( # TODO: Rename agency_id to ds_agency_id - agency_id = Column(Integer, primary_key=True) + agency_id = Column( + Integer, + Sequence("agencies_agency_id"), + primary_key=True) name = Column(String, nullable=False) - agency_type = enum_column(AgencyType, name="agency_type_enum") - jurisdiction_type = enum_column( + agency_type: Mapped[AgencyType] = enum_column(AgencyType, name="agency_type_enum") + jurisdiction_type: Mapped[JurisdictionType] = enum_column( JurisdictionType, name="jurisdiction_type_enum", nullable=True, @@ -33,3 +36,10 @@ class Agency( automated_suggestions = relationship("AgencyIDSubtaskSuggestion") user_suggestions = relationship("UserUrlAgencySuggestion", back_populates="agency") confirmed_urls = relationship("LinkURLAgency", 
back_populates="agency") + + locations = relationship( + "LocationExpandedView", + primaryjoin="Agency.agency_id == LinkAgencyLocation.agency_id", + secondaryjoin="LocationExpandedView.id == LinkAgencyLocation.location_id", + secondary="link_agencies_locations", + ) diff --git a/src/db/models/views/location_expanded.py b/src/db/models/views/location_expanded.py index 1eb973aa..cf60005b 100644 --- a/src/db/models/views/location_expanded.py +++ b/src/db/models/views/location_expanded.py @@ -45,7 +45,6 @@ class LocationExpandedView( WithIDBase, ViewMixin, - LocationDependentMixin ): __tablename__ = "locations_expanded" diff --git a/src/db/queries/base/builder.py b/src/db/queries/base/builder.py index f0ef345c..8a1829d0 100644 --- a/src/db/queries/base/builder.py +++ b/src/db/queries/base/builder.py @@ -1,4 +1,4 @@ -from typing import Any, Generic, Optional +from typing import Any, Generic from sqlalchemy import FromClause, ColumnClause from sqlalchemy.ext.asyncio import AsyncSession @@ -12,6 +12,7 @@ class QueryBuilderBase(Generic[LabelsType]): def __init__(self, labels: LabelsType | None = None): self.query: FromClause | None = None self.labels = labels + self.sh = sh def get(self, key: str) -> ColumnClause: return getattr(self.query.c, key) diff --git a/tests/automated/integration/api/_helpers/RequestValidator.py b/tests/automated/integration/api/_helpers/RequestValidator.py index 73293522..0db00cb3 100644 --- a/tests/automated/integration/api/_helpers/RequestValidator.py +++ b/tests/automated/integration/api/_helpers/RequestValidator.py @@ -10,7 +10,7 @@ from src.api.endpoints.batch.dtos.get.logs import GetBatchLogsResponse from src.api.endpoints.batch.dtos.get.summaries.response import GetBatchSummariesResponse from src.api.endpoints.batch.dtos.get.summaries.summary import BatchSummary -from src.api.endpoints.batch.dtos.post.abort import MessageResponse +from src.api.shared.models.message_response import MessageResponse from src.api.endpoints.batch.duplicates.dto 
import GetDuplicatesByBatchResponse from src.api.endpoints.batch.urls.dto import GetURLsByBatchResponse from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInputDTO @@ -102,6 +102,24 @@ def open_v2( ) return response.json() + def open_v3( + self, + method: str, + url: str, + params: dict | None = None, + expected_model: type[BaseModel] | None = None, + **kwargs + ) -> BaseModel | dict: + response = self.open_v2( + method=method, + url=url, + params=params, + **kwargs + ) + if expected_model: + return expected_model(**response) + return response + def get( self, url: str, @@ -158,6 +176,66 @@ def get_v2( **kwargs ) + def get_v3( + self, + url: str, + params: dict | None = None, + expected_model: BaseModel | None = None, + **kwargs + ): + return self.open_v3( + method="GET", + url=url, + params=params, + expected_model=expected_model, + **kwargs + ) + + def post_v3( + self, + url: str, + params: dict | None = None, + expected_model: BaseModel | None = None, + **kwargs + ): + return self.open_v3( + method="POST", + url=url, + params=params, + expected_model=expected_model, + **kwargs + ) + + def put_v3( + self, + url: str, + params: dict | None = None, + expected_model: BaseModel | None = None, + **kwargs + ): + return self.open_v3( + method="PUT", + url=url, + params=params, + expected_model=expected_model, + **kwargs + ) + + def delete_v3( + self, + url: str, + params: dict | None = None, + expected_model: BaseModel | None = None, + **kwargs + ): + return self.open_v3( + method="DELETE", + url=url, + params=params, + expected_model=expected_model, + **kwargs + ) + def put( self, diff --git a/tests/automated/integration/api/agencies/__init__.py b/tests/automated/integration/api/agencies/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/agencies/test_core.py b/tests/automated/integration/api/agencies/test_core.py new file mode 100644 index 00000000..cc2ddefc --- /dev/null +++ 
b/tests/automated/integration/api/agencies/test_core.py @@ -0,0 +1,75 @@ +import pytest + +from src.api.endpoints.agencies.by_id.put.request import AgencyPutRequest +from src.api.endpoints.agencies.root.post.request import AgencyPostRequest +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo + + +@pytest.mark.asyncio +async def test_agencies( + api_test_helper: APITestHelper, + california: USStateCreationInfo, + pennsylvania: USStateCreationInfo +): + ath = api_test_helper + rv = ath.request_validator + + rv.post_v3( + url=f"/agencies", + json=AgencyPostRequest( + name="Test Agency", + type=AgencyType.LAW_ENFORCEMENT, + jurisdiction_type=JurisdictionType.STATE, + location_ids=[california.location_id] + ).model_dump(mode="json") + ) + + agency: Agency = await ath.adb_client().one_or_none(model=Agency) + assert agency.name == "Test Agency" + assert agency.agency_type == AgencyType.LAW_ENFORCEMENT + assert agency.jurisdiction_type == JurisdictionType.STATE + + link: LinkAgencyLocation = await ath.adb_client().one_or_none(model=LinkAgencyLocation) + assert link is not None + assert link.agency_id == agency.agency_id + assert link.location_id == california.location_id + + rv.delete_v3( + url=f"/agencies/{agency.agency_id}/locations/{california.location_id}", + ) + + link: LinkAgencyLocation | None = await ath.adb_client().one_or_none(model=LinkAgencyLocation) + assert link is None + + rv.post_v3( + url=f"/agencies/{agency.agency_id}/locations/{pennsylvania.location_id}", + ) + + link: LinkAgencyLocation = await ath.adb_client().one_or_none(model=LinkAgencyLocation) + assert link is not None + assert link.agency_id == agency.agency_id + assert link.location_id == 
pennsylvania.location_id + + rv.put_v3( + url=f"/agencies/{agency.agency_id}", + json=AgencyPutRequest( + name="Test Agency Updated", + ).model_dump(mode="json") + ) + + agency: Agency = await ath.adb_client().one_or_none(model=Agency) + assert agency.name == "Test Agency Updated" + assert agency.agency_type == AgencyType.LAW_ENFORCEMENT + assert agency.jurisdiction_type == JurisdictionType.STATE + + + rv.delete_v3( + url=f"/agencies/{agency.agency_id}", + ) + + agency: Agency | None = await ath.adb_client().one_or_none(model=Agency) + assert agency is None diff --git a/tests/automated/integration/api/conftest.py b/tests/automated/integration/api/conftest.py deleted file mode 100644 index fa019469..00000000 --- a/tests/automated/integration/api/conftest.py +++ /dev/null @@ -1,69 +0,0 @@ -from typing import Generator, Any, AsyncGenerator -from unittest.mock import AsyncMock - -import pytest -import pytest_asyncio -from starlette.testclient import TestClient - -from src.api.main import app -from src.core.core import AsyncCore -from src.security.dtos.access_info import AccessInfo -from src.security.enums import Permissions -from src.security.manager import get_access_info -from tests.automated.integration.api._helpers.RequestValidator import RequestValidator -from tests.helpers.api_test_helper import APITestHelper - -MOCK_USER_ID = 1 - -def disable_task_trigger(ath: APITestHelper) -> None: - ath.async_core.collector_manager.post_collection_function_trigger = AsyncMock() - - - -async def fail_task_trigger() -> None: - raise Exception( - "Task Trigger is set to fail in tests by default, to catch unintentional calls." - "If this is not intended, either replace with a Mock or the expected task function." 
- ) - -def override_access_info() -> AccessInfo: - return AccessInfo( - user_id=MOCK_USER_ID, - permissions=[ - Permissions.SOURCE_COLLECTOR, - Permissions.SOURCE_COLLECTOR_FINAL_REVIEW - ] - ) - - -@pytest.fixture(scope="session") -def client(disable_task_flags) -> Generator[TestClient, None, None]: - with TestClient(app) as c: - app.dependency_overrides[get_access_info] = override_access_info - async_core: AsyncCore = c.app.state.async_core - - # Interfaces to the web should be mocked - task_manager = async_core.task_manager - task_manager.url_request_interface = AsyncMock() - task_manager.discord_poster = AsyncMock() - # Disable Logger - task_manager.logger.disabled = True - # Set trigger to fail immediately if called, to force it to be manually specified in tests - task_manager.task_trigger._func = fail_task_trigger - yield c - - # Reset environment variables back to original state - - -@pytest_asyncio.fixture -async def api_test_helper( - client: TestClient, - db_data_creator, - monkeypatch -) -> AsyncGenerator[APITestHelper, Any]: - yield APITestHelper( - request_validator=RequestValidator(client=client), - async_core=client.app.state.async_core, - db_data_creator=db_data_creator, - ) - await client.app.state.async_core.collector_manager.logger.clear_log_queue() diff --git a/tests/automated/integration/conftest.py b/tests/automated/integration/conftest.py index 574f35f4..cca17136 100644 --- a/tests/automated/integration/conftest.py +++ b/tests/automated/integration/conftest.py @@ -1,12 +1,21 @@ -from unittest.mock import MagicMock +from typing import Generator, AsyncGenerator, Any +from unittest.mock import MagicMock, AsyncMock import pytest import pytest_asyncio +from starlette.testclient import TestClient +from src.api.main import app from src.collectors.manager import AsyncCollectorManager from src.core.core import AsyncCore from src.core.logger import AsyncCoreLogger from src.db.client.async_ import AsyncDatabaseClient +from src.db.client.sync import 
DatabaseClient +from src.security.dtos.access_info import AccessInfo +from src.security.enums import Permissions +from src.security.manager import get_access_info +from tests.automated.integration.api._helpers.RequestValidator import RequestValidator +from tests.helpers.api_test_helper import APITestHelper from tests.helpers.data_creator.core import DBDataCreator from tests.helpers.data_creator.models.creation_info.county import CountyCreationInfo from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo @@ -93,4 +102,60 @@ async def los_angeles_locality( state_id=california.us_state_id, county_id=los_angeles_county.county_id, name="Los Angeles" - ) \ No newline at end of file + ) + + +MOCK_USER_ID = 1 + + +async def fail_task_trigger() -> None: + raise Exception( + "Task Trigger is set to fail in tests by default, to catch unintentional calls." + "If this is not intended, either replace with a Mock or the expected task function." + ) + + +def override_access_info() -> AccessInfo: + return AccessInfo( + user_id=MOCK_USER_ID, + permissions=[ + Permissions.SOURCE_COLLECTOR, + Permissions.SOURCE_COLLECTOR_FINAL_REVIEW + ] + ) + + +@pytest.fixture(scope="session") +def client(disable_task_flags) -> Generator[TestClient, None, None]: + with TestClient(app) as c: + app.dependency_overrides[get_access_info] = override_access_info + async_core: AsyncCore = c.app.state.async_core + + # Interfaces to the web should be mocked + task_manager = async_core.task_manager + task_manager.url_request_interface = AsyncMock() + task_manager.discord_poster = AsyncMock() + # Disable Logger + task_manager.logger.disabled = True + # Set trigger to fail immediately if called, to force it to be manually specified in tests + task_manager.task_trigger._func = fail_task_trigger + yield c + + # Reset environment variables back to original state + + +@pytest_asyncio.fixture +async def api_test_helper( + client: TestClient, + db_client_test: DatabaseClient, + 
adb_client_test: AsyncDatabaseClient +) -> AsyncGenerator[APITestHelper, Any]: + yield APITestHelper( + request_validator=RequestValidator(client=client), + async_core=client.app.state.async_core, + db_data_creator=DBDataCreator( + db_client=db_client_test, + adb_client=adb_client_test + ), + ) + await client.app.state.async_core.collector_manager.logger.clear_log_queue() diff --git a/tests/automated/integration/readonly/README.md b/tests/automated/integration/readonly/README.md new file mode 100644 index 00000000..3c72830f --- /dev/null +++ b/tests/automated/integration/readonly/README.md @@ -0,0 +1 @@ +Read Only tests are tests that work on a variant of the database populated with static test data. These tests are designed to not modify the database in any way. diff --git a/tests/automated/integration/readonly/__init__.py b/tests/automated/integration/readonly/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/readonly/api/__init__.py b/tests/automated/integration/readonly/api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/readonly/api/agencies/__init__.py b/tests/automated/integration/readonly/api/agencies/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/readonly/api/agencies/get/__init__.py b/tests/automated/integration/readonly/api/agencies/get/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/readonly/api/agencies/get/test_locations.py b/tests/automated/integration/readonly/api/agencies/get/test_locations.py new file mode 100644 index 00000000..13481c58 --- /dev/null +++ b/tests/automated/integration/readonly/api/agencies/get/test_locations.py @@ -0,0 +1,16 @@ +import pytest + +from tests.automated.integration.readonly.conftest import ReadOnlyTestHelper + + +@pytest.mark.asyncio +async def test_agency_get_locations( + readonly_helper: ReadOnlyTestHelper, +) -> 
None: + + response_raw: list[dict] = readonly_helper.api_test_helper.request_validator.get_v3( + url=f"/agencies/{readonly_helper.agency_1_id}/locations", + ) + assert len(response_raw) == 1 + assert response_raw[0]["location_id"] == readonly_helper.agency_1_location_id + assert response_raw[0]["full_display_name"] == "Pittsburgh, Allegheny, Pennsylvania" diff --git a/tests/automated/integration/readonly/api/agencies/get/test_root.py b/tests/automated/integration/readonly/api/agencies/get/test_root.py new file mode 100644 index 00000000..fa390abd --- /dev/null +++ b/tests/automated/integration/readonly/api/agencies/get/test_root.py @@ -0,0 +1,20 @@ +import pytest + +from src.db.models.impl.agency.enums import JurisdictionType, AgencyType +from tests.automated.integration.readonly.conftest import ReadOnlyTestHelper + + +@pytest.mark.asyncio +async def test_agency_get( + readonly_helper: ReadOnlyTestHelper +): + + responses_raw: list[dict] = readonly_helper.api_test_helper.request_validator.get_v3( + url=f"/agencies", + ) + assert len(responses_raw) == 1 + response_raw = responses_raw[0] + assert response_raw["id"] == readonly_helper.agency_1_id + assert response_raw["name"] == "Agency 1" + assert response_raw["type"] == AgencyType.LAW_ENFORCEMENT.value + assert response_raw["jurisdiction_type"] == JurisdictionType.STATE.value \ No newline at end of file diff --git a/tests/automated/integration/readonly/conftest.py b/tests/automated/integration/readonly/conftest.py new file mode 100644 index 00000000..1085c184 --- /dev/null +++ b/tests/automated/integration/readonly/conftest.py @@ -0,0 +1,101 @@ +import asyncio +from typing import Any, AsyncGenerator + +import pytest +import pytest_asyncio +from pydantic import BaseModel +from starlette.testclient import TestClient + +from src.db.client.async_ import AsyncDatabaseClient +from src.db.helpers.connect import get_postgres_connection_string +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType +from 
src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from tests.automated.integration.api._helpers.RequestValidator import RequestValidator +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.counter import next_int +from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo +from tests.helpers.setup.wipe import wipe_database + + +class ReadOnlyTestHelper(BaseModel): + class Config: + arbitrary_types_allowed = True + + adb_client: AsyncDatabaseClient + api_test_helper: APITestHelper + + agency_1_id: int + agency_1_location_id: int + + +@pytest.fixture(scope="module") +def event_loop(): + loop = asyncio.new_event_loop() + yield loop + loop.close() + +@pytest_asyncio.fixture(scope='module') +async def california_readonly( +) -> USStateCreationInfo: + return await DBDataCreator().create_us_state( + name="California", + iso="CA" + ) + +@pytest_asyncio.fixture(scope="module") +async def readonly_helper( + event_loop, + client: TestClient, +) -> AsyncGenerator[ReadOnlyTestHelper, Any]: + wipe_database(get_postgres_connection_string()) + conn = get_postgres_connection_string(is_async=True) + adb_client = AsyncDatabaseClient(db_url=conn) + db_data_creator = DBDataCreator() + api_test_helper = APITestHelper( + request_validator=RequestValidator(client=client), + async_core=client.app.state.async_core, + db_data_creator=db_data_creator, + ) + + # Pennsylvania + pennsylvania = await DBDataCreator().create_us_state( + name="Pennsylvania", + iso="PA" + ) + + allegheny_county = await DBDataCreator().create_county( + state_id=pennsylvania.us_state_id, + name="Allegheny" + ) + pittsburgh = await DBDataCreator().create_locality( + state_id=pennsylvania.us_state_id, + county_id=allegheny_county.county_id, + name="Pittsburgh" + ) + + + # Add Agencies + agency_1 = Agency( + 
agency_id=next_int(), + name="Agency 1", + agency_type=AgencyType.LAW_ENFORCEMENT, + jurisdiction_type=JurisdictionType.STATE, + ) + await adb_client.add(agency_1) + + # Add Agency location + agency_1_location = LinkAgencyLocation( + agency_id=agency_1.agency_id, + location_id=pittsburgh.location_id, + ) + await adb_client.add(agency_1_location) + + yield ReadOnlyTestHelper( + adb_client=adb_client, + api_test_helper=api_test_helper, + + agency_1_id=agency_1.agency_id, + agency_1_location_id=pittsburgh.location_id, + ) \ No newline at end of file diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index 6bf7df5f..575c594f 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -68,12 +68,19 @@ class DBDataCreator: """ Assists in the creation of test data """ - def __init__(self, db_client: Optional[DatabaseClient] = None): + def __init__( + self, + db_client: DatabaseClient | None = None, + adb_client: AsyncDatabaseClient | None = None + ): if db_client is not None: self.db_client = db_client else: self.db_client = DatabaseClient() - self.adb_client: AsyncDatabaseClient = AsyncDatabaseClient() + if adb_client is not None: + self.adb_client = adb_client + else: + self.adb_client: AsyncDatabaseClient = AsyncDatabaseClient() self.clients = DBDataCreatorClientContainer( adb=self.adb_client, db=self.db_client From 7c86759204429e7ef53aee527ed5b79dee146d0d Mon Sep 17 00:00:00 2001 From: Max Chis Date: Tue, 21 Oct 2025 08:07:08 -0400 Subject: [PATCH 3/6] Add URL suggestion endpoint --- ...adf9d894180_add_autogenerated_agency_id.py | 32 ----- ...80_enable_data_source_agency_submission.py | 110 ++++++++++++++++ .../endpoints/agencies/by_id/delete/query.py | 1 + src/api/endpoints/collector/manual/query.py | 2 +- .../endpoints/review/approve/query_/core.py | 4 +- .../endpoints/submit/data_source/__init__.py | 0 src/api/endpoints/submit/data_source/query.py | 108 ++++++++++++++++ 
.../endpoints/submit/data_source/request.py | 37 ++++++ .../endpoints/submit/data_source/response.py | 5 + .../endpoints/submit/data_source/wrapper.py | 39 ++++++ src/api/endpoints/submit/routes.py | 15 ++- src/db/client/async_.py | 4 +- src/db/models/impl/url/core/sqlalchemy.py | 6 +- .../impl/url/optional_data_source_metadata.py | 16 --- .../impl/url/optional_ds_metadata/__init__.py | 0 .../impl/url/optional_ds_metadata/enums.py | 29 +++++ .../url/optional_ds_metadata/sqlalchemy.py | 40 ++++++ src/db/statement_composer.py | 4 +- .../integration/api/agencies/test_core.py | 12 +- .../api/submit/data_source/__init__.py | 0 .../api/submit/data_source/test_core.py | 118 ++++++++++++++++++ .../integration/api/test_manual_batch.py | 2 +- tests/automated/integration/conftest.py | 8 ++ .../db/client/approve_url/test_basic.py | 2 +- .../tasks/url/impl/probe/check/manager.py | 4 +- .../test_url_miscellaneous_metadata_task.py | 2 +- 26 files changed, 529 insertions(+), 71 deletions(-) delete mode 100644 alembic/versions/2025_10_20_1620-6adf9d894180_add_autogenerated_agency_id.py create mode 100644 alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py create mode 100644 src/api/endpoints/submit/data_source/__init__.py create mode 100644 src/api/endpoints/submit/data_source/query.py create mode 100644 src/api/endpoints/submit/data_source/request.py create mode 100644 src/api/endpoints/submit/data_source/response.py create mode 100644 src/api/endpoints/submit/data_source/wrapper.py delete mode 100644 src/db/models/impl/url/optional_data_source_metadata.py create mode 100644 src/db/models/impl/url/optional_ds_metadata/__init__.py create mode 100644 src/db/models/impl/url/optional_ds_metadata/enums.py create mode 100644 src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py create mode 100644 tests/automated/integration/api/submit/data_source/__init__.py create mode 100644 tests/automated/integration/api/submit/data_source/test_core.py diff 
--git a/alembic/versions/2025_10_20_1620-6adf9d894180_add_autogenerated_agency_id.py b/alembic/versions/2025_10_20_1620-6adf9d894180_add_autogenerated_agency_id.py deleted file mode 100644 index 37fed1aa..00000000 --- a/alembic/versions/2025_10_20_1620-6adf9d894180_add_autogenerated_agency_id.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Add autogenerated agency id - -Revision ID: 6adf9d894180 -Revises: 7fc6502f1fa3 -Create Date: 2025-10-20 16:20:44.081736 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = '6adf9d894180' -down_revision: Union[str, None] = '7fc6502f1fa3' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - op.execute(""" - CREATE SEQUENCE agencies_agency_id START WITH 23191; - """) - - op.execute(""" - ALTER TABLE agencies ALTER COLUMN agency_id SET DEFAULT nextval('agencies_agency_id'); - """) - - -def downgrade() -> None: - pass diff --git a/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py b/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py new file mode 100644 index 00000000..d1a72f7e --- /dev/null +++ b/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py @@ -0,0 +1,110 @@ +"""Enable data source/agency submission + +Revision ID: 6adf9d894180 +Revises: 7fc6502f1fa3 +Create Date: 2025-10-20 16:20:44.081736 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import ENUM, ARRAY + + +# revision identifiers, used by Alembic. 
+revision: str = '6adf9d894180' +down_revision: Union[str, None] = '7fc6502f1fa3' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +def upgrade() -> None: + _add_autogenerated_agency_id() + _add_new_columns_to_optional_ds_metadata() + +def _add_new_columns_to_optional_ds_metadata(): + table_name: str = "url_optional_data_source_metadata" + + agency_aggregation_enum = ENUM( + 'federal', + 'state', + 'county', + 'local', + name='agency_aggregation_enum', + create_type=True, + ) + agency_aggregation_enum.create(op.get_bind()) + + update_method_enum = ENUM( + 'Overwrite', + 'Insert', + 'No updates', + name='update_method_enum', + create_type=True + ) + update_method_enum.create(op.get_bind()) + + retention_schedule_enum = ENUM( + 'Future only', + '1 month', + '1 day', + '1 week', + '1-10 years', + '< 1 day', + '< 1 week', + '< 1 year', + '> 10 years', + name='retention_schedule_enum', + create_type=True + ) + retention_schedule_enum.create(op.get_bind()) + + access_type_enum = ENUM( + 'Webpage', + 'Download', + 'API', + name='access_type_enum', + create_type=True, + ) + access_type_enum.create(op.get_bind()) + + for column in [ + sa.Column('coverage_start', sa.Date(), nullable=True), + sa.Column('coverage_end', sa.Date(), nullable=True), + sa.Column("agency_supplied", sa.Boolean(), nullable=True), + sa.Column('agency_originated', sa.Boolean(), nullable=True), + sa.Column('agency_aggregation', agency_aggregation_enum), + sa.Column('agency_described_not_in_database', sa.Text(), nullable=True), + sa.Column('update_method', update_method_enum, nullable=True), + sa.Column('readme_url', sa.Text(), nullable=True), + sa.Column('originating_entity', sa.Text(), nullable=True), + sa.Column('retention_schedule', retention_schedule_enum, nullable=True), + sa.Column('scraper_url', sa.Text(), nullable=True), + sa.Column('submission_notes', sa.Text(), nullable=True), + sa.Column('access_notes', sa.Text(), nullable=True), + 
sa.Column('data_portal', sa.Text(), nullable=True), + sa.Column('access_types', ARRAY( + access_type_enum + ), nullable=True), + ]: + op.add_column( + table_name, + column, + ) + +def _add_autogenerated_agency_id(): + op.execute( + """ + CREATE SEQUENCE agencies_agency_id START WITH 23191; + """ + ) + + op.execute( + """ + ALTER TABLE agencies + ALTER COLUMN agency_id SET DEFAULT nextval('agencies_agency_id'); + """ + ) + +def downgrade() -> None: + pass diff --git a/src/api/endpoints/agencies/by_id/delete/query.py b/src/api/endpoints/agencies/by_id/delete/query.py index 800b7cde..61ce2653 100644 --- a/src/api/endpoints/agencies/by_id/delete/query.py +++ b/src/api/endpoints/agencies/by_id/delete/query.py @@ -6,6 +6,7 @@ class DeleteAgencyQueryBuilder(QueryBuilderBase): + def __init__( self, agency_id: int, diff --git a/src/api/endpoints/collector/manual/query.py b/src/api/endpoints/collector/manual/query.py index 6cd7d7b8..dff2cbed 100644 --- a/src/api/endpoints/collector/manual/query.py +++ b/src/api/endpoints/collector/manual/query.py @@ -9,7 +9,7 @@ from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType from src.db.queries.base.builder import QueryBuilderBase from src.util.models.url_and_scheme import URLAndScheme diff --git a/src/api/endpoints/review/approve/query_/core.py b/src/api/endpoints/review/approve/query_/core.py index 15641764..b7abec5a 100644 --- a/src/api/endpoints/review/approve/query_/core.py +++ b/src/api/endpoints/review/approve/query_/core.py @@ -6,14 +6,12 @@ from src.api.endpoints.review.approve.dto import FinalReviewApprovalInfo from 
src.api.endpoints.review.approve.query_.util import update_if_not_none -from src.collectors.enums import URLStatus -from src.db.constants import PLACEHOLDER_AGENCY_NAME from src.db.models.impl.agency.sqlalchemy import Agency from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType from src.db.models.impl.url.reviewing_user import ReviewingUserURL from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/api/endpoints/submit/data_source/__init__.py b/src/api/endpoints/submit/data_source/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/submit/data_source/query.py b/src/api/endpoints/submit/data_source/query.py new file mode 100644 index 00000000..2d3d685b --- /dev/null +++ b/src/api/endpoints/submit/data_source/query.py @@ -0,0 +1,108 @@ +from typing import Any + +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest +from src.api.endpoints.submit.data_source.response import SubmitDataSourceURLProposalResponse +from src.collectors.enums import URLStatus +from src.db.models.impl.url.core.enums import URLSource +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata +from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency +from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation 
+from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType +from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource +from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from src.db.queries.base.builder import QueryBuilderBase +from src.util.models.full_url import FullURL + + +class SubmitDataSourceURLProposalQueryBuilder(QueryBuilderBase): + + def __init__(self, request: DataSourceSubmissionRequest): + super().__init__() + self.request = request + + async def run(self, session: AsyncSession) -> Any: + full_url = FullURL(full_url=self.request.source_url) + + url = URL( + url=full_url.id_form, + scheme=full_url.scheme, + trailing_slash=full_url.has_trailing_slash, + name=self.request.name, + status=URLStatus.OK, + source=URLSource.MANUAL, + ) + + session.add(url) + await session.flush() + + url_id: int = url.id + + # Optionally add Record Type as suggestion + if self.request.record_type is not None: + record_type_suggestion = AnonymousAnnotationRecordType( + url_id=url_id, + record_type=self.request.record_type.value + ) + session.add(record_type_suggestion) + + # Optionally add Agency ID suggestions + if self.request.agency_ids is not None: + agency_id_suggestions = [ + AnonymousAnnotationAgency( + url_id=url_id, + agency_id=agency_id + ) + for agency_id in self.request.agency_ids + ] + session.add_all(agency_id_suggestions) + + # Optionally add Location ID suggestions + if self.request.location_ids is not None: + location_id_suggestions = [ + AnonymousAnnotationLocation( + url_id=url_id, + location_id=location_id + ) + for location_id in self.request.location_ids + ] + session.add_all(location_id_suggestions) + + # Optionally add name suggestion + if self.request.name is not None: + name_suggestion = URLNameSuggestion( + url_id=url_id, + suggestion=self.request.name, + source=NameSuggestionSource.USER + ) + session.add(name_suggestion) + + # Add data source 
metadata + ds_metadata = URLOptionalDataSourceMetadata( + url_id=url_id, + coverage_start=self.request.coverage_start, + coverage_end=self.request.coverage_end, + supplying_entity=self.request.supplying_entity, + agency_supplied=self.request.agency_supplied, + agency_originated=self.request.agency_originated, + agency_aggregation=self.request.agency_aggregation, + agency_described_not_in_database=self.request.agency_described_not_in_database, + data_portal=self.request.data_portal, + update_method=self.request.update_method, + readme_url=self.request.readme_url, + originating_entity=self.request.originating_entity, + retention_schedule=self.request.retention_schedule, + scraper_url=self.request.scraper_url, + submission_notes=self.request.submission_notes, + access_notes=self.request.access_notes, + access_types=self.request.access_types, + record_formats=self.request.record_formats, + ) + session.add(ds_metadata) + await session.flush() + + return SubmitDataSourceURLProposalResponse( + url_id=url_id, + ) \ No newline at end of file diff --git a/src/api/endpoints/submit/data_source/request.py b/src/api/endpoints/submit/data_source/request.py new file mode 100644 index 00000000..011ebe2a --- /dev/null +++ b/src/api/endpoints/submit/data_source/request.py @@ -0,0 +1,37 @@ +from datetime import date + +from pydantic import BaseModel + +from src.core.enums import RecordType +from src.db.models.impl.url.optional_ds_metadata.enums import AgencyAggregationEnum, UpdateMethodEnum, \ + RetentionScheduleEnum, AccessTypeEnum + + +class DataSourceSubmissionRequest(BaseModel): + # Required + name: str + record_type: RecordType + source_url: str + + # Optional URL DS Metadata + coverage_start: date | None = None + coverage_end: date | None = None + supplying_entity: str | None = None + agency_supplied: bool | None = None + agency_originated: bool | None = None + agency_aggregation: AgencyAggregationEnum | None = None + agency_described_not_in_database: str | None = None + 
update_method: UpdateMethodEnum | None = None + readme_url: str | None = None + originating_entity: str | None = None + retention_schedule: RetentionScheduleEnum | None = None + scraper_url: str | None = None + submission_notes: str | None = None + data_portal: str | None = None + access_notes: str | None = None + access_types: list[AccessTypeEnum] = [] + record_formats: list[str] = [] + + # Links to other entities + agency_ids: list[int] = [] + location_ids: list[int] = [] diff --git a/src/api/endpoints/submit/data_source/response.py b/src/api/endpoints/submit/data_source/response.py new file mode 100644 index 00000000..b2d7ba3f --- /dev/null +++ b/src/api/endpoints/submit/data_source/response.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class SubmitDataSourceURLProposalResponse(BaseModel): + url_id: int \ No newline at end of file diff --git a/src/api/endpoints/submit/data_source/wrapper.py b/src/api/endpoints/submit/data_source/wrapper.py new file mode 100644 index 00000000..32794150 --- /dev/null +++ b/src/api/endpoints/submit/data_source/wrapper.py @@ -0,0 +1,39 @@ +from fastapi import HTTPException + +from src.api.endpoints.submit.data_source.query import SubmitDataSourceURLProposalQueryBuilder +from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest +from src.api.endpoints.submit.data_source.response import SubmitDataSourceURLProposalResponse +from src.db.client.async_ import AsyncDatabaseClient +from src.db.queries.urls_exist.model import URLExistsResult +from src.db.queries.urls_exist.query import URLsExistInDBQueryBuilder +from src.util.models.full_url import FullURL +from src.util.url import is_valid_url + + +async def submit_data_source_url_proposal( + request: DataSourceSubmissionRequest, + adb_client: AsyncDatabaseClient +) -> SubmitDataSourceURLProposalResponse: + + if not is_valid_url(request.source_url): + raise HTTPException( + status_code=400, + detail="Invalid URL" + ) + + url_exists_results: 
URLExistsResult = (await adb_client.run_query_builder( + URLsExistInDBQueryBuilder( + full_urls=[FullURL(request.source_url)] + ) + ))[0] + if url_exists_results.exists: + raise HTTPException( + status_code=400, + detail="URL already exists in database." + ) + + return await adb_client.run_query_builder( + SubmitDataSourceURLProposalQueryBuilder( + request=request + ) + ) \ No newline at end of file diff --git a/src/api/endpoints/submit/routes.py b/src/api/endpoints/submit/routes.py index d91d1821..ee315493 100644 --- a/src/api/endpoints/submit/routes.py +++ b/src/api/endpoints/submit/routes.py @@ -1,6 +1,8 @@ from fastapi import APIRouter, Depends from src.api.dependencies import get_async_core +from src.api.endpoints.submit.data_source.query import SubmitDataSourceURLProposalQueryBuilder +from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest from src.api.endpoints.submit.url.models.request import URLSubmissionRequest from src.api.endpoints.submit.url.models.response import URLSubmissionResponse from src.api.endpoints.submit.url.queries.core import SubmitURLQueryBuilder @@ -21,4 +23,15 @@ async def submit_url( request=request, user_id=access_info.user_id ) - ) \ No newline at end of file + ) + +@submit_router.post("/data-source") +async def submit_data_source( + request: DataSourceSubmissionRequest, + async_core: AsyncCore = Depends(get_async_core), +): + return await async_core.adb_client.run_query_builder( + SubmitDataSourceURLProposalQueryBuilder( + request=request, + ) + ) diff --git a/src/db/client/async_.py b/src/db/client/async_.py index 11e49472..93af63f9 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -80,7 +80,7 @@ from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent -from src.db.models.impl.url.optional_data_source_metadata import 
URLOptionalDataSourceMetadata +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion @@ -438,7 +438,7 @@ async def has_urls_with_html_data_and_without_auto_record_type_suggestion(self, ) @session_manager - async def one_or_none( + async def one_or_none_model( self, session: AsyncSession, model: Base diff --git a/src/db/models/impl/url/core/sqlalchemy.py b/src/db/models/impl/url/core/sqlalchemy.py index d4d8e7c2..50fa1676 100644 --- a/src/db/models/impl/url/core/sqlalchemy.py +++ b/src/db/models/impl/url/core/sqlalchemy.py @@ -1,6 +1,6 @@ from sqlalchemy import Column, Text, String, JSON, case, literal, Boolean from sqlalchemy.ext.hybrid import hybrid_property -from sqlalchemy.orm import relationship +from sqlalchemy.orm import relationship, Mapped from sqlalchemy.util import hybridproperty from src.collectors.enums import URLStatus @@ -27,7 +27,7 @@ class URL(UpdatedAtMixin, CreatedAtMixin, WithIDBase): # The metadata from the collector collector_metadata = Column(JSON) # The outcome of the URL: submitted, human_labeling, rejected, duplicate, etc. 
- status = enum_column( + status: Mapped[URLStatus] = enum_column( URLStatus, name='url_status', nullable=False @@ -57,7 +57,7 @@ def full_url(cls): else_=cls.url ) - source = enum_column( + source: Mapped[URLSource] = enum_column( URLSource, name='url_source', nullable=False diff --git a/src/db/models/impl/url/optional_data_source_metadata.py b/src/db/models/impl/url/optional_data_source_metadata.py deleted file mode 100644 index bb2a95e5..00000000 --- a/src/db/models/impl/url/optional_data_source_metadata.py +++ /dev/null @@ -1,16 +0,0 @@ -from sqlalchemy import Column, ARRAY, String -from sqlalchemy.orm import relationship - -from src.db.models.mixins import URLDependentMixin -from src.db.models.templates_.with_id import WithIDBase - - -class URLOptionalDataSourceMetadata(URLDependentMixin, WithIDBase): - __tablename__ = 'url_optional_data_source_metadata' - - record_formats = Column(ARRAY(String), nullable=True) - data_portal_type = Column(String, nullable=True) - supplying_entity = Column(String, nullable=True) - - # Relationships - url = relationship("URL", uselist=False, back_populates="optional_data_source_metadata") diff --git a/src/db/models/impl/url/optional_ds_metadata/__init__.py b/src/db/models/impl/url/optional_ds_metadata/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/url/optional_ds_metadata/enums.py b/src/db/models/impl/url/optional_ds_metadata/enums.py new file mode 100644 index 00000000..3b08e6f0 --- /dev/null +++ b/src/db/models/impl/url/optional_ds_metadata/enums.py @@ -0,0 +1,29 @@ +from enum import Enum + + +class AgencyAggregationEnum(Enum): + FEDERAL = "federal" + STATE = "state" + COUNTY = "county" + LOCALITY = "local" + +class UpdateMethodEnum(Enum): + OVERWRITE = "Overwrite" + INSERT = "Insert" + NO_UPDATES = "No updates" + +class RetentionScheduleEnum(Enum): + FUTURE_ONLY = "Future only" + ONE_MONTH = "1 month" + ONE_DAY = "1 day" + ONE_WEEK = "1 week" + ONE_TO_TEN_YEARS = "1-10 years" + 
LT_1_DAY = "< 1 day" + LT_1_WEEK = "< 1 week" + LT_1_YEAR = "< 1 year" + GT_10_YEARS = "> 10 years" + +class AccessTypeEnum(Enum): + WEBPAGE = "Webpage" + DOWNLOAD = "Download" + API = "API" \ No newline at end of file diff --git a/src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py b/src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py new file mode 100644 index 00000000..37d4638d --- /dev/null +++ b/src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py @@ -0,0 +1,40 @@ +from sqlalchemy import Column, ARRAY, String, Date, Boolean, Enum +from sqlalchemy.orm import relationship, Mapped + +from src.db.models.helpers import enum_column +from src.db.models.impl.url.optional_ds_metadata.enums import AgencyAggregationEnum, AccessTypeEnum, \ + RetentionScheduleEnum, UpdateMethodEnum +from src.db.models.mixins import URLDependentMixin +from src.db.models.templates_.with_id import WithIDBase + + +class URLOptionalDataSourceMetadata(URLDependentMixin, WithIDBase): + __tablename__ = 'url_optional_data_source_metadata' + + record_formats = Column(ARRAY(String), nullable=True) + data_portal = Column(String, nullable=True) + supplying_entity = Column(String, nullable=True) + coverage_start = Column(Date, nullable=True) + coverage_end = Column(Date, nullable=True) + agency_supplied = Column(Boolean, nullable=True) + agency_originated = Column(Boolean, nullable=True) + agency_aggregation: Mapped[AgencyAggregationEnum] = enum_column(AgencyAggregationEnum, name="agency_aggregation_enum") + agency_described_not_in_database = Column(String, nullable=True) + update_method: Mapped[UpdateMethodEnum] = enum_column(UpdateMethodEnum, name="update_method_enum") + readme_url = Column(String, nullable=True) + originating_entity = Column(String, nullable=True) + retention_schedule: Mapped[RetentionScheduleEnum] = enum_column(RetentionScheduleEnum, name="retention_schedule_enum") + scraper_url = Column(String, nullable=True) + submission_notes = Column(String, nullable=True) + 
access_notes = Column(String, nullable=True) + access_types: Mapped[list[AccessTypeEnum]] = Column(ARRAY( + Enum( + AccessTypeEnum, + name="access_type_enum", + native_enum=True, + values_callable=lambda AccessTypeEnum: [e.value for e in AccessTypeEnum] + ) + ), nullable=True) + + # Relationships + url = relationship("URL", uselist=False, back_populates="optional_data_source_metadata") diff --git a/src/db/statement_composer.py b/src/db/statement_composer.py index 0ae843b3..31d6c7f9 100644 --- a/src/db/statement_composer.py +++ b/src/db/statement_composer.py @@ -5,8 +5,6 @@ from sqlalchemy.orm import selectinload from src.collectors.enums import URLStatus -from src.core.enums import BatchStatus -from src.db.constants import STANDARD_ROW_LIMIT from src.db.enums import TaskType from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL @@ -14,7 +12,7 @@ from src.db.models.impl.task.core import Task from src.db.models.impl.task.enums import TaskStatus from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.scrape_info.sqlalchemy import URLScrapeInfo from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata from src.db.types import UserSuggestionType diff --git a/tests/automated/integration/api/agencies/test_core.py b/tests/automated/integration/api/agencies/test_core.py index cc2ddefc..a986cacc 100644 --- a/tests/automated/integration/api/agencies/test_core.py +++ b/tests/automated/integration/api/agencies/test_core.py @@ -28,12 +28,12 @@ async def test_agencies( ).model_dump(mode="json") ) - agency: Agency = await ath.adb_client().one_or_none(model=Agency) + agency: Agency = await ath.adb_client().one_or_none_model(model=Agency) assert agency.name == "Test Agency" assert 
agency.agency_type == AgencyType.LAW_ENFORCEMENT assert agency.jurisdiction_type == JurisdictionType.STATE - link: LinkAgencyLocation = await ath.adb_client().one_or_none(model=LinkAgencyLocation) + link: LinkAgencyLocation = await ath.adb_client().one_or_none_model(model=LinkAgencyLocation) assert link is not None assert link.agency_id == agency.agency_id assert link.location_id == california.location_id @@ -42,14 +42,14 @@ async def test_agencies( url=f"/agencies/{agency.agency_id}/locations/{california.location_id}", ) - link: LinkAgencyLocation | None = await ath.adb_client().one_or_none(model=LinkAgencyLocation) + link: LinkAgencyLocation | None = await ath.adb_client().one_or_none_model(model=LinkAgencyLocation) assert link is None rv.post_v3( url=f"/agencies/{agency.agency_id}/locations/{pennsylvania.location_id}", ) - link: LinkAgencyLocation = await ath.adb_client().one_or_none(model=LinkAgencyLocation) + link: LinkAgencyLocation = await ath.adb_client().one_or_none_model(model=LinkAgencyLocation) assert link is not None assert link.agency_id == agency.agency_id assert link.location_id == pennsylvania.location_id @@ -61,7 +61,7 @@ async def test_agencies( ).model_dump(mode="json") ) - agency: Agency = await ath.adb_client().one_or_none(model=Agency) + agency: Agency = await ath.adb_client().one_or_none_model(model=Agency) assert agency.name == "Test Agency Updated" assert agency.agency_type == AgencyType.LAW_ENFORCEMENT assert agency.jurisdiction_type == JurisdictionType.STATE @@ -71,5 +71,5 @@ async def test_agencies( url=f"/agencies/{agency.agency_id}", ) - agency: Agency | None = await ath.adb_client().one_or_none(model=Agency) + agency: Agency | None = await ath.adb_client().one_or_none_model(model=Agency) assert agency is None diff --git a/tests/automated/integration/api/submit/data_source/__init__.py b/tests/automated/integration/api/submit/data_source/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/tests/automated/integration/api/submit/data_source/test_core.py b/tests/automated/integration/api/submit/data_source/test_core.py new file mode 100644 index 00000000..6b8a9022 --- /dev/null +++ b/tests/automated/integration/api/submit/data_source/test_core.py @@ -0,0 +1,118 @@ +from datetime import date + +import pytest + +from src.api.endpoints.submit.data_source.request import DataSourceSubmissionRequest +from src.collectors.enums import URLStatus +from src.core.enums import RecordType +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.core.enums import URLSource +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.optional_ds_metadata.enums import AgencyAggregationEnum, UpdateMethodEnum, \ + RetentionScheduleEnum, AccessTypeEnum +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata +from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency +from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation +from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo + + +@pytest.mark.asyncio +async def test_submit_data_source( + api_test_helper: APITestHelper, + test_agency_id: int, + pittsburgh_locality: LocalityCreationInfo, +): + ath = api_test_helper + ath.request_validator.post_v3( + url="submit/data-source", + json=DataSourceSubmissionRequest( + source_url="https://example.com/", + name="Example name", + record_type=RecordType.COMPLAINTS_AND_MISCONDUCT, + coverage_start=date(year=2025, month=8, day=9), + coverage_end=date(year=2025, month=8, day=10), + supplying_entity="Test supplying entity", + agency_supplied=True, + agency_originated=False, + agency_aggregation=AgencyAggregationEnum.STATE, + 
agency_described_not_in_database="Test agency described not in database", + update_method=UpdateMethodEnum.NO_UPDATES, + readme_url="https://example.com/readme", + originating_entity="Test Originating Entity", + retention_schedule=RetentionScheduleEnum.GT_10_YEARS, + scraper_url="https://example.com/scraper", + submission_notes="Test submission notes", + data_portal="Test data portal", + access_notes="Test access notes", + access_types=[ + AccessTypeEnum.API, + AccessTypeEnum.DOWNLOAD, + AccessTypeEnum.WEBPAGE + ], + record_formats=[ + "Test record format", + "Test record format 2" + ], + + agency_ids=[test_agency_id], + location_ids=[pittsburgh_locality.location_id] + + ).model_dump(mode='json') + ) + + adb_client: AsyncDatabaseClient = api_test_helper.adb_client() + + # Check URL + url: URL = await adb_client.one_or_none_model(URL) + assert url is not None + assert url.url == "example.com" + assert url.scheme == "https" + assert url.trailing_slash == True + assert url.source == URLSource.MANUAL + assert url.status == URLStatus.OK + + # Check for Location Suggestion + location_suggestion: AnonymousAnnotationLocation = await adb_client.one_or_none_model(AnonymousAnnotationLocation) + assert location_suggestion is not None + assert location_suggestion.location_id == pittsburgh_locality.location_id + + # Check for Agency Suggestion + agency_suggestion: AnonymousAnnotationAgency = await adb_client.one_or_none_model(AnonymousAnnotationAgency) + assert agency_suggestion is not None + assert agency_suggestion.agency_id == test_agency_id + + # Check for Name Suggestion + name_suggestion: URLNameSuggestion = await adb_client.one_or_none_model(URLNameSuggestion) + assert name_suggestion is not None + assert name_suggestion.suggestion == "Example name" + + # Check for URL DS Optional Metadata + optional_ds: URLOptionalDataSourceMetadata = await adb_client.one_or_none_model(URLOptionalDataSourceMetadata) + assert optional_ds is not None + assert optional_ds.coverage_start == 
date(year=2025, month=8, day=9) + assert optional_ds.coverage_end == date(year=2025, month=8, day=10) + assert optional_ds.supplying_entity == "Test supplying entity" + assert optional_ds.agency_supplied + assert not optional_ds.agency_originated + assert optional_ds.agency_aggregation == AgencyAggregationEnum.STATE + assert optional_ds.agency_described_not_in_database == "Test agency described not in database" + assert optional_ds.data_portal == "Test data portal" + assert optional_ds.update_method == UpdateMethodEnum.NO_UPDATES + assert optional_ds.readme_url == "https://example.com/readme" + assert optional_ds.originating_entity == "Test Originating Entity" + assert optional_ds.retention_schedule == RetentionScheduleEnum.GT_10_YEARS + assert optional_ds.scraper_url == "https://example.com/scraper" + assert optional_ds.submission_notes == "Test submission notes" + assert optional_ds.access_notes == "Test access notes" + assert optional_ds.access_types == [ + AccessTypeEnum.API, + AccessTypeEnum.DOWNLOAD, + AccessTypeEnum.WEBPAGE + ] + assert optional_ds.record_formats == [ + "Test record format", + "Test record format 2" + ] + diff --git a/tests/automated/integration/api/test_manual_batch.py b/tests/automated/integration/api/test_manual_batch.py index 9be80c25..9e52d358 100644 --- a/tests/automated/integration/api/test_manual_batch.py +++ b/tests/automated/integration/api/test_manual_batch.py @@ -3,7 +3,7 @@ from src.api.endpoints.collector.dtos.manual_batch.post import ManualBatchInnerInputDTO, ManualBatchInputDTO from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.batch.sqlalchemy import Batch from src.collectors.enums import CollectorType diff --git 
a/tests/automated/integration/conftest.py b/tests/automated/integration/conftest.py index cca17136..b4466424 100644 --- a/tests/automated/integration/conftest.py +++ b/tests/automated/integration/conftest.py @@ -159,3 +159,11 @@ async def api_test_helper( ), ) await client.app.state.async_core.collector_manager.logger.clear_log_queue() + +@pytest_asyncio.fixture +async def test_agency_id( + db_data_creator: DBDataCreator +) -> int: + return await db_data_creator.agency( + name="Test Agency" + ) \ No newline at end of file diff --git a/tests/automated/integration/db/client/approve_url/test_basic.py b/tests/automated/integration/db/client/approve_url/test_basic.py index c9eb62b1..f090a4ea 100644 --- a/tests/automated/integration/db/client/approve_url/test_basic.py +++ b/tests/automated/integration/db/client/approve_url/test_basic.py @@ -6,7 +6,7 @@ from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType from src.db.models.impl.url.reviewing_user import ReviewingUserURL from tests.helpers.setup.final_review.core import setup_for_get_next_url_for_final_review diff --git a/tests/automated/integration/tasks/url/impl/probe/check/manager.py b/tests/automated/integration/tasks/url/impl/probe/check/manager.py index a8d89ba5..200f428a 100644 --- a/tests/automated/integration/tasks/url/impl/probe/check/manager.py +++ b/tests/automated/integration/tasks/url/impl/probe/check/manager.py @@ -20,7 +20,9 @@ async def check_url( url_id: int, expected_status: URLStatus ): - url: URL = await self.adb_client.one_or_none(select(URL).where(URL.id == url_id)) + url: URL = await 
self.adb_client.one_or_none( + statement=select(URL).where(URL.id == url_id) + ) assert url is not None assert url.status == expected_status diff --git a/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py b/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py index 0af83bff..93878562 100644 --- a/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py +++ b/tests/automated/integration/tasks/url/impl/test_url_miscellaneous_metadata_task.py @@ -3,7 +3,7 @@ import pytest from src.core.tasks.url.operators.misc_metadata.core import URLMiscellaneousMetadataTaskOperator -from src.db.models.impl.url.optional_data_source_metadata import URLOptionalDataSourceMetadata +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata from src.db.models.impl.url.core.sqlalchemy import URL from src.collectors.enums import CollectorType from src.core.tasks.url.enums import TaskOperatorOutcome From 20568a420e5d9959641351e7d424abde5acccbc1 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Tue, 21 Oct 2025 08:16:11 -0400 Subject: [PATCH 4/6] Fix tests --- ..._1620-6adf9d894180_enable_data_source_agency_submission.py | 1 - src/api/endpoints/submit/data_source/query.py | 2 +- src/api/endpoints/submit/data_source/request.py | 2 +- src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py | 2 +- .../automated/integration/api/submit/data_source/test_core.py | 4 ++-- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py b/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py index d1a72f7e..78e88dc0 100644 --- a/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py +++ b/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py @@ -82,7 +82,6 @@ def 
_add_new_columns_to_optional_ds_metadata(): sa.Column('scraper_url', sa.Text(), nullable=True), sa.Column('submission_notes', sa.Text(), nullable=True), sa.Column('access_notes', sa.Text(), nullable=True), - sa.Column('data_portal', sa.Text(), nullable=True), sa.Column('access_types', ARRAY( access_type_enum ), nullable=True), diff --git a/src/api/endpoints/submit/data_source/query.py b/src/api/endpoints/submit/data_source/query.py index 2d3d685b..405390d3 100644 --- a/src/api/endpoints/submit/data_source/query.py +++ b/src/api/endpoints/submit/data_source/query.py @@ -89,7 +89,7 @@ async def run(self, session: AsyncSession) -> Any: agency_originated=self.request.agency_originated, agency_aggregation=self.request.agency_aggregation, agency_described_not_in_database=self.request.agency_described_not_in_database, - data_portal=self.request.data_portal, + data_portal_type=self.request.data_portal_type, update_method=self.request.update_method, readme_url=self.request.readme_url, originating_entity=self.request.originating_entity, diff --git a/src/api/endpoints/submit/data_source/request.py b/src/api/endpoints/submit/data_source/request.py index 011ebe2a..fc649f74 100644 --- a/src/api/endpoints/submit/data_source/request.py +++ b/src/api/endpoints/submit/data_source/request.py @@ -21,13 +21,13 @@ class DataSourceSubmissionRequest(BaseModel): agency_originated: bool | None = None agency_aggregation: AgencyAggregationEnum | None = None agency_described_not_in_database: str | None = None + data_portal_type: str | None = None update_method: UpdateMethodEnum | None = None readme_url: str | None = None originating_entity: str | None = None retention_schedule: RetentionScheduleEnum | None = None scraper_url: str | None = None submission_notes: str | None = None - data_portal: str | None = None access_notes: str | None = None access_types: list[AccessTypeEnum] = [] record_formats: list[str] = [] diff --git a/src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py 
b/src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py index 37d4638d..3f6e239b 100644 --- a/src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py +++ b/src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py @@ -12,7 +12,7 @@ class URLOptionalDataSourceMetadata(URLDependentMixin, WithIDBase): __tablename__ = 'url_optional_data_source_metadata' record_formats = Column(ARRAY(String), nullable=True) - data_portal = Column(String, nullable=True) + data_portal_type = Column(String, nullable=True) supplying_entity = Column(String, nullable=True) coverage_start = Column(Date, nullable=True) coverage_end = Column(Date, nullable=True) diff --git a/tests/automated/integration/api/submit/data_source/test_core.py b/tests/automated/integration/api/submit/data_source/test_core.py index 6b8a9022..566ff60a 100644 --- a/tests/automated/integration/api/submit/data_source/test_core.py +++ b/tests/automated/integration/api/submit/data_source/test_core.py @@ -44,7 +44,7 @@ async def test_submit_data_source( retention_schedule=RetentionScheduleEnum.GT_10_YEARS, scraper_url="https://example.com/scraper", submission_notes="Test submission notes", - data_portal="Test data portal", + data_portal_type="Test data portal", access_notes="Test access notes", access_types=[ AccessTypeEnum.API, @@ -98,7 +98,7 @@ async def test_submit_data_source( assert not optional_ds.agency_originated assert optional_ds.agency_aggregation == AgencyAggregationEnum.STATE assert optional_ds.agency_described_not_in_database == "Test agency described not in database" - assert optional_ds.data_portal == "Test data portal" + assert optional_ds.data_portal_type == "Test data portal" assert optional_ds.update_method == UpdateMethodEnum.NO_UPDATES assert optional_ds.readme_url == "https://example.com/readme" assert optional_ds.originating_entity == "Test Originating Entity" From 1fe2235e6cefee4d76b347165c3c93b5f78e2217 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Tue, 21 Oct 2025 08:20:01 -0400 Subject: 
[PATCH 5/6] Merge from dev --- ...20_1620-6adf9d894180_enable_data_source_agency_submission.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py b/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py index 78e88dc0..ffe51314 100644 --- a/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py +++ b/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py @@ -1,7 +1,7 @@ """Enable data source/agency submission Revision ID: 6adf9d894180 -Revises: 7fc6502f1fa3 +Revises: 9d57b3b79d35 Create Date: 2025-10-20 16:20:44.081736 """ From 08c0c4a2410c6f5e831856fe571045978546f5b2 Mon Sep 17 00:00:00 2001 From: Max Chis Date: Tue, 21 Oct 2025 08:24:57 -0400 Subject: [PATCH 6/6] Fix alembic migration bug --- ...20_1620-6adf9d894180_enable_data_source_agency_submission.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py b/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py index ffe51314..c45f4f28 100644 --- a/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py +++ b/alembic/versions/2025_10_20_1620-6adf9d894180_enable_data_source_agency_submission.py @@ -14,7 +14,7 @@ # revision identifiers, used by Alembic. revision: str = '6adf9d894180' -down_revision: Union[str, None] = '7fc6502f1fa3' +down_revision: Union[str, None] = '9d57b3b79d35' branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None