Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ omit = [

# Ignore questionable code or code in the process of reimplementation
# Fastly code not currently used
"submit_ce/fastapi",
"submit_ce/fastapi/**",
#
"submit_ce/implementations/compile/common.py",
"submit_ce/implementations/compile/compile_at_gcp.py", # migrate existing tests or eliminate script
Expand Down
13 changes: 13 additions & 0 deletions submit_ce/api/CompileService.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,16 @@ def check(self, process_id: str, user: User, client: Client) -> ProcessStatus:
-------
ProcessStatus
"""
...

@abstractmethod
def is_available(self) -> bool:
"""
Check if the service is configured and available.

Returns
-------
str
`True` if service is configured and available.
"""
...
23 changes: 23 additions & 0 deletions submit_ce/api/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ def save(self, *events: Event, submission_id: Optional[int] = None) \
"""
...

@abstractmethod
def upload(self, files: SubmitFile, submission_id: int, user: User, client: Client) -> Workspace:
"""Uploads a file to an existing submission.

Expand All @@ -247,6 +248,7 @@ def upload(self, files: SubmitFile, submission_id: int, user: User, client: Clie
# TODO Make this just an Event+save()
...

@abstractmethod
def licenses(self, active_only=True) -> List[License]:
"""Gets a list of licenses that submitters can be set on the contents of submissions.

Expand All @@ -258,6 +260,7 @@ def licenses(self, active_only=True) -> List[License]:
"""
...

@abstractmethod
def categories_for_user(self, user_id: str) -> list[str]:
"""Gets list of categories the user may submit to.

Expand All @@ -268,6 +271,7 @@ def categories_for_user(self, user_id: str) -> list[str]:
"""
...

@abstractmethod
def next_announcement_time(self, reference: Optional[datetime] = None) -> datetime:
"""Gets the next announce time. If `reference` is not passed, it does
the next announce time from now.
Expand All @@ -280,7 +284,9 @@ def next_announcement_time(self, reference: Optional[datetime] = None) -> dateti
Returns
--------
Time of next announce always TZ aware"""
...

@abstractmethod
def next_freeze_time(self, reference: Optional[datetime] = None) -> datetime:
"""Gets the next freeze time. If `reference` is not passed, it does
the next freeze time from now.
Expand All @@ -293,3 +299,20 @@ def next_freeze_time(self, reference: Optional[datetime] = None) -> datetime:
Returns
--------
Time of next freeze always TZ aware"""
...

@abstractmethod
def healthy(self)-> Tuple[bool, str]:
"""Return a message about this api that is safe to show the general
public, raise a :class:`RuntimeError` if api is not configured correctly
or services it depends on are not working.

Returns
-------
bool
`True` if this and all dependent services are healthy.
str
A message about the api that is safe to show to the general public.

"""
...
74 changes: 0 additions & 74 deletions submit_ce/domain/event/tests/test_event_util.py

This file was deleted.

27 changes: 0 additions & 27 deletions submit_ce/domain/event/util.py

This file was deleted.

2 changes: 1 addition & 1 deletion submit_ce/domain/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class SubmitFile(Protocol):
"""Name of the file as provided by the client."""
content_type: str
"""The MIME type of the file as provided by the client."""
stream: BytesIO
stream: Any # should be BytesIO but often SpooledTemporaryFile Not sure how to handle this
"""File contents as provided by the client."""

@classmethod
Expand Down
3 changes: 3 additions & 0 deletions submit_ce/domain/uploads.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ class FileStatus(BaseModel):
ancillary: bool = False
errors: List[FileError] = []

@property
def size(self):
return self.bytes

class UploadStatus(Enum): # type: ignore
"""The status of the upload workspace with respect to submission."""
Expand Down
23 changes: 20 additions & 3 deletions submit_ce/implementations/compile/compile_at_gcp_service.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from datetime import timezone, datetime
from typing import Optional
from venv import logger
import httpx
from typing_extensions import override
from zoneinfo import ZoneInfo

from arxiv.base.config import ARXIV_BUSINESS_TZ
Expand Down Expand Up @@ -37,6 +40,14 @@ def __init__(self,
self.max_tex_files = max_tex_files
self.timeout = timeout
self.timezone = ARXIV_BUSINESS_TZ

def __repr__(self) -> str:
return (f"{self.__class__.__name__}("
f"tex2pdf_url={self.tex2pdf_url},"
f"base_submissions_dir={self.base_submissions_dir}"
)

@override
def start_compile(self, submission: Submission,
user: User, client: Client,
api: SubmitApi,
Expand Down Expand Up @@ -78,10 +89,16 @@ def start_compile(self, submission: Submission,
duration_sec = json_data["total_time"],
utc_start_time=utc_start_time,
url=f"FAKE_URL_{__file__}"

)



@override
def check(self, process_id: str, user: User, client: Client) -> ProcessStatus:
pass

@override
def is_available(self) -> bool:
resp = httpx.get(self.tex2pdf_url)
if resp.status_code == 200:
return True
logger.error(f"Healthcheck failed: service at '{self.tex2pdf_url}' responded with status {resp.status_code}")
return False
56 changes: 20 additions & 36 deletions submit_ce/implementations/file_store/gs_file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@

logger = logging.getLogger(__file__)

class SecurityError(RuntimeError):
"""Something suspicious happened."""


class UserFile:
pass


class GsFileStore(SubmissionFileStore, FileStoreMixin):
Expand Down Expand Up @@ -61,14 +55,20 @@ def __init__(self,
"""Prefix in the {gs_bucket}/{shard}/{id} directory to store the source."""
self.source_prefix = source_prefix
"""Prefix for source files"""

if self.gs_prefix.startswith("/"):
self.gs_prefix = self.gs_prefix[1:]

self.storage_client = storage.Client()
self.bucket = self.storage_client.bucket(self.gs_bucket)
self.obj_store = GsObjectStore(self.bucket)

def __repr__(self) -> str:
return (f"{self.__class__.__name__}("
f"gs_bucket={self.gs_bucket},"
f"gs_prefix={self.gs_prefix},"
f"source_prefix={self.source_prefix},"
)

def _blob_to_file_status(self, submission_id, blob) -> FileStatus:
src_dir = self._source_path(submission_id)
Expand Down Expand Up @@ -123,7 +123,7 @@ def store_source_file(self,
content: SubmitFile,
chunk_size: int) -> FileStatus:
"""Stores a file for a submisison."""
blob = self.bucket.blob(self._source_path(submission_id) / content.filename)
blob = self.bucket.blob(str(self._source_path(submission_id) / content.filename))
blob.upload_from_file(content.stream, content_type=content.content_type)
return self._blob_to_file_status(submission_id, blob)

Expand All @@ -134,16 +134,16 @@ def store_source_package(self,
"""Store a source package for a submission."""
files=[]
src_dir = self._source_path(submission_id)

with tarfile.open(fileobj=content.stream, mode="r:*") as tar:
for member in tar.getmembers():
if not member.isfile():
continue
continue
with tar.extractfile(member) as file:
blob = self.bucket.blob(str(src_dir / member.name))
blob.upload_from_file(file, size=member.size)
files.append( {"file":member.name, "bytes": member.size})

return files

def get_preview(self, submission_id: str) -> FileObj:
Expand Down Expand Up @@ -173,7 +173,7 @@ def does_source_exist(self, submission_id: str) -> bool:
return self.bucket.blob(str(self._source_package_path(submission_id))).exists()

def get_preview_checksum(self, submission_id: str) -> str:
"""Get the checksum of the preview PDF for a submission."""
"""Get the checksum of the preview PDF for a submission."""
return self._get_checksum(self._preview_path(submission_id))

def does_preview_exist(self, submission_id: str) -> bool:
Expand All @@ -183,7 +183,7 @@ def does_preview_exist(self, submission_id: str) -> bool:
def _get_checksum(self, path: str) -> str:
item = self.bucket.blob(path)
return item.crc32c

def _submission_path(self, submission_id: str|str) -> Path:
"""Gets GS filesystem structure ex /{rootdir}/{first 4 digits of submission id}/{submission id}"""
shard_dir = self.gs_prefix / Path(str(submission_id)[:4])
Expand Down Expand Up @@ -215,30 +215,14 @@ def delete_workspace(self, submission_id: str):
for blob in blobs:
blob.delete()


def is_available(self) -> bool:
"""Determine whether the filesystem is available."""
return self.bucket.exists()


# def _check_safe(self, workspace: Workspace, full_path: str, # TODO implement
# is_ancillary: bool = False, is_removed: bool = False,
# is_persisted: bool = False, is_system: bool = False,
# strict: bool = True) -> None:
# if not strict or is_system:
# wks_full_path = self.get_path_bare(workspace.base_path,
# is_persisted=is_persisted)
# elif is_ancillary:
# wks_full_path = self.get_path_bare(workspace.ancillary_path,
# is_persisted=is_persisted)
# elif is_removed:
# wks_full_path = self.get_path_bare(workspace.removed_path,
# is_persisted=is_persisted)
# else:
# wks_full_path = self.get_path_bare(workspace.source_path,
# is_persisted=is_persisted)
# if wks_full_path not in full_path:
# raise ValueError(f'Not a valid path for workspace: {full_path}')
try:
return self.bucket.exists()
except Exception as ex:
logger.error(f"Could not check if bucket exists: {ex}")
return False


def delete_all_source_files(self, submission_id: str) -> None:
pass
Expand Down
Loading