Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions model-engine/Dockerfile.chainguard
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# model-engine image built on the Chainguard FIPS Python base image.
# Installs the server package in editable mode together with its test
# requirements and the integration tests.
FROM cgr.dev/scale.com/python-fips:3.10.15-dev
WORKDIR /workspace
# Package installation and the permission changes below need root; the image
# drops back to an unprivileged user at the end.
USER root

# --no-cache keeps the apk package index out of the image layer.
RUN apk add --no-cache htop \
    dumb-init \
    libssh \
    openssh-client \
    iftop \
    curl \
    curl-dev \
    procps \
    libcurl-openssl4 \
    vim \
    kubectl

# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page as the "binary"; download and chmod share one layer.
RUN curl -fsSLo /bin/aws-iam-authenticator https://github.com/kubernetes-sigs/aws-iam-authenticator/releases/download/v0.5.9/aws-iam-authenticator_0.5.9_linux_amd64 \
    && chmod +x /bin/aws-iam-authenticator

RUN pip install pip==24.2 --no-cache-dir
# NOTE(review): makes /workspace world-writable. This runs before the COPY
# steps, so files copied later keep their own permissions - confirm that this
# is the intent.
RUN chmod -R 777 /workspace

RUN pip install awscli==1.34.28 --no-cache-dir

COPY model-engine/sitecustomize.py /usr/lib/python3.10/site-packages/sitecustomize.py

WORKDIR /workspace/model-engine/
# Requirements are copied and installed before the source tree so that source
# edits do not invalidate the dependency layers.
COPY model-engine/requirements-test.txt /workspace/model-engine/requirements-test.txt
COPY model-engine/requirements.txt /workspace/model-engine/requirements.txt
COPY model-engine/requirements_override.txt /workspace/model-engine/requirements_override.txt
RUN pip install -r requirements-test.txt --no-cache-dir
RUN pip install -r requirements.txt --no-cache-dir
RUN pip install -r requirements_override.txt --no-cache-dir
COPY model-engine/setup.py /workspace/model-engine/setup.py
COPY model-engine/model_engine_server /workspace/model-engine/model_engine_server
RUN pip install -e . --no-cache-dir

COPY integration_tests /workspace/integration_tests

WORKDIR /workspace
# Use the key=value form; the space-separated ENV form is legacy syntax.
ENV PYTHONPATH=/workspace
ENV WORKSPACE=/workspace

USER nonroot

EXPOSE 5000
5 changes: 4 additions & 1 deletion model-engine/model_engine_server/api/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from fastapi.security import HTTPBasic, HTTPBasicCredentials, OAuth2PasswordBearer
from model_engine_server.common.config import hmi_config
from model_engine_server.common.dtos.model_endpoints import BrokerType
from model_engine_server.common.env_vars import CIRCLECI
from model_engine_server.common.env_vars import CIRCLECI, PROD
from model_engine_server.core.auth.authentication_repository import AuthenticationRepository, User
from model_engine_server.core.auth.fake_authentication_repository import (
FakeAuthenticationRepository,
Expand Down Expand Up @@ -238,6 +238,9 @@ def _get_external_interfaces(
elif infra_config().cloud_provider == "azure":
inference_task_queue_gateway = servicebus_task_queue_gateway
infra_task_queue_gateway = servicebus_task_queue_gateway
elif PROD:
inference_task_queue_gateway = redis_task_queue_gateway
infra_task_queue_gateway = redis_task_queue_gateway
else:
inference_task_queue_gateway = sqs_task_queue_gateway
infra_task_queue_gateway = sqs_task_queue_gateway
Expand Down
3 changes: 3 additions & 0 deletions model-engine/model_engine_server/common/env_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"LAUNCH_SERVICE_TEMPLATE_CONFIG_MAP_PATH",
"LAUNCH_SERVICE_TEMPLATE_FOLDER",
"LOCAL",
"PROD",
"SKIP_AUTH",
"WORKSPACE",
"get_boolean_env_var",
Expand All @@ -42,6 +43,8 @@ def get_boolean_env_var(name: str) -> bool:

CIRCLECI: bool = get_boolean_env_var("CIRCLECI")

PROD: bool = get_boolean_env_var("PROD")

LOCAL: bool = get_boolean_env_var("LOCAL")
"""Indicates that Launch is running in a local development environment. Also used for local testing.
"""
Expand Down
24 changes: 19 additions & 5 deletions model-engine/model_engine_server/common/service_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ def make_sync_request_with_retries(
# We should be creating a new requests Session each time, which should avoid sending requests to the same endpoint
# This is admittedly a hack until we get proper least-outstanding-requests load balancing to our http endpoints

logger.info(f"🔍 DEBUG: make_sync_request_with_retries to URL: {request_url}")
logger.info(f"🔍 DEBUG: Payload keys: {list(payload_json.keys()) if isinstance(payload_json, dict) else type(payload_json)}")

try:
for attempt in Retrying(
stop=stop_after_attempt(SYNC_ENDPOINT_RETRIES + 1),
Expand All @@ -39,14 +42,25 @@ def make_sync_request_with_retries(
with attempt:
if attempt.retry_state.attempt_number > 1: # pragma: no cover
logger.info(f"Retry number {attempt.retry_state.attempt_number}")
resp = requests.post(
request_url,
json=payload_json,
headers={"Content-Type": "application/json"},
)

# DEBUG: Log before making request
logger.info(f"🔍 DEBUG: About to POST to {request_url} (attempt {attempt.retry_state.attempt_number})")

try:
resp = requests.post(
request_url,
json=payload_json,
headers={"Content-Type": "application/json"},
)
logger.info(f"🔍 DEBUG: Response status: {resp.status_code}")
except Exception as e:
logger.error(f"🔍 DEBUG: Exception during requests.post: {type(e).__name__}: {e}")
raise

if resp.status_code == 429:
raise HTTP429Exception("429 returned")
elif resp.status_code != 200:
logger.warning(f"🔍 DEBUG: Non-200 response. Status: {resp.status_code}, Content: {resp.content}")
raise UpstreamHTTPSvcError(status_code=resp.status_code, content=resp.content)
return resp.json()
except RetryError:
Expand Down
17 changes: 16 additions & 1 deletion model-engine/model_engine_server/core/celery/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
copied from https://github.com/celery/celery/blob/81df81acf8605ba3802810c7901be7d905c5200b/celery/backends/s3.py"""

import threading
import base64
import hashlib

import tenacity
from celery.backends.base import KeyValueStoreBackend
Expand Down Expand Up @@ -77,7 +79,20 @@ def get(self, key):
def set(self, key, value):
    """Store ``value`` under ``key`` in S3 together with a SHA-256 checksum.

    Args:
        key: Object key; converted to ``str`` with ``bytes_to_str``.
        value: Payload to store, either ``str`` or a bytes-like object.
            Text is encoded as UTF-8 before it is hashed and uploaded.
    """
    key = bytes_to_str(key)
    s3_object = self._get_s3_object(key)

    # Encode text once so the digest is computed over exactly the bytes
    # that are uploaded.
    body = value.encode("utf-8") if isinstance(value, str) else value

    # The ChecksumSHA256 parameter takes the base64-encoded raw digest.
    digest = hashlib.sha256(body).digest()
    checksum_sha256 = base64.b64encode(digest).decode("ascii")

    # Single upload: the object is written once, with its checksum.
    s3_object.put(
        Body=body,
        ChecksumAlgorithm="SHA256",
        ChecksumSHA256=checksum_sha256,
    )

def delete(self, key):
key = bytes_to_str(key)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3444,6 +3444,7 @@ def infer_addition_engine_args_from_model_name(
gpu_memory_utilization=gpu_memory_utilization,
attention_backend=attention_backend,
trust_remote_code=trust_remote_code,
dtype="float16",
)


Expand Down
Loading