Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
218 changes: 218 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
# Continuous-integration pipeline: code quality, security scanning, tests
# (with postgres/redis service containers), a Docker build check, dependency
# review on PRs, and a final aggregate gate job.
name: CI

on:
  push:
    branches: [main, chore/configure-dev-tools]
  pull_request:
    branches: [main]

jobs:
  code-quality:
    name: Code Quality Checks
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true

      - name: Set up Python
        run: uv python install 3.11

      - name: Install dependencies
        run: uv sync --all-extras --dev

      - name: Check code formatting with Black
        run: uv run black --check gfmstudio/ tests/

      - name: Lint with Ruff
        run: uv run ruff check gfmstudio/ tests/

      # mypy is optional until a config lands: the `|| echo` keeps the step's
      # exit code zero, and continue-on-error is a second safety net so the
      # job stays green while the output remains visible in the log.
      - name: Type checking with mypy (if configured)
        run: uv run mypy gfmstudio/ || echo "mypy not configured, skipping"
        continue-on-error: true

security:
name: Security Scanning
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v4
with:
enable-cache: true

- name: Set up Python
run: uv python install 3.11

- name: Install dependencies
run: |
uv sync --all-extras --dev

- name: Run Bandit security checks
run: |
uv run bandit -r gfmstudio/ -f json -o bandit-report.json || true
uv run bandit -r gfmstudio/
continue-on-error: true

- name: Check for security vulnerabilities with pip-audit
run: |
uv pip install pip-audit
uv run pip-audit || echo "Vulnerabilities found, please review"
continue-on-error: true

test:
name: Run Tests
runs-on: ubuntu-latest

services:
postgres:
image: postgres:15
env:
POSTGRES_USER: test_user
POSTGRES_PASSWORD: test_password
POSTGRES_DB: test_db
ports:
- 5432:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5

redis:
image: redis:7
ports:
- 6379:6379
options: >-
--health-cmd "redis-cli ping"
--health-interval 10s
--health-timeout 5s
--health-retries 5

steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v4
with:
enable-cache: true

- name: Set up Python
run: uv python install 3.11

- name: Install dependencies
run: |
uv sync --all-extras --dev

- name: Run pytest with coverage
env:
DATABASE_URI: postgresql+pg8000://test_user:test_password@localhost:5432/gfmstudio
AUTH_DATABASE_URI: postgresql+pg8000://test_user:test_password@localhost:5432/geostudio_auth
TEST_DATABASE_URI: postgresql+pg8000://test_user:test_password@localhost:5432/gfmstudio_test
# Caching Authentication Requests and Background Tasks
REDIS_URL: redis://localhost:6379
# COS INSTANCE and Buckets
OBJECT_STORAGE_KEY_ID: ...
OBJECT_STORAGE_SEC_KEY: ...
OBJECT_STORAGE_ENDPOINT: ...
OBJECT_STORAGE_REGION: 'us-south'
# COS Buckets
DATA_PVC: geoft-files-pvc
TUNES_FILES_BUCKET: geoft-service
DATASET_FILES_BUCKET: geoft-service-datasets
# FineTuning
TUNE_BASEDIR: /tmp/geoft-tunefiles/
DATA_MOUNT: /data
FILES_MOUNT: /geotunes/
FILES_PVC: geoft-files-pvc
CELERY_TASKS_ENABLED: True
# Inferencing
PIPELINES_V2_INFERENCE_ROOT_FOLDER: /tmp/inference_testing

run: |
uv run pytest tests/ \
--cov=gfmstudio \
--cov-report=xml \
--cov-report=html \
--cov-report=term-missing \
--junitxml=pytest-report.xml \
-v

- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4
with:
file: ./coverage.xml
flags: unittests
name: codecov-umbrella
fail_ci_if_error: false
continue-on-error: true

- name: Upload test results
if: always()
uses: actions/upload-artifact@v4
with:
name: test-results
path: |
pytest-report.xml
htmlcov/
retention-days: 30

docker-build:
name: Docker Build Test
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Build Docker image
uses: docker/build-push-action@v5
with:
context: .
push: false
tags: geospatial-studio-core:test
cache-from: type=gha
cache-to: type=gha,mode=max

dependency-review:
name: Dependency Review
runs-on: ubuntu-latest
if: github.event_name == 'pull_request'
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Dependency Review
uses: actions/dependency-review-action@v4
with:
fail-on-severity: moderate

all-checks-passed:
name: All Checks Passed
runs-on: ubuntu-latest
needs: [code-quality, security, test, docker-build]
if: always()
steps:
- name: Check if all jobs passed
run: |
if [[ "${{ needs.code-quality.result }}" != "success" ]] || \
[[ "${{ needs.security.result }}" != "success" ]] || \
[[ "${{ needs.test.result }}" != "success" ]] || \
[[ "${{ needs.docker-build.result }}" != "success" ]]; then
echo "One or more checks failed"
exit 1
fi
echo "All checks passed successfully!"
8 changes: 5 additions & 3 deletions gfmstudio/amo/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,13 @@ async def onboard_inference_model(
OnboardingStatus.PRESIGNED_URL_FAILED,
]
):
msg = (
f"model_id: {item.model_id} already in use."
"You can only reuse a model_id for successfully offboarded models and models that fail onboarding."
)
raise HTTPException(
status_code=422,
detail={
"message": f"model_id: {item.model_id} already in use. You can only reuse a model_id for successfully offboarded models and models that fail onboarding."
},
detail={"message": msg},
)
else:
amo_task_manager.set_task_status(
Expand Down
1 change: 1 addition & 0 deletions gfmstudio/common/db/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@

from ...config import settings

# Engine with a pool sized for the app's expected concurrency; SessionLocal
# is the session factory bound to it for request-scoped sessions.
# NOTE: removed a debug print of settings.DATABASE_URI — connection URIs
# embed credentials and must never be written to stdout/logs.
engine = create_engine(str(settings.DATABASE_URI), pool_size=30, max_overflow=10)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
3 changes: 2 additions & 1 deletion gfmstudio/data_advisor/helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,8 @@ def find_data_bbox(connector: DataConnector, payload: DataAdvisorRequestSchema):
response["unique_dates"] = []
response.pop("available_data")
response["message"] = (
f"The modalities {collections_with_no_data} do not have any data for the selected dates. Try the Bef_Days: {before_mydatetimes} or Aft_Days: {after_mydatetimes}"
f"The modalities {collections_with_no_data} do not have any data for the selected dates. "
f"Try the Bef_Days: {before_mydatetimes} or Aft_Days: {after_mydatetimes}"
)
results.append(response)
return {"results": results}
Expand Down
56 changes: 40 additions & 16 deletions gfmstudio/fine_tuning/core/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,9 @@ class LossFunction(BaseModel):
type : Optional[str]
The type of loss function to be used. Defaults to "CrossEntropyLoss".
avg_non_ignore : bool
If True, the loss is averaged only over non-ignored targets, where the labels are present. Ignored targets (e.g., missing labels) are excluded from the averaging. Defaults to True.
If True, the loss is averaged only over non-ignored targets,
where the labels are present. Ignored targets (e.g., missing labels)
are excluded from the averaging. Defaults to True.

Examples
--------
Expand All @@ -328,7 +330,10 @@ class LossFunction(BaseModel):
)
avg_non_ignore: bool = Field(
default=True,
description="The loss is only averaged over non-ignored targets (ignored targets are usually where labels are missing in the dataset) if this is True",
description=(
"The loss is only averaged over non-ignored targets "
"(ignored targets are usually where labels are missing in the dataset) if this is True"
),
)


Expand All @@ -339,7 +344,8 @@ class AuxLossFunction(LossFunction):
Attributes
----------
loss_weight : Optional[float]
The weight to apply to the auxiliary loss. This controls the contribution of this loss to the total loss. Defaults to 0.2.
The weight to apply to the auxiliary loss.
This controls the contribution of this loss to the total loss. Defaults to 0.2.

Examples
--------
Expand Down Expand Up @@ -543,9 +549,13 @@ class DataLoading(BaseModel):
--------
Create a DataLoading object:

>>> custom_data_loading = DataLoading(batch_size=16, workers_per_gpu=4, random_flip=1, bands=['RED', 'GREEN', 'BLUE'], tuning_bands=['GREEN', 'BLUE'])
>>> custom_data_loading = DataLoading(
batch_size=16, workers_per_gpu=4, random_flip=1,
bands=['RED', 'GREEN', 'BLUE'], tuning_bands=['GREEN', 'BLUE'])
>>> print(custom_data_loading)
DataLoading(batch_size=16, bands=['RED', 'GREEN', 'BLUE'], workers_per_gpu=4, random_flip=1, tuning_bands=['GREEN', 'BLUE'])
DataLoading(
batch_size=16, bands=['RED', 'GREEN', 'BLUE'],
workers_per_gpu=4, random_flip=1, tuning_bands=['GREEN', 'BLUE'])
"""

batch_size: Optional[int] = None
Expand Down Expand Up @@ -787,22 +797,27 @@ class BaseModelNecks(BaseModel):

class AuxiliaryHead(DecodeHead):
"""
Auxiliary head used in a neural network model, extending the main decode head, and including an auxiliary loss function.
Auxiliary head used in a neural network model, extending the main decode head,
and including an auxiliary loss function.

Attributes
----------
channels : Optional[int]
Number of channels at each block of the auxiliary head, except the final one. Inherited from `DecodeHead`. Defaults to 32.
Number of channels at each block of the auxiliary head, except the final one. Inherited from `DecodeHead`.
Defaults to 32.
num_convs : Optional[int]
Number of convolutional blocks in the auxiliary head, excluding the final block. Inherited from `DecodeHead`. Defaults to 1.
Number of convolutional blocks in the auxiliary head, excluding the final block. Inherited from `DecodeHead`.
Defaults to 1.
loss_decode : Optional[AuxLossFunction]
Defines the auxiliary loss function used for training. Defaults to an `AuxLossFunction` object with a loss weight of 0.2.
Defines the auxiliary loss function used for training.
Defaults to an `AuxLossFunction` object with a loss weight of 0.2.

Examples
--------
Create a `AuxiliaryHead` object:

>>> custom_aux_head = AuxiliaryHead(channels=64, num_convs=2, loss_decode=AuxLossFunction(type='DiceLoss', loss_weight=0.3))
>>> custom_aux_head = AuxiliaryHead(
channels=64, num_convs=2, loss_decode=AuxLossFunction(type='DiceLoss', loss_weight=0.3))
>>> print(custom_aux_head)
AuxiliaryHead(
channels=64,
Expand Down Expand Up @@ -866,17 +881,21 @@ class TemplateUserDefinedParams(BaseModel):
dataset_id : Optional[str]
The dataset id. Defaults to None.
data : Optional[DataLoading]
Parameters for configuring data loading, including batch size and number of workers. Defaults to a DataLoading object.
Parameters for configuring data loading, including batch size and number of workers.
Defaults to a DataLoading object.
runner : Optional[Runner]
Defines training parameters such as the number of epochs and early stopping criteria. Defaults to a Runner object.
Defines training parameters such as the number of epochs and early stopping criteria.
Defaults to a Runner object.
optimizer : Optional[Optimizer]
The optimizer configuration, including type (e.g., Adam) and learning rate settings. Defaults to an Optimizer object.
The optimizer configuration, including type (e.g., Adam) and learning rate settings.
Defaults to an Optimizer object.
lr_config : Optional[LRPolicy]
Learning rate policy configuration, including warmup settings. Defaults to None.
evaluation : Optional[Evaluation]
Parameters for model evaluation, such as validation frequency and metric. Defaults to an Evaluation object.
model : Optional[Model]
The model configuration, including whether to freeze the backbone and the definition of the decode and auxiliary heads. Defaults to a Model object.
The model configuration, including whether to freeze the backbone and the
definition of the decode and auxiliary heads. Defaults to a Model object.
backbone_model_id : Optional[str]
Base model id. Defaults to None.

Expand All @@ -901,7 +920,9 @@ class TemplateUserDefinedParams(BaseModel):
optimizer=Optimizer(type='SGD', lr='0.01', weight_decay=None),
lr_config=None,
evaluation=Evaluation(interval=5, metric='accuracy'),
model=Model(frozen_backbone=True, decode_head=DecodeHead(channels=32, num_convs=1, loss_decode=LossFunction(type='CrossEntropyLoss', avg_non_ignore=True)), auxiliary_head=None),
model=Model(frozen_backbone=True, decode_head=DecodeHead(
channels=32, num_convs=1, loss_decode=LossFunction(
type='CrossEntropyLoss', avg_non_ignore=True)), auxiliary_head=None),
backbone_model_id=None
)
"""
Expand Down Expand Up @@ -1027,7 +1048,10 @@ class TuneTemplateParameters(
)
mlflow_tags: Optional[dict] = Field(
default=dict,
description="Mlflow tags to uniquely match an experiment with unique identifiers e.g user email address, name ...",
description=(
"Mlflow tags to uniquely match an experiment with unique"
"identifiers e.g user email address, name ..."
),
)

# Fields `num_layers` from the BaseModels
Expand Down
Loading
Loading