Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
890d7b5
Updated GPU,CPU,Memory resources for finetuning
WanjiruCate Feb 24, 2026
d9b3974
Merge branch 'main' of https://github.com/terrastackai/geospatial-stu…
WanjiruCate Feb 24, 2026
785b1ea
Merge branch 'main' of https://github.com/terrastackai/geospatial-stu…
WanjiruCate Feb 26, 2026
ffd56f7
Update templates to use v1.2.3 terratorch
WanjiruCate Mar 2, 2026
87ecadc
Update Segmentation template
WanjiruCate Mar 2, 2026
e43fd58
Update fire completed tune
WanjiruCate Mar 4, 2026
8ceb9e5
update correct tune
WanjiruCate Mar 4, 2026
9721fbc
Merge branch 'main' of https://github.com/terrastackai/geospatial-stu…
WanjiruCate Mar 4, 2026
5b18ad0
Update deloy_lima template to update resources
WanjiruCate Mar 4, 2026
77d993f
change mem requirements
WanjiruCate Mar 4, 2026
b319e7f
Updated terratorch version
WanjiruCate Mar 4, 2026
30ccc6e
Add check for non-interactive mode
WanjiruCate Mar 4, 2026
3c655ba
Merge branch 'main' into terramind-tiny
cwachira Mar 4, 2026
0425350
Merge branch 'main' into terramind-tiny
cwachira Mar 5, 2026
b8100a2
Update deploy_ocp.sh
WanjiruCate Mar 5, 2026
2481a57
Merge branch 'terramind-tiny' of https://github.com/terrastackai/geos…
WanjiruCate Mar 5, 2026
8b961d3
Merge branch 'main' into terramind-tiny
WanjiruCate Mar 18, 2026
adaf8a4
Merge branch 'main' of https://github.com/terrastackai/geospatial-stu…
WanjiruCate Mar 19, 2026
430d096
Update fire tune and dataset payload
WanjiruCate Mar 19, 2026
5ca591f
Merge branch 'terramind-tiny' of https://github.com/terrastackai/geos…
WanjiruCate Mar 19, 2026
2852a32
Combined logic for the GPU request
WanjiruCate Mar 24, 2026
04e30df
remove unnecessary gpu logs
WanjiruCate Mar 24, 2026
6ce6995
Formatted the text
WanjiruCate Mar 24, 2026
16cb0ce
Merge branch 'main' into terramind-tiny
WanjiruCate Mar 24, 2026
1c89942
Update OCP query
WanjiruCate Mar 25, 2026
af1c48f
fix for crc actions
WanjiruCate Mar 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 80 additions & 1 deletion deploy_studio_k8s.sh
Original file line number Diff line number Diff line change
Expand Up @@ -291,14 +291,35 @@ sed -i -e "s|<pgbouncer_port>|${pgbouncer_port}|g" workspace/${DEPLOYMENT_ENV}/v
sed -i -e "s|<pgbouncer_user>|${pgbouncer_username}|g" workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml
sed -i -e "s|<pgbouncer_pass>|${pgbouncer_password}|g" workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml

# The section below removes GPUs from the pipeline components and the Finetuning job; to leave GPUs activated, comment out this section

# Call the function
get_menu_selection \
"Select whether you have GPU available in your cluster: " \
gpu_configuration_type \
"$gpu_configuration_options"

# The line below removes GPUs from the pipeline components; to leave GPUs activated, comment out this line
NVIDIA_GPUS_AVAILABLE=$(kubectl describe node ${CLUSTER_NODE_NAME} | grep -c "nvidia.com")
if [ "$NVIDIA_GPUS_AVAILABLE" -gt 0 ]; then

if [[ "$gpu_configuration_type" == "GPU-Available" && "$NVIDIA_GPUS_AVAILABLE" -gt 0 ]]; then
# Get number of GPUs
echo "Cluster Type: nvkind"
python ./deployment-scripts/remove-pipeline-gpu.py --remove-affinity-only workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml

# Keep the Job GPU configuration as is.
echo -e "\n Keeping GPU configuration for Finetuning job in values.yaml. You can update these later in workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml "
echo -e "and update the cluster later using: helm upgrade geospatial-studio ./geospatial-studio/ \n"
else
echo "Cluster Type: standard kind"
python ./deployment-scripts/remove-pipeline-gpu.py workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml

# remove job GPU request
echo -e "\n Removing GPU configuration from values.yaml"
python ./deployment-scripts/update_jobs_gpu.py --filename workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml \
--gpu-limit 0 \
--gpu-request 0
echo -e "--------------------------- Removed GPUs in the Cluster ------------------- \n"
fi

echo "**********************************************************************"
Expand All @@ -312,6 +333,64 @@ if [[ "${NON_INTERACTIVE:-false}" != "true" ]]; then
read ans
fi

echo "**********************************************************************"
echo "**********************************************************************"
echo "------ Configure Fine-Tuning Job Resources -------------------------"
echo "**********************************************************************"
echo "**********************************************************************"


# Ask user if they want to alter memory, CPU requests and limits for finetuning.
if [[ "${NON_INTERACTIVE:-false}" != "true" ]]; then
printf "%s " "Do you want to alter memory, CPU requests and limits for finetuning? (y/n) "
read ans
else
# Non-interactive mode: use CONFIGURE_RESOURCES environment variable (default to "n")
ans="${CONFIGURE_RESOURCES:-n}"
echo "Non-interactive mode: CONFIGURE_RESOURCES=$ans"
fi

# If yes, prompt user for memory limit, CPU limit, memory request and CPU request.
if [ "$ans" = "y" ]; then
echo "Updating memory, CPU requests and limits for finetuning."
echo ""

# Prompt for CPU limit
printf "%s " "CPU limit in cores (default: 4): "
read cpu_limit
cpu_limit=${cpu_limit:-4}

# Prompt for CPU request
printf "%s " "CPU request in cores (default: 2): "
read cpu_request
cpu_request=${cpu_request:-2}

# Prompt for Memory limit
printf "%s " "Memory limit in GB (default: 10): "
read memory_limit
memory_limit=${memory_limit:-10}

# Prompt for Memory request
printf "%s " "Memory request in GB (default: 6): "
read memory_request
memory_request=${memory_request:-6}

echo -e "\n Applying configuration:"
echo " CPU Limit: ${cpu_limit} cores, CPU Request: ${cpu_request} cores"
echo -e " Memory Limit: ${memory_limit}GB, Memory Request: ${memory_request}GB \n"

# Call the update script with user-provided values
python3 ./deployment-scripts/update_jobs_gpu.py --filename workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml \
--cpu-limit "$cpu_limit" \
--cpu-request "$cpu_request" \
--memory-limit "$memory_limit" \
--memory-request "$memory_request"
echo -e "\n Updated finetuning resource configurations \n"
else
echo -e "\n Not updating resource configurations."
echo "You can manually edit workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml"
echo -e "and update the cluster later using: helm upgrade geospatial-studio ./geospatial-studio/ \n"
fi

echo "----------------------------------------------------------------------"
echo "---------------- Building Helm dependencies ------------------------"
Expand Down
81 changes: 81 additions & 0 deletions deploy_studio_lima.sh
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,87 @@ if [[ "${NON_INTERACTIVE:-false}" != "true" ]]; then
read ans
fi

echo "**********************************************************************"
echo "**********************************************************************"
echo "------ Configure Fine-Tuning Job Resources -------------------------"
echo "**********************************************************************"
echo "**********************************************************************"

# Ask user if they have GPUs in their cluster. If Yes, keep configuration as is. If No, remove GPU configuration via the values.yaml
if [[ "${NON_INTERACTIVE:-false}" != "true" ]]; then
printf "%s " "Do you have GPUs in your cluster? (y/n): "
read ans
else
# Non-interactive mode: use HAS_GPU environment variable (default to "n" for no GPU)
ans="${HAS_GPU:-n}"
echo "Non-interactive mode: HAS_GPU=$ans"
fi

if [ "$ans" = "y" ]; then
echo -e "\n Keeping GPU configuration in values.yaml. You can update these later in workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml "
echo -e "and update the cluster later using: helm upgrade geospatial-studio ./geospatial-studio/ \n"
else
echo -e "\n Removing GPU configuration from values.yaml"
python ./deployment-scripts/update_jobs_gpu.py --filename workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml \
--gpu-limit 0 \
--gpu-request 0
echo -e "\n --------------------------- Removed GPUs in the Cluster ------------------- \n"
fi


# Ask user if they want to alter memory, CPU requests and limits for finetuning.
if [[ "${NON_INTERACTIVE:-false}" != "true" ]]; then
printf "%s " "Do you want to alter memory, CPU requests and limits for finetuning? (y/n) "
read ans
else
# Non-interactive mode: use CONFIGURE_RESOURCES environment variable (default to "n")
ans="${CONFIGURE_RESOURCES:-n}"
echo "Non-interactive mode: CONFIGURE_RESOURCES=$ans"
fi

# If yes, prompt user for memory limit, CPU limit, memory request and CPU request.
if [ "$ans" = "y" ]; then
echo "Updating memory, CPU requests and limits for finetuning."
echo ""

# Prompt for CPU limit
printf "%s " "CPU limit in cores (default: 4): "
read cpu_limit
cpu_limit=${cpu_limit:-4}

# Prompt for CPU request
printf "%s " "CPU request in cores (default: 2): "
read cpu_request
cpu_request=${cpu_request:-2}

# Prompt for Memory limit
printf "%s " "Memory limit in GB (default: 10): "
read memory_limit
memory_limit=${memory_limit:-10}

# Prompt for Memory request
printf "%s " "Memory request in GB (default: 6): "
read memory_request
memory_request=${memory_request:-6}

echo -e "\n Applying configuration:"
echo " CPU Limit: ${cpu_limit} cores, CPU Request: ${cpu_request} cores"
echo -e " Memory Limit: ${memory_limit}GB, Memory Request: ${memory_request}GB \n"

# Call the update script with user-provided values
python3 ./deployment-scripts/update_jobs_gpu.py --filename workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml \
--cpu-limit "$cpu_limit" \
--cpu-request "$cpu_request" \
--memory-limit "$memory_limit" \
--memory-request "$memory_request"
echo -e " \n Updated finetuning resource configurations \n"
else
echo -e "\n Not updating resource configurations"
echo "You can manually edit workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml"
echo -e "and update the cluster later using: helm upgrade geospatial-studio ./geospatial-studio/ \n"
fi



echo "----------------------------------------------------------------------"
echo "---------------- Building Helm dependencies ------------------------"
Expand Down
81 changes: 78 additions & 3 deletions deploy_studio_ocp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ EOF
echo "-------------------------- COS_STORAGE_CLASS -------------------------------------"
echo "------------------------ NON_COS_STORAGE_CLASS ---------------------------------"
echo "***********************************************************************************"
echo "-- Check StorageClasses values in the cluster for COS storage and block storage ---"


while true; do
printf "%s " "Press enter to continue"
Expand Down Expand Up @@ -459,7 +461,7 @@ EOF

# For CRC, we need volume permissions enabled, so don't use DO_NOT_SET_SCC
# For other OpenShift environments, storage may be pre-configured
if [[ "$DEPLOYMENT_ENV" == "crc" ]] || [[ "$DEPLOYMENT_ENV" == "crc-local" ]]; then
if ([[ "$DEPLOYMENT_ENV" == "crc" ]] || [[ "$DEPLOYMENT_ENV" == "crc-local" ]]) && [[ "$OC_PROJECT" == "default" ]]; then
./deployment-scripts/install-postgres.sh UPDATE_STORAGE DISABLE_PV
else
./deployment-scripts/install-postgres.sh UPDATE_STORAGE DISABLE_PV DO_NOT_SET_SCC
Expand Down Expand Up @@ -791,8 +793,19 @@ EOF

if [[ "$gpu_configuration_type" == "GPU-Available" ]]; then
python ./deployment-scripts/remove-pipeline-gpu.py --remove-affinity-only workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml

# Keep the Job GPU configuration as is.
echo "Keeping GPU configuration for Finetuning job in values.yaml. You can update these later in workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml "
echo "and update the cluster later using: helm upgrade geospatial-studio ./geospatial-studio/"
else
python ./deployment-scripts/remove-pipeline-gpu.py workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml

# remove job GPU request
echo "Removing GPU configuration from values.yaml"
python ./deployment-scripts/update_jobs_gpu.py --filename workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml \
--gpu-limit 0 \
--gpu-request 0
echo "--------------------------- Removed GPUs in the Cluster -------------------"
fi

else
Expand All @@ -819,8 +832,70 @@ echo "----------- Make any changes to deployment values yaml --------------"
echo "**********************************************************************"
echo "**********************************************************************"

printf "%s " "Press enter to continue"
read ans
if [[ "${NON_INTERACTIVE:-false}" != "true" ]]; then
printf "%s " "Press enter to continue"
read ans
fi

echo "**********************************************************************"
echo "**********************************************************************"
echo "------ Configure Fine-Tuning Job Resources -------------------------"
echo "**********************************************************************"
echo "**********************************************************************"


# Ask user if they want to alter memory, CPU requests and limits for finetuning.
configure_resources_options="No Yes"
typeset configure_resources

# Call the function
get_menu_selection \
"Do you want to alter memory, CPU requests and limits for finetuning?" \
configure_resources \
"$configure_resources_options"

# If yes, prompt user for memory limit, CPU limit, memory request and CPU request.
if [ "$configure_resources" = "Yes" ]; then
echo "Updating memory, CPU requests and limits for finetuning."
echo ""

# Prompt for CPU limit
printf "%s " "CPU limit in cores (default: 4): "
read cpu_limit
cpu_limit=${cpu_limit:-4}

# Prompt for CPU request
printf "%s " "CPU request in cores (default: 2): "
read cpu_request
cpu_request=${cpu_request:-2}

# Prompt for Memory limit
printf "%s " "Memory limit in GB (default: 10): "
read memory_limit
memory_limit=${memory_limit:-10}

# Prompt for Memory request
printf "%s " "Memory request in GB (default: 6): "
read memory_request
memory_request=${memory_request:-6}

echo -e "\n Applying configuration:"
echo " CPU Limit: ${cpu_limit} cores, CPU Request: ${cpu_request} cores"
echo -e " Memory Limit: ${memory_limit}GB, Memory Request: ${memory_request}GB \n"

# Call the update script with user-provided values
python3 ./deployment-scripts/update_jobs_gpu.py --filename workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml \
--cpu-limit "$cpu_limit" \
--cpu-request "$cpu_request" \
--memory-limit "$memory_limit" \
--memory-request "$memory_request"
echo -e " \n Updated finetuning resource configurations \n"
else
echo -e "\n Not updating resource configurations."
echo "You can manually edit workspace/${DEPLOYMENT_ENV}/values/geospatial-studio/values-deploy.yaml"
echo -e "and update the cluster later using: helm upgrade geospatial-studio ./geospatial-studio/ \n"
fi


echo "----------------------------------------------------------------------"
echo "---------------- Building Helm dependencies ------------------------"
Expand Down
11 changes: 11 additions & 0 deletions geospatial-studio/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,17 @@ gfm-studio-gateway:
## If multiple values are given, the pod can be scheduled on any node
## matching one of them.
# NODE_GPU_SPEC: NVIDIA-A100-SXM4-80GB
# Default resource settings for the defined fine-tuning jobs.
# Limits are hard caps; requests are the guaranteed minimum scheduled for the job.
RESOURCE_LIMIT_GPU: 1 # Number of GPUs
RESOURCE_REQUEST_GPU: 1 # Number of GPUs

RESOURCE_LIMIT_CPU: 4 # CPU cores
RESOURCE_REQUEST_CPU: 2

RESOURCE_LIMIT_Memory: 10 # Memory in GB
RESOURCE_REQUEST_Memory: 6 # Memory in GB


# geofm-ui
geofm-ui:
Expand Down
Loading
Loading