Skip to content

Commit b0ff6ab

Browse files
authored
Converge Integration for startup (#211)
1 parent c8e02ae commit b0ff6ab

File tree

3 files changed

+153
-53
lines changed

3 files changed

+153
-53
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,7 @@ used to overwrite the defaults.
357357
- `RF_PYTHON_EXECUTABLE` - Python executable (default: python3 falls back to python if not found)
358358
- `RF_PIP_EXECUTABLE` - pip executable (default: pip3 falls back to pip if not found)
359359
- `RF_CONVERGE_MODE` - Whether to use Rapidfire AI Converge frontend and backend if available (default: all)
360+
- `RF_START_FRONTEND` - Whether to start the frontend dashboard (default: true; set to false, or pass `--no-frontend` to `rapidfireai start`, to disable it)
360361

361362
## Community & Governance
362363

rapidfireai/cli.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,13 @@ def main():
455455
help="Run in Colab mode (skips frontend, conditionally starts MLflow based on tracking backend)",
456456
)
457457

458+
parser.add_argument(
459+
"--no-frontend",
460+
action="store_true",
461+
help="Do not start the dashboard (Flask on RF_FRONTEND_PORT); MLflow and the API still start when enabled. "
462+
"With Converge, only the backend is started when --converge=all.",
463+
)
464+
458465
parser.add_argument(
459466
"--test-notebooks",
460467
action="store_true",
@@ -494,6 +501,9 @@ def main():
494501
os.environ["RF_COLAB_MODE"] = "true"
495502
elif ColabConfig.ON_COLAB and os.getenv("RF_COLAB_MODE") is None:
496503
os.environ["RF_COLAB_MODE"] = "true"
504+
505+
if args.no_frontend:
506+
os.environ["RF_START_FRONTEND"] = "false"
497507

498508
# Handle force command separately
499509
if args.force:

setup/start.sh

Lines changed: 142 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,24 @@ RF_DB_PATH="${RF_DB_PATH:=$RF_HOME/db}"
2929
RF_LOG_PATH="${RF_LOG_PATH:=$RF_HOME/logs}"
3030

3131
RF_TIMEOUT_TIME=${RF_TIMEOUT_TIME:=30}
32+
RF_PYTHON_EXECUTABLE=${RF_PYTHON_EXECUTABLE:-python3}
33+
RF_PIP_EXECUTABLE=${RF_PIP_EXECUTABLE:-pip3}
34+
35+
if ! command -v $RF_PYTHON_EXECUTABLE &> /dev/null; then
36+
RF_PYTHON_EXECUTABLE=python
37+
fi
38+
39+
if ! command -v $RF_PIP_EXECUTABLE &> /dev/null; then
40+
RF_PIP_EXECUTABLE=pip
41+
fi
3242

3343
# Converge mode: all (backend+frontend), none (original frontend only), backend, frontend
3444
RF_CONVERGE_MODE=${RF_CONVERGE_MODE:=all}
45+
CONVERGE_FOUND=$(${RF_PIP_EXECUTABLE} show rapidfireai-pro >/dev/null 2>&1; echo $?)
46+
if [[ $CONVERGE_FOUND -ne 0 ]]; then
47+
RF_CONVERGE_MODE="none"
48+
fi
49+
3550
case "$RF_CONVERGE_MODE" in
3651
all|none|backend|frontend) ;;
3752
*)
@@ -58,6 +73,10 @@ else
5873
RF_COLAB_MODE=${RF_COLAB_MODE:=true}
5974
fi
6075

76+
# When false, do not start the RapidFire dashboard (Flask) or Converge frontend; MLflow + API still run when enabled.
77+
RF_START_FRONTEND=${RF_START_FRONTEND:=true}
78+
79+
6180
# Colors for output
6281
RED='\033[0;31m'
6382
GREEN='\033[0;32m'
@@ -74,26 +93,11 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
7493
RAPIDFIRE_DIR="$SCRIPT_DIR/../rapidfireai"
7594
RAPIDFIRE_FIT_DIR="$RAPIDFIRE_DIR/fit"
7695
RAPIDFIRE_EVALS_DIR="$RAPIDFIRE_DIR/evals"
77-
if [[ -d "$RAPIDFIRE_DIR/frontend_pro" ]]; then
78-
FRONTEND_DIR="$RAPIDFIRE_DIR/frontend_pro"
79-
else
80-
FRONTEND_DIR="$RAPIDFIRE_DIR/frontend"
81-
fi
96+
FRONTEND_DIR="$RAPIDFIRE_DIR/frontend"
97+
8298
RAPIDFIRE_MODE=$(cat $RF_HOME/rf_mode.txt 2>/dev/null || echo "fit")
8399
DISPATCHER_DIR="$RAPIDFIRE_DIR/$RAPIDFIRE_MODE/dispatcher"
84100

85-
RF_PYTHON_EXECUTABLE=${RF_PYTHON_EXECUTABLE:-python3}
86-
RF_PIP_EXECUTABLE=${RF_PIP_EXECUTABLE:-pip3}
87-
88-
if ! command -v $RF_PYTHON_EXECUTABLE &> /dev/null; then
89-
RF_PYTHON_EXECUTABLE=python
90-
fi
91-
92-
if ! command -v $RF_PIP_EXECUTABLE &> /dev/null; then
93-
RF_PIP_EXECUTABLE=pip
94-
fi
95-
96-
97101
# Function to print colored output
98102
print_status() {
99103
echo -e "${BLUE}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} $1"
@@ -116,6 +120,17 @@ has_rapidfireai_pro() {
116120
${RF_PIP_EXECUTABLE} show rapidfireai-pro >/dev/null 2>&1
117121
}
118122

123+
if [[ "$RF_CONVERGE_MODE" != "none" ]]; then
124+
if [[ "$RF_MLFLOW_ENABLED" != "true" ]]; then
125+
print_status "MLflow is not enabled, Converge requires MLflow, enabling MLflow"
126+
RF_MLFLOW_ENABLED="true"
127+
fi
128+
if [[ "$RF_TENSORBOARD_ENABLED" != "true" ]]; then
129+
print_status "TensorBoard is not enabled, Converge requires TensorBoard, enabling TensorBoard"
130+
RF_TENSORBOARD_ENABLED="true"
131+
fi
132+
fi
133+
119134
# Function to setup Python environment
120135
setup_python_env() {
121136
print_status "Setting up Python environment..."
@@ -298,9 +313,11 @@ check_startup_issues() {
298313
print_error "MLflow $RF_MLFLOW_HOST:$RF_MLFLOW_PORT in use"
299314
return 1
300315
fi
301-
if ping_port $RF_FRONTEND_HOST $RF_FRONTEND_PORT; then
302-
print_error "Frontend $RF_FRONTEND_HOST:$RF_FRONTEND_PORT in use"
303-
return 1
316+
if [[ "$RF_START_FRONTEND" == "true" ]]; then
317+
if ping_port $RF_FRONTEND_HOST $RF_FRONTEND_PORT; then
318+
print_error "Frontend $RF_FRONTEND_HOST:$RF_FRONTEND_PORT in use"
319+
return 1
320+
fi
304321
fi
305322
if ping_port $RF_API_HOST $RF_API_PORT; then
306323
print_error "API port $RF_API_HOST:$RF_API_PORT in use"
@@ -680,6 +697,10 @@ start_frontend_if_needed() {
680697
print_status "⊗ Skipping frontend (using TensorBoard in Colab mode)"
681698
return 0
682699
fi
700+
if [[ "$RF_START_FRONTEND" != "true" ]]; then
701+
print_status "⊗ Skipping frontend (RF_START_FRONTEND=false or --no-frontend)"
702+
return 0
703+
fi
683704

684705
# Otherwise start frontend
685706
start_frontend
@@ -722,15 +743,15 @@ show_status() {
722743
print_status " %tensorboard --logdir $RF_HOME/rapidfire_experiments/tensorboard_logs/{experiment_name}"
723744
fi
724745
else
725-
# if [[ "$rf_mode" == "fit" ]]; then
726-
if ping_port $RF_FRONTEND_HOST $RF_FRONTEND_PORT; then
727-
print_success "🚀 RapidFire Frontend is ready!"
728-
print_status "👉 Open your browser and navigate to: http://$RF_FRONTEND_HOST:$RF_FRONTEND_PORT"
729-
print_status " (Click the link above or copy/paste the URL into your browser)"
730-
else
731-
print_error "🚨 RapidFire Frontend is not ready!"
732-
fi
733-
# fi
746+
if [[ "$RF_START_FRONTEND" != "true" ]]; then
747+
print_status "⊗ Frontend not started (RF_START_FRONTEND=false or rapidfireai start --no-frontend)"
748+
elif ping_port $RF_FRONTEND_HOST $RF_FRONTEND_PORT; then
749+
print_success "🚀 RapidFire Frontend is ready!"
750+
print_status "👉 Open your browser and navigate to: http://$RF_FRONTEND_HOST:$RF_FRONTEND_PORT"
751+
print_status " (Click the link above or copy/paste the URL into your browser)"
752+
else
753+
print_error "🚨 RapidFire Frontend is not ready!"
754+
fi
734755
fi
735756
if [[ "$RF_MLFLOW_ENABLED" == "true" ]]; then
736757
if ping_port $RF_MLFLOW_HOST $RF_MLFLOW_PORT; then
@@ -775,14 +796,41 @@ show_status() {
775796
fi
776797

777798
# Only check frontend.log if frontend is running
778-
if [[ "$RF_COLAB_MODE" != "true" ]]; then
799+
if [[ "$RF_COLAB_MODE" != "true" ]] && [[ "$RF_CONVERGE_MODE" == "none" ]] && [[ "$RF_START_FRONTEND" == "true" ]]; then
779800
if [[ -f "$RF_LOG_PATH/frontend.log" ]]; then
780801
local size=$(du -h "$RF_LOG_PATH/frontend.log" | cut -f1)
781802
print_status "- $RF_LOG_PATH/frontend.log: $size"
782803
else
783804
print_warning "- $RF_LOG_PATH/frontend.log: not found"
784805
fi
785806
fi
807+
808+
if [[ "$RF_CONVERGE_MODE" != "none" ]]; then
809+
if [[ -f "$RF_LOG_PATH/converge.log" ]]; then
810+
local size=$(du -h "$RF_LOG_PATH/converge.log" | cut -f1)
811+
print_status "- $RF_LOG_PATH/converge.log: $size"
812+
else
813+
print_warning "- $RF_LOG_PATH/converge.log: not found"
814+
fi
815+
fi
816+
817+
if [[ "$RF_START_FRONTEND" == "true" ]] && { [[ "$RF_CONVERGE_MODE" == "all" ]] || [[ "$RF_CONVERGE_MODE" == "frontend" ]]; }; then
818+
if [[ -f "$RF_LOG_PATH/converge_frontend.log" ]]; then
819+
local size=$(du -h "$RF_LOG_PATH/converge_frontend.log" | cut -f1)
820+
print_status "- $RF_LOG_PATH/converge_frontend.log: $size"
821+
else
822+
print_warning "- $RF_LOG_PATH/converge_frontend.log: not found"
823+
fi
824+
fi
825+
826+
if [[ "$RF_CONVERGE_MODE" == "all" ]] || [[ "$RF_CONVERGE_MODE" == "backend" ]]; then
827+
if [[ -f "$RF_LOG_PATH/converge_backend.log" ]]; then
828+
local size=$(du -h "$RF_LOG_PATH/converge_backend.log" | cut -f1)
829+
print_status "- $RF_LOG_PATH/converge_backend.log: $size"
830+
else
831+
print_warning "- $RF_LOG_PATH/converge_backend.log: not found"
832+
fi
833+
fi
786834
}
787835

788836
# Function to start services based on mode
@@ -792,10 +840,20 @@ start_services() {
792840

793841
# Calculate total services based on mode
794842
# MLflow runs unless tensorboard-only in Colab
795-
# Frontend runs if MLflow runs
843+
# Third service: UI (classic frontend, full Converge, or Converge backend-only when --no-frontend)
796844
if [[ "$RF_MLFLOW_ENABLED" == "true" ]]; then
797845
((total_services++))
798-
((total_services++))
846+
local ui_slot=0
847+
if [[ "$RF_START_FRONTEND" == "true" ]]; then
848+
ui_slot=1
849+
elif [[ "$RF_CONVERGE_MODE" == "all" ]] || [[ "$RF_CONVERGE_MODE" == "backend" ]]; then
850+
if has_rapidfireai_pro; then
851+
ui_slot=1
852+
fi
853+
fi
854+
if [[ "$ui_slot" -eq 1 ]]; then
855+
((total_services++))
856+
fi
799857
fi
800858

801859
if [[ ! -d "$RF_LOG_PATH" ]]; then
@@ -825,34 +883,65 @@ start_services() {
825883

826884
# Start frontend server (conditionally)
827885
if [[ "$RF_MLFLOW_ENABLED" == "true" ]]; then
828-
case "$RF_CONVERGE_MODE" in
829-
none)
830-
if start_frontend; then
831-
((services_started++))
832-
else
833-
print_error "Failed to start frontend server"
834-
fi
835-
;;
836-
backend|frontend|all)
837-
if has_rapidfireai_pro; then
838-
if start_converge; then
886+
if [[ "$RF_START_FRONTEND" != "true" ]]; then
887+
print_status "⊗ Skipping frontend (RF_START_FRONTEND=false or --no-frontend)"
888+
case "$RF_CONVERGE_MODE" in
889+
none)
890+
;;
891+
all)
892+
if has_rapidfireai_pro; then
893+
if start_converge backend; then
894+
((services_started++))
895+
else
896+
print_error "Failed to start Converge backend"
897+
fi
898+
fi
899+
;;
900+
backend)
901+
if has_rapidfireai_pro; then
902+
if start_converge backend; then
903+
((services_started++))
904+
else
905+
print_error "Failed to start Converge backend"
906+
fi
907+
else
908+
print_error "rapidfireai-pro is not installed (required for --converge=$RF_CONVERGE_MODE)"
909+
fi
910+
;;
911+
frontend)
912+
print_status "⊗ Skipping Converge frontend (--no-frontend)"
913+
;;
914+
esac
915+
else
916+
case "$RF_CONVERGE_MODE" in
917+
none)
918+
if start_frontend; then
839919
((services_started++))
840920
else
841-
print_error "Failed to start Converge"
921+
print_error "Failed to start frontend server"
842922
fi
843-
else
844-
if [[ "$RF_CONVERGE_MODE" == "all" ]]; then
845-
if start_frontend; then
923+
;;
924+
backend|frontend|all)
925+
if has_rapidfireai_pro; then
926+
if start_converge; then
846927
((services_started++))
847928
else
848-
print_error "Failed to start frontend server"
929+
print_error "Failed to start Converge"
849930
fi
850931
else
851-
print_error "rapidfireai-pro is not installed (required for --converge=$RF_CONVERGE_MODE)"
932+
if [[ "$RF_CONVERGE_MODE" == "all" ]]; then
933+
if start_frontend; then
934+
((services_started++))
935+
else
936+
print_error "Failed to start frontend server"
937+
fi
938+
else
939+
print_error "rapidfireai-pro is not installed (required for --converge=$RF_CONVERGE_MODE)"
940+
fi
852941
fi
853-
fi
854-
;;
855-
esac
942+
;;
943+
esac
944+
fi
856945
else
857946
print_status "⊗ Skipping frontend (use TensorBoard if in Colab mode)"
858947
fi
@@ -915,7 +1004,7 @@ main() {
9151004

9161005
# Show summary of all log files for debugging
9171006
print_status "=== Startup Failure Summary ==="
918-
for log_file in "mlflow.log" "api.log" "frontend.log" "converge.log"; do
1007+
for log_file in "mlflow.log" "api.log" "frontend.log" "converge.log" "converge_frontend.log" "converge_backend.log"; do
9191008
if [[ -f "$RF_LOG_PATH/$log_file" ]]; then
9201009
echo ""
9211010
print_status "=== $log_file ==="

0 commit comments

Comments (0)