From 8569863136b6bcf2af7dd174cf7f5c964e4d0294 Mon Sep 17 00:00:00 2001
From: Suresh Chouksey
Date: Fri, 26 Jun 2026 08:05:01 +0530
Subject: [PATCH] feat: Implement robustness and CN network adaptation

---
Review notes (ignored by git am):
- Hunk headers and the diffstat below describe the revised content.
- The blob ids on the "index" lines are carried over from the original
  submission and do not describe the revised content.

 config/cn-mirrors.yml         |  18 ++++
 install.sh                    | 112 ++++++++++++++++++++++++-
 scripts/check-connectivity.sh | 100 ++++++++++++++++++++++
 scripts/diagnose.sh           |  78 +++++++++++++++++
 scripts/localize-images.sh    | 146 +++++++++++++++++++++++++++++++++
 scripts/setup-cn-mirrors.sh   |  86 +++++++++++++++++++
 scripts/wait-healthy.sh       | 104 +++++++++++++++++++++++
 7 files changed, 641 insertions(+), 3 deletions(-)
 create mode 100644 config/cn-mirrors.yml
 create mode 100755 scripts/check-connectivity.sh
 create mode 100755 scripts/diagnose.sh
 create mode 100755 scripts/localize-images.sh
 create mode 100755 scripts/setup-cn-mirrors.sh
 create mode 100755 scripts/wait-healthy.sh

diff --git a/config/cn-mirrors.yml b/config/cn-mirrors.yml
new file mode 100644
index 00000000..64ab2dd5
--- /dev/null
+++ b/config/cn-mirrors.yml
@@ -0,0 +1,18 @@
+mirrors:
+  gcr.io/cadvisor/cadvisor: m.daocloud.io/gcr.io/cadvisor/cadvisor
+  ghcr.io/goauthentik/server: m.daocloud.io/ghcr.io/goauthentik/server
+  ghcr.io/home-assistant/home-assistant: m.daocloud.io/ghcr.io/home-assistant/home-assistant
+  gcr.io/google-containers/cadvisor: m.daocloud.io/gcr.io/google-containers/cadvisor
+  ghcr.io/louislam/uptime-kuma: m.daocloud.io/ghcr.io/louislam/uptime-kuma
+  ghcr.io/gethomepage/homepage: m.daocloud.io/ghcr.io/gethomepage/homepage
+  ghcr.io/linuxserver/jellyfin: m.daocloud.io/ghcr.io/linuxserver/jellyfin
+  ghcr.io/linuxserver/radarr: m.daocloud.io/ghcr.io/linuxserver/radarr
+  ghcr.io/linuxserver/sonarr: m.daocloud.io/ghcr.io/linuxserver/sonarr
+  ghcr.io/linuxserver/prowlarr: m.daocloud.io/ghcr.io/linuxserver/prowlarr
+  ghcr.io/linuxserver/qbittorrent: m.daocloud.io/ghcr.io/linuxserver/qbittorrent
+  ghcr.io/goauthentik/postgresql: m.daocloud.io/ghcr.io/goauthentik/postgresql
+  ghcr.io/goauthentik/redis: m.daocloud.io/ghcr.io/goauthentik/redis
+  ghcr.io/gitea/gitea: m.daocloud.io/ghcr.io/gitea/gitea
+  ghcr.io/homarr-labs/homarr: m.daocloud.io/ghcr.io/homarr-labs/homarr
+  ghcr.io/open-webui/open-webui: m.daocloud.io/ghcr.io/open-webui/open-webui
+  ghcr.io/dani-garcia/vaultwarden: m.daocloud.io/ghcr.io/dani-garcia/vaultwarden
diff --git a/install.sh b/install.sh
index e911d519..347a0475 100644
--- a/install.sh
+++ b/install.sh
@@ -20,6 +20,105 @@ cleanup() {
 }
 trap cleanup EXIT
 
+# Run curl with connect and total timeouts, retrying with exponential backoff.
+# Arguments: passed straight through to curl.
+# Returns:   0 as soon as one attempt succeeds, 1 once every attempt has failed.
+curl_retry() {
+  local max_attempts=3
+  local delay=5
+  local attempt
+  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
+    curl --connect-timeout 10 --max-time 60 "$@" && return 0
+    if ((attempt == max_attempts)); then
+      echo "Attempt $attempt failed, giving up." >&2
+      break
+    fi
+    # Progress goes to stderr so it never mixes into piped curl output.
+    echo "Attempt $attempt failed, retrying in ${delay}s..." >&2
+    sleep "$delay"
+    delay=$((delay * 2))
+  done
+  return 1
+}
+export -f curl_retry
+
+# Pre-flight checks: disk, memory, busy ports, firewalls and Docker itself.
+# Exits 1 when free disk space is below 5 GB or Docker cannot be installed;
+# every other finding is only a warning. Calls log_info / log_warn /
+# log_error, which are not defined by this patch.
+check_robustness() {
+  # Resource checks. `df -Pk` is POSIX, and an unreadable value is skipped
+  # rather than being compared as 0 (which would abort the install).
+  local free_gb
+  free_gb=$(df -Pk / 2>/dev/null | awk 'NR==2 {print int($4 / 1048576)}') || free_gb=""
+  if [[ ! "$free_gb" =~ ^[0-9]+$ ]]; then
+    log_warn "Could not determine free disk space; skipping disk check."
+  elif ((free_gb < 5)); then
+    log_error "Disk space < 5GB (${free_gb}GB). Aborting."
+    exit 1
+  elif ((free_gb < 20)); then
+    log_warn "Disk space < 20GB (${free_gb}GB)."
+  fi
+
+  if command -v free &>/dev/null; then
+    local total_mem
+    total_mem=$(free -m | awk 'NR==2 {print $2}') || total_mem=""
+    if [[ "$total_mem" =~ ^[0-9]+$ ]] && ((total_mem < 2000)); then
+      log_warn "Memory < 2GB. Some services may fail."
+    fi
+  fi
+
+  # List listening sockets once instead of once per port.
+  local listeners port
+  listeners=$(ss -tln 2>/dev/null || netstat -tln 2>/dev/null) || listeners=""
+  for port in 53 80 443 3000; do
+    if grep -q ":${port} " <<<"$listeners"; then
+      log_warn "Port $port is already in use."
+    fi
+  done
+
+  # Firewall rules check. The patterns are exact on purpose: a bare "active"
+  # also matches "inactive", and "running" also matches "not running".
+  if command -v ufw &>/dev/null && sudo ufw status 2>/dev/null | grep -q "Status: active"; then
+    log_warn "UFW is active, ensure required ports are allowed."
+  fi
+  if command -v firewall-cmd &>/dev/null && sudo firewall-cmd --state 2>/dev/null | grep -qx "running"; then
+    log_warn "firewalld is running, ensure required ports are allowed."
+  fi
+
+  # Docker installation & checks. The installer is downloaded to a private
+  # temp file and only executed after the download has succeeded.
+  if ! command -v docker &>/dev/null; then
+    log_info "Docker not found, attempting auto-install..."
+    local installer
+    installer=$(mktemp) || { log_error "Could not create a temporary file."; exit 1; }
+    if ! curl_retry -fsSL https://get.docker.com -o "$installer"; then
+      rm -f -- "$installer"
+      log_error "Could not download the Docker install script."
+      exit 1
+    fi
+    if ! sudo sh "$installer"; then
+      rm -f -- "$installer"
+      log_error "Docker installation failed."
+      exit 1
+    fi
+    rm -f -- "$installer"
+  fi
+
+  if command -v docker-compose &>/dev/null && ! docker compose version &>/dev/null; then
+    log_warn "Docker Compose v1 detected. Please upgrade to Docker Compose v2."
+  fi
+
+  # Match the group name exactly; a substring test also hits e.g. "dockerroot".
+  if [[ $EUID -ne 0 && " $(id -nG) " != *" docker "* ]]; then
+    log_info "Adding current user to docker group..."
+    if sudo usermod -aG docker "${USER:-$(id -un)}"; then
+      log_warn "You may need to log out and log back in for docker group changes to take effect."
+    else
+      log_warn "Could not add the current user to the docker group."
+    fi
+  fi
+}
+
 # ---------------------------------------------------------------------------
 # Banner
 # ---------------------------------------------------------------------------
@@ -34,16 +133,23 @@ echo -e "${BOLD} S T A C K v1.0.0${NC}"
 echo -e ""
 
 # ---------------------------------------------------------------------------
-# Step 1: Check dependencies
+# Step 1: Check dependencies and robustness
 # ---------------------------------------------------------------------------
+log_step "Checking system robustness"
+check_robustness
+
 log_step "Checking dependencies"
-bash "$(dirname "$0")/scripts/check-deps.sh"
+bash "$(dirname "$0")/scripts/check-deps.sh" || log_warn "Dependency check reported problems; continuing."
 
 # ---------------------------------------------------------------------------
 # Step 2: CN network detection
 # ---------------------------------------------------------------------------
 log_step "Network environment detection"
-bash "$(dirname "$0")/scripts/check-deps.sh" --network-check
+if [[ -f "$(dirname "$0")/scripts/check-connectivity.sh" ]]; then
+  bash "$(dirname "$0")/scripts/check-connectivity.sh"
+else
+  log_warn "Connectivity checker not found."
+fi
 
 # ---------------------------------------------------------------------------
 # Step 3: Setup environment
diff --git a/scripts/check-connectivity.sh b/scripts/check-connectivity.sh
new file mode 100755
index 00000000..c3a4ccdc
--- /dev/null
+++ b/scripts/check-connectivity.sh
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+# =============================================================================
+# Network Connectivity Checker
+#
+# Probes the registries and sites the installer depends on and prints a
+# recommendation. The result is advisory: failed probes are counted and
+# reported; they are not turned into a non-zero exit status.
+# =============================================================================
+set -e
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+FAILED_COUNT=0
+
+# Probe one URL and report it as reachable, slow (> 1000 ms) or unreachable.
+# Arguments: $1 - label to print, $2 - URL to request
+# Globals:   FAILED_COUNT (incremented for slow and unreachable URLs)
+check_url() {
+  local name="$1"
+  local url="$2"
+  local timeout=5
+  local out ms
+
+  # curl emits the -w value even when the transfer fails, so success has to
+  # come from its exit status rather than from the captured text.
+  if ! out=$(curl -o /dev/null -s -w "%{time_total}" --connect-timeout "$timeout" -m "$timeout" "$url"); then
+    echo -e "${RED}[FAIL]${NC} $name — 连接超时 ✗ 需要使用国内镜像"
+    FAILED_COUNT=$((FAILED_COUNT + 1))
+    return 0
+  fi
+
+  ms=$(awk -v t="$out" 'BEGIN {print int(t * 1000)}')
+  if [ "$ms" -gt 1000 ]; then
+    echo -e "${YELLOW}[SLOW]${NC} $name — 延迟 ${ms}ms ⚠️ 建议开启镜像加速"
+    FAILED_COUNT=$((FAILED_COUNT + 1))
+  else
+    echo -e "${GREEN}[OK]${NC} $name — 延迟 ${ms}ms"
+  fi
+}
+
+# Check that github.com resolves, using the first tool that is installed:
+# host, then nslookup, then a single ping as a last resort.
+# Globals: FAILED_COUNT (incremented when resolution fails)
+check_dns() {
+  local -a probe
+  if command -v host &>/dev/null; then
+    probe=(host github.com)
+  elif command -v nslookup &>/dev/null; then
+    probe=(nslookup github.com)
+  else
+    probe=(ping -c 1 github.com)
+  fi
+
+  if "${probe[@]}" &>/dev/null; then
+    echo -e "${GREEN}[OK]${NC} DNS 解析正常"
+  else
+    echo -e "${RED}[FAIL]${NC} DNS 解析失败"
+    FAILED_COUNT=$((FAILED_COUNT + 1))
+  fi
+}
+
+# Check whether outbound connections on a TCP port get through.
+# portquiz.net is tried first; for ports 80 and 443 a well-known site is the
+# fallback, and it is only contacted when the first probe fails.
+# Arguments: $1 - port number
+check_port() {
+  local port=$1
+  local fallback=""
+  case "$port" in
+    80) fallback="http://gnu.org" ;;
+    443) fallback="https://github.com" ;;
+  esac
+
+  if curl -o /dev/null -s --connect-timeout 5 -m 10 "http://portquiz.net:$port" &>/dev/null; then
+    echo -e "${GREEN}[OK]${NC} $port 出站端口开放"
+  elif [[ -n "$fallback" ]] && curl -o /dev/null -s --connect-timeout 5 -m 10 "$fallback" &>/dev/null; then
+    echo -e "${GREEN}[OK]${NC} $port 出站端口开放"
+  else
+    echo -e "${RED}[FAIL]${NC} $port 出站端口可能受限"
+  fi
+}
+
+echo "检测项目:"
+check_url "Docker Hub (hub.docker.com)" "https://hub.docker.com"
+check_url "GitHub (github.com)" "https://github.com"
+check_url "gcr.io" "https://gcr.io"
+check_url "ghcr.io" "https://ghcr.io"
+check_dns
+check_port 80
+check_port 443
+
+echo ""
+if [ "$FAILED_COUNT" -ge 2 ]; then
+  echo -e "建议: 检测到 $FAILED_COUNT 个不可达源或较慢源,建议运行 ./scripts/setup-cn-mirrors.sh"
+else
+  echo -e "网络连通性良好。"
+fi
diff --git a/scripts/diagnose.sh b/scripts/diagnose.sh
new file mode 100755
index 00000000..9237c708
--- /dev/null
+++ b/scripts/diagnose.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+# =============================================================================
+# HomeLab Stack — Diagnose tool
+#
+# Writes a snapshot of the host, Docker, container and network state to
+# diagnose-report.txt in the current directory, then prints it.
+# =============================================================================
+set -e
+
+REPORT_FILE="diagnose-report.txt"
+BASE_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+
+echo "Generating diagnosis report... Please wait."
+
+# stderr is captured as well, so tool errors show up in the report.
+{
+  echo "========================================"
+  echo " HomeLab Stack Diagnostic Report"
+  echo " Date: $(date)"
+  echo "========================================"
+  echo ""
+
+  echo ">>> System Information"
+  # A pipeline reports only its last command's status, so the uname fallback
+  # is chosen by testing for an empty result instead of with `||`.
+  os_name=$(grep -s '^PRETTY_NAME=' /etc/os-release | cut -d= -f2- | tr -d '"') || true
+  [[ -n "$os_name" ]] || os_name=$(uname -a)
+  echo "OS: $os_name"
+  echo "Kernel: $(uname -r)"
+  if command -v free &>/dev/null; then
+    echo "Memory: $(free -m | awk 'NR==2{printf "%.2fGB / %.2fGB\n", $3/1024, $2/1024}')"
+  else
+    echo "Memory: unavailable (free not found)"
+  fi
+  echo "Disk:"
+  df -h / || echo "Unable to read disk usage."
+  echo ""
+
+  echo ">>> Docker Information"
+  docker version || echo "Docker not accessible."
+  echo ""
+
+  echo ">>> Container Status"
+  docker ps -a --format 'table {{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}' || echo "Unable to list containers."
+  echo ""
+
+  echo ">>> Recent Error Logs (last 50 lines of each exited container)"
+  for cid in $(docker ps -a --filter "status=exited" -q); do
+    name=$(docker inspect --format='{{.Name}}' "$cid" | sed 's/^\///')
+    echo "--- Logs for exited container: $name ---"
+    docker logs --tail 50 "$cid" 2>&1 || true
+  done
+  echo ""
+
+  echo ">>> Network Connectivity"
+  if [[ -f "$BASE_DIR/scripts/check-connectivity.sh" ]]; then
+    bash "$BASE_DIR/scripts/check-connectivity.sh" || echo "Connectivity check failed."
+  else
+    echo "check-connectivity.sh MISSING."
+  fi
+  echo ""
+
+  echo ">>> Configuration Check"
+  if [ -f "$BASE_DIR/.env" ]; then
+    echo ".env file exists."
+  else
+    echo ".env file MISSING."
+  fi
+  if [ -f "$BASE_DIR/config/traefik/traefik.yml" ]; then
+    echo "traefik.yml exists."
+  else
+    echo "traefik.yml MISSING."
+  fi
+  echo "========================================"
+} > "$REPORT_FILE" 2>&1
+
+echo "Report generated at $REPORT_FILE."
+cat "$REPORT_FILE"
diff --git a/scripts/localize-images.sh b/scripts/localize-images.sh
new file mode 100755
index 00000000..1d107297
--- /dev/null
+++ b/scripts/localize-images.sh
@@ -0,0 +1,146 @@
+#!/usr/bin/env bash
+# =============================================================================
+# Localize Images in Compose Files
+#
+# Rewrites `image:` values in docker-compose*.yml files between upstream
+# registries and the mirrors listed in config/cn-mirrors.yml.
+#
+# Usage: localize-images.sh [--cn | --restore | --dry-run | --check]
+#   --cn       switch listed images to their mirror
+#   --restore  switch mirrored images back to upstream
+#   --dry-run  list the replacements --cn would make
+#   --check    exit 1 if a listed image is still used in its upstream form
+# =============================================================================
+set -e
+
+BASE_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+MIRRORS_FILE="$BASE_DIR/config/cn-mirrors.yml"
+
+if [ ! -f "$MIRRORS_FILE" ]; then
+  echo "Error: $MIRRORS_FILE not found." >&2
+  exit 1
+fi
+
+ACTION=""
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --cn) ACTION="cn"; shift ;;
+    --restore) ACTION="restore"; shift ;;
+    --dry-run) ACTION="dry-run"; shift ;;
+    --check) ACTION="check"; shift ;;
+    *) echo "Unknown option: $1" >&2; exit 1 ;;
+  esac
+done
+
+if [ -z "$ACTION" ]; then
+  echo "Usage: $0 [--cn | --restore | --dry-run | --check]" >&2
+  exit 1
+fi
+
+# Matches the start of an `image:` line up to (and including) an optional
+# opening quote. Anchoring here is what keeps --cn idempotent: every mirror
+# name contains its upstream name, so an unanchored replace would prefix an
+# already-mirrored image a second time.
+IMAGE_PREFIX="^\\([[:space:]]*image:[[:space:]]*[\"']\\{0,1\\}\\)"
+
+# In-place editing differs between GNU sed (-i) and BSD sed (-i '').
+if sed --version 2>/dev/null | grep -q GNU; then
+  SED_INPLACE=(sed -i)
+else
+  SED_INPLACE=(sed -i '')
+fi
+
+# Print $1 with leading and trailing whitespace removed.
+trim() {
+  local s="$1"
+  s="${s#"${s%%[![:space:]]*}"}"
+  s="${s%"${s##*[![:space:]]}"}"
+  printf '%s' "$s"
+}
+
+# Print $1 escaped for use as a literal in a basic regular expression.
+escape_bre() {
+  printf '%s' "$1" | sed 's/[]\$*.^[]/\\&/g'
+}
+
+# Print $1 escaped for use as the replacement half of a sed `s` command.
+escape_replacement() {
+  printf '%s' "$1" | sed 's/[\&]/\\&/g'
+}
+
+# Succeed when file $2 has an `image:` value that starts with image name $1.
+has_image() {
+  grep -q -- "${IMAGE_PREFIX}$(escape_bre "$1")" "$2"
+}
+
+# In file $3, rewrite `image:` values starting with $1 so they start with $2.
+rewrite_image() {
+  local from to
+  from=$(escape_bre "$1")
+  to=$(escape_replacement "$2")
+  "${SED_INPLACE[@]}" -e "s|${IMAGE_PREFIX}${from}|\\1${to}|" "$3"
+}
+
+# Load "upstream: mirror" pairs, skipping the top-level key, blank lines and
+# comments.
+declare -A MAP
+while IFS=":" read -r orig mirror || [[ -n "$orig" ]]; do
+  orig=$(trim "$orig")
+  mirror=$(trim "$mirror")
+  if [[ "$orig" != "mirrors" && -n "$orig" && -n "$mirror" && ! "$orig" =~ ^# ]]; then
+    MAP["$orig"]="$mirror"
+  fi
+done < "$MIRRORS_FILE"
+
+# Collect compose files NUL-delimited so paths with spaces stay intact.
+COMPOSE_FILES=()
+while IFS= read -r -d '' f; do
+  COMPOSE_FILES+=("$f")
+done < <(find "$BASE_DIR" -name "docker-compose*.yml" -type f -print0)
+
+case "$ACTION" in
+  check)
+    NEEDS_REPLACE=0
+    for f in "${COMPOSE_FILES[@]}"; do
+      for orig in "${!MAP[@]}"; do
+        if has_image "$orig" "$f"; then
+          echo "Needs replacement in $f: $orig"
+          NEEDS_REPLACE=1
+        fi
+      done
+    done
+    if [ "$NEEDS_REPLACE" -eq 0 ]; then
+      echo "No gcr.io/ghcr.io images found that need replacing."
+    fi
+    exit "$NEEDS_REPLACE"
+    ;;
+  dry-run)
+    echo "Dry run - would modify:"
+    for f in "${COMPOSE_FILES[@]}"; do
+      for orig in "${!MAP[@]}"; do
+        if has_image "$orig" "$f"; then
+          echo "  $f: $orig -> ${MAP[$orig]}"
+        fi
+      done
+    done
+    ;;
+  cn)
+    echo "Replacing with domestic mirrors..."
+    for f in "${COMPOSE_FILES[@]}"; do
+      for orig in "${!MAP[@]}"; do
+        rewrite_image "$orig" "${MAP[$orig]}" "$f"
+      done
+    done
+    echo "Done."
+    ;;
+  restore)
+    echo "Restoring original images..."
+    for f in "${COMPOSE_FILES[@]}"; do
+      for orig in "${!MAP[@]}"; do
+        rewrite_image "${MAP[$orig]}" "$orig" "$f"
+      done
+    done
+    echo "Done."
+    ;;
+esac
diff --git a/scripts/setup-cn-mirrors.sh b/scripts/setup-cn-mirrors.sh
new file mode 100755
index 00000000..364c6da8
--- /dev/null
+++ b/scripts/setup-cn-mirrors.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+# =============================================================================
+# Setup CN Mirrors (Docker, Apt, Alpine)
+#
+# Interactive. After confirmation it points the Docker daemon at registry
+# mirrors (keeping a backup of any existing daemon.json), restarts Docker,
+# and switches apt/apk hosts in the project's shell scripts to CN mirrors.
+# =============================================================================
+set -e
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+echo -e "${YELLOW}检测到可能处于中国大陆网络环境。${NC}"
+read -r -p "是否需要配置国内镜像源以加速部署?(y/N): " choice
+if [[ ! "$choice" =~ ^[Yy]$ ]]; then
+  echo "已取消配置国内镜像源。"
+  exit 0
+fi
+
+echo -e "${GREEN}==> 配置 Docker 镜像加速...${NC}"
+
+if [ ! -d /etc/docker ]; then
+  sudo mkdir -p /etc/docker
+fi
+
+DAEMON_JSON="/etc/docker/daemon.json"
+MIRRORS_JSON='["https://docker.m.daocloud.io", "https://mirror.gcr.io", "https://hub-mirror.c.163.com", "https://mirror.baidubce.com"]'
+TMP_JSON=$(mktemp)
+# Remove the scratch files on every exit path, including a jq failure.
+trap 'rm -f -- "$TMP_JSON" "${TMP_JSON}.tmp"' EXIT
+
+# An empty daemon.json is treated like a missing one: jq emits nothing for
+# empty input, which would otherwise install an empty configuration.
+if [ -s "$DAEMON_JSON" ]; then
+  sudo cp "$DAEMON_JSON" "$TMP_JSON"
+  sudo cp -p "$DAEMON_JSON" "${DAEMON_JSON}.bak"
+else
+  echo "{}" > "$TMP_JSON"
+fi
+
+if command -v jq &>/dev/null; then
+  jq --argjson mirrors "$MIRRORS_JSON" '. + {"registry-mirrors": $mirrors}' "$TMP_JSON" > "${TMP_JSON}.tmp"
+  mv "${TMP_JSON}.tmp" "$TMP_JSON"
+else
+  # Without jq the existing settings cannot be merged, so say so instead of
+  # dropping them silently; the backup taken above still has them.
+  if [ -s "$DAEMON_JSON" ]; then
+    echo -e "${YELLOW}[WARN] 未找到 jq,现有 daemon.json 将被覆盖(备份: ${DAEMON_JSON}.bak)${NC}"
+  fi
+  cat > "$TMP_JSON" <<EOF
+{
+  "registry-mirrors": $MIRRORS_JSON
+}
+EOF
+fi
+
+sudo install -m 0644 "$TMP_JSON" "$DAEMON_JSON"
+if command -v systemctl &>/dev/null; then
+  sudo systemctl daemon-reload
+  sudo systemctl restart docker
+else
+  sudo service docker restart
+fi
+
+echo -e "${GREEN}==> 验证 Docker 镜像配置...${NC}"
+if sudo docker pull hello-world > /dev/null; then
+  echo -e "${GREEN}[OK] 镜像拉取成功!${NC}"
+else
+  echo -e "${RED}[FAIL] 镜像拉取失败,请检查网络。${NC}"
+fi
+
+echo -e "${GREEN}==> 配置项目脚本中的 apt/apk 镜像加速...${NC}"
+BASE_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+# This script contains both host names itself, so it is excluded; otherwise
+# the substitution would rewrite its own search patterns. `|| true` keeps a
+# missing stacks/ directory from aborting this best-effort step.
+find "$BASE_DIR/stacks" "$BASE_DIR/scripts" -type f -name "*.sh" \
+  ! -name "$(basename "$0")" \
+  -exec sed -i \
+  -e 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' \
+  -e 's|dl-cdn.alpinelinux.org|mirrors.ustc.edu.cn|g' {} + || true
+
+echo -e "${GREEN}完成国内网络适配配置。${NC}"
diff --git a/scripts/wait-healthy.sh b/scripts/wait-healthy.sh
new file mode 100755
index 00000000..b5bc0250
--- /dev/null
+++ b/scripts/wait-healthy.sh
@@ -0,0 +1,104 @@
+#!/usr/bin/env bash
+# =============================================================================
+# Wait for Stack to be Healthy
+#
+# Usage: wait-healthy.sh --stack <name> [--timeout 300]
+# Exit status: 0 healthy, 1 usage error or timeout, 2 a container exited.
+# =============================================================================
+set -e
+
+STACK_NAME=""
+TIMEOUT=300
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --stack | --timeout)
+      # `shift 2` with a single argument left fails silently under set -e.
+      if [[ $# -lt 2 ]]; then
+        echo "Error: $1 requires a value." >&2
+        exit 1
+      fi
+      if [[ "$1" == "--stack" ]]; then STACK_NAME="$2"; else TIMEOUT="$2"; fi
+      shift 2
+      ;;
+    *) echo "Unknown option: $1" >&2; exit 1 ;;
+  esac
+done
+
+if [ -z "$STACK_NAME" ]; then
+  echo "Usage: $0 --stack <name> [--timeout 300]" >&2
+  exit 1
+fi
+
+if [[ ! "$TIMEOUT" =~ ^[0-9]+$ ]]; then
+  echo "Error: --timeout must be a whole number of seconds." >&2
+  exit 1
+fi
+
+BASE_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+COMPOSE_FILE="$BASE_DIR/stacks/$STACK_NAME/docker-compose.yml"
+if [ "$STACK_NAME" == "base" ]; then
+  COMPOSE_FILE="$BASE_DIR/docker-compose.base.yml"
+fi
+
+if [ ! -f "$COMPOSE_FILE" ]; then
+  echo "Error: Compose file $COMPOSE_FILE not found." >&2
+  exit 1
+fi
+
+# Print a container's health status when it defines a healthcheck, otherwise
+# its plain state; prints "unknown" if the container cannot be inspected.
+# Arguments: $1 - container id
+container_state() {
+  docker inspect --format='{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}' "$1" 2>/dev/null \
+    || echo "unknown"
+}
+
+echo "Waiting for stack '$STACK_NAME' to be healthy (timeout: ${TIMEOUT}s)..."
+
+# 10# forces base 10 so a value such as 08 is not parsed as octal.
+END_TIME=$((SECONDS + 10#$TIMEOUT))
+
+while [ "$SECONDS" -lt "$END_TIME" ]; do
+  EXITED=$(docker compose -f "$COMPOSE_FILE" ps --status exited -q)
+  if [ -n "$EXITED" ]; then
+    echo "Error: One or more containers exited prematurely."
+    docker compose -f "$COMPOSE_FILE" logs --tail 50 || true
+    exit 2
+  fi
+
+  ALL_CONTAINERS=$(docker compose -f "$COMPOSE_FILE" ps -q)
+  if [ -z "$ALL_CONTAINERS" ]; then
+    echo "No containers found for stack."
+    sleep 5
+    continue
+  fi
+
+  ALL_HEALTHY=true
+  for cid in $ALL_CONTAINERS; do
+    STATUS=$(container_state "$cid")
+    if [ "$STATUS" != "healthy" ] && [ "$STATUS" != "running" ]; then
+      ALL_HEALTHY=false
+      break
+    fi
+  done
+
+  if [ "$ALL_HEALTHY" = true ]; then
+    echo "Stack '$STACK_NAME' is healthy."
+    exit 0
+  fi
+
+  sleep 5
+done
+
+echo "Timeout waiting for stack '$STACK_NAME' to be healthy."
+for cid in $(docker compose -f "$COMPOSE_FILE" ps -q); do
+  STATUS=$(container_state "$cid")
+  if [ "$STATUS" != "healthy" ] && [ "$STATUS" != "running" ]; then
+    NAME=$(docker inspect --format='{{.Name}}' "$cid" | sed 's/^\///')
+    echo "Logs for unhealthy container: $NAME"
+    docker logs --tail 50 "$cid" || true
+  fi
+done
+
+exit 1