# docker-compose.yaml — 92 lines (83 loc), 2.66 KB
# (Scraped GitHub page chrome and the line-number gutter removed; file content follows.)
# Two-service stack: a GPU text-encoder HTTP service and a demo app that
# depends on it. Both are built from the same Dockerfile / image tag
# (kimodo:1.0) and bind-mount the repo into /workspace.
services:
  # Text-encoder server; the demo reaches it at http://text-encoder:9550/.
  text-encoder:
    build:
      context: .
      dockerfile: Dockerfile
    image: kimodo:1.0
    container_name: text-encoder
    working_dir: /workspace
    command: python -m kimodo.scripts.run_text_encoder_server
    volumes:
      # Live-mount the repo so code changes don't require a rebuild.
      - ./:/workspace
      # Cache HF downloads in host "system-wide" Hugging Face cache.
      - ${HOME}/.cache/huggingface:/workspace/.cache/huggingface
      # Mount the host HF auth token at the standard cache location in-container.
      # NOTE(review): redundant with the parent-dir mount above, but pins the
      # token read-only — presumably intentional; confirm.
      - ${HOME}/.cache/huggingface/token:/workspace/.cache/huggingface/token:ro
      # expose to your host browser
    ports:
      - "9550:9550"
    environment:
      # Make Gradio reachable from other containers
      # - GRADIO_SERVER_NAME=0.0.0.0
      # - GRADIO_SERVER_PORT=9550
      # Keep the HF cache inside the bind-mounted cache dir above.
      - HF_HOME=/workspace/.cache/huggingface
      # Host user mapping (for non-root ownership + proper shell prompt)
      - HOST_USER=${USER:-user}
      # GPU
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    # Large shared memory + host IPC for PyTorch dataloader workers.
    shm_size: "16gb"
    ipc: host
    # Wait until Gradio responds on HTTP
    # (3s x 40 retries ≈ 2 min budget for model load before marking unhealthy).
    healthcheck:
      test:
        ["CMD", "bash", "-lc", "curl -fsS http://localhost:9550/ > /dev/null"]
      interval: 3s
      timeout: 2s
      retries: 40
    # Request all NVIDIA GPUs via the Compose device-reservation API.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
  # Demo front-end; started only after text-encoder reports healthy.
  demo:
    build:
      context: .
      dockerfile: Dockerfile
    image: kimodo:1.0
    container_name: demo
    working_dir: /workspace
    command: python -m kimodo.demo
    volumes:
      - ./:/workspace
      # Same HF cache + read-only token mounts as text-encoder.
      - ${HOME}/.cache/huggingface:/workspace/.cache/huggingface
      - ${HOME}/.cache/huggingface/token:/workspace/.cache/huggingface/token:ro
      # Explicit checkpoint mount (avoids surprises if the repo bind mount isn't what you expect).
      - ./checkpoints:/workspace/checkpoints:ro
    ports:
      # Host and container port kept identical; override with SERVER_PORT.
      - "${SERVER_PORT:-7860}:${SERVER_PORT:-7860}"
    environment:
      # Point the model at the text-encoder service.
      - TEXT_ENCODER_URL=http://text-encoder:9550/
      # Make checkpoint paths robust (Hydra config reads this).
      - SERVER_PORT=${SERVER_PORT:-7860}
      - HF_HOME=/workspace/.cache/huggingface
      # Host user mapping (for non-root ownership + proper shell prompt)
      - HOST_USER=${USER:-user}
      # GPU
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    shm_size: "16gb"
    ipc: host
    # Gate startup on the text-encoder healthcheck defined above.
    depends_on:
      text-encoder:
        condition: service_healthy
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]