# Source: compose.yaml from the wilsonwu/run-gemma-4 repository (main branch) on GitHub.

# Compose project name.
name: gemma-local

# Environment block shared through the `runtime-env` anchor.
# Except for SERVICE_PORT, every entry uses Compose interpolation: a value from
# the host environment (or .env file) wins, and the text after `:-` is the
# fallback used when the variable is unset or empty.
x-runtime-env: &runtime-env
  MODEL_PREPARE_MODE: "${MODEL_PREPARE_MODE:-init}"

  # Fixed value, not overridable from the host; it matches the container port
  # that the gemma service publishes and probes.
  SERVICE_PORT: "8080"

  # Model identity, location, download URL and checksum.
  # NOTE(review): presumably consumed by the image's scripts — confirm there.
  MODEL_ALIAS: "${MODEL_ALIAS:-gemma-4-e2b-it-q4km-local}"
  MODEL_PATH: "${MODEL_PATH:-/models/gemma-4-E2B-it-Q4_K_M.gguf}"
  MODEL_URL: "${MODEL_URL:-https://www.modelscope.cn/models/lmstudio-community/gemma-4-E2B-it-GGUF/resolve/master/gemma-4-E2B-it-Q4_K_M.gguf}"
  MODEL_SHA256: "${MODEL_SHA256:-406607e5fffbfaea5f7ce78cfc02ef8e8c4eb8618e365b6f6da441c04e4d6066}"

  # Runtime tuning knobs.
  CONTEXT_SIZE: "${CONTEXT_SIZE:-8192}"
  BATCH_SIZE: "${BATCH_SIZE:-256}"
  LLAMA_THREADS: "${LLAMA_THREADS:-8}"

  # Proxy settings forwarded from the host; the two proxy URLs default to an
  # empty string, NO_PROXY defaults to the usual loopback names.
  HTTP_PROXY: "${HTTP_PROXY:-}"
  HTTPS_PROXY: "${HTTPS_PROXY:-}"
  NO_PROXY: "${NO_PROXY:-localhost,127.0.0.1,::1,host.docker.internal}"

# Service template pulled into each service with `<<: *runtime-common`:
# one image, one environment block, and the same two named volumes.
x-runtime-common: &runtime-common
  # Repository and tag can be overridden independently via IMAGE_REPO / IMAGE_TAG.
  image: "${IMAGE_REPO:-ghcr.io/wilsonwu/run-gemma-4}:${IMAGE_TAG:-latest}"
  environment: *runtime-env
  volumes:
    - "model-cache:/models"
    - "tmp:/tmp"

services:
  # One-shot job: shares the common image, volumes and environment, but runs
  # prepare-model.sh as its entrypoint and is not restarted automatically.
  prepare-model:
    <<: *runtime-common
    restart: "no"
    entrypoint: ["/usr/local/bin/prepare-model.sh"]

  # Main service; it is started only after prepare-model has exited successfully.
  gemma:
    <<: *runtime-common
    ports:
      # Host side is overridable via HOST_PORT; the container side is fixed at 8080.
      - "${HOST_PORT:-8080}:8080"
    depends_on:
      prepare-model:
        condition: service_completed_successfully
    healthcheck:
      # Passes when bash can open a TCP connection to 127.0.0.1:8080 inside the
      # container (uses bash's /dev/tcp redirection, so bash must be in the image).
      # NOTE(review): this only proves the port accepts connections, not that
      # the server is ready to answer requests — confirm that is sufficient.
      test: ["CMD-SHELL", "bash -lc 'exec 3<>/dev/tcp/127.0.0.1/8080'"]
      start_period: 20s
      interval: 15s
      timeout: 5s
      retries: 20

# Named volumes declared with default settings; x-runtime-common mounts
# model-cache at /models and tmp at /tmp.
volumes:
  model-cache: {}
  tmp: {}