-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompose.yaml
More file actions
49 lines (44 loc) · 1.38 KB
/
compose.yaml
File metadata and controls
49 lines (44 loc) · 1.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Compose project name.
name: gemma-local

# Environment mapping exposed through the `runtime-env` anchor.
# Every `${VAR:-default}` entry takes VAR from the environment Compose runs
# with and falls back to the text after `:-` when VAR is unset or empty.
x-runtime-env: &runtime-env
  MODEL_PREPARE_MODE: "${MODEL_PREPARE_MODE:-init}"
  # Fixed value (no interpolation); quoted so it stays a string.
  SERVICE_PORT: "8080"
  # Model identity: alias, on-disk path, download URL and expected SHA-256.
  MODEL_ALIAS: "${MODEL_ALIAS:-gemma-4-e2b-it-q4km-local}"
  MODEL_PATH: "${MODEL_PATH:-/models/gemma-4-E2B-it-Q4_K_M.gguf}"
  MODEL_URL: "${MODEL_URL:-https://www.modelscope.cn/models/lmstudio-community/gemma-4-E2B-it-GGUF/resolve/master/gemma-4-E2B-it-Q4_K_M.gguf}"
  MODEL_SHA256: "${MODEL_SHA256:-406607e5fffbfaea5f7ce78cfc02ef8e8c4eb8618e365b6f6da441c04e4d6066}"
  # Runtime tuning knobs (presumably consumed by the image's scripts; confirm there).
  CONTEXT_SIZE: "${CONTEXT_SIZE:-8192}"
  BATCH_SIZE: "${BATCH_SIZE:-256}"
  LLAMA_THREADS: "${LLAMA_THREADS:-8}"
  # Proxy settings are passed through; HTTP(S)_PROXY default to an empty string.
  HTTP_PROXY: "${HTTP_PROXY:-}"
  HTTPS_PROXY: "${HTTPS_PROXY:-}"
  NO_PROXY: "${NO_PROXY:-localhost,127.0.0.1,::1,host.docker.internal}"
# Service settings exposed through the `runtime-common` anchor so that
# services can pull them in with a `<<:` merge key.
x-runtime-common: &runtime-common
  # Repository and tag are overridable via IMAGE_REPO / IMAGE_TAG.
  image: "${IMAGE_REPO:-ghcr.io/wilsonwu/run-gemma-4}:${IMAGE_TAG:-latest}"
  volumes:
    # Named volumes mounted at /models and /tmp inside the container.
    - "model-cache:/models"
    - "tmp:/tmp"
  # Reuses the mapping anchored as `runtime-env`.
  environment: *runtime-env
services:
  # Runs /usr/local/bin/prepare-model.sh as the entrypoint; never restarted.
  prepare-model:
    <<: *runtime-common
    entrypoint:
      - /usr/local/bin/prepare-model.sh
    # Quoted: a bare `no` is a boolean under YAML 1.1 parsers.
    restart: "no"

  gemma:
    <<: *runtime-common
    # Start only after prepare-model has exited successfully.
    depends_on:
      prepare-model:
        condition: service_completed_successfully
    ports:
      # HOST:CONTAINER mappings are quoted, as the Compose documentation
      # recommends, so a YAML parser can never re-type the value.
      # The host side is overridable via HOST_PORT; the container side is 8080.
      - "${HOST_PORT:-8080}:8080"
    healthcheck:
      # Passes when bash can open a TCP connection to 127.0.0.1:8080 inside
      # the container (uses bash's /dev/tcp redirection).
      test:
        - CMD-SHELL
        - bash -lc 'exec 3<>/dev/tcp/127.0.0.1/8080'
      interval: 15s
      timeout: 5s
      retries: 20
      start_period: 20s
# Named volumes declared with default settings (explicit empty mappings
# instead of bare null values).
volumes:
  model-cache: {}
  tmp: {}