# Docker Compose file (YAML): GPU inference service routed through Traefik.
# Compose definition for the inference service.
#
# Variables interpolated from the shell / .env file:
#   LLM_DOMAIN_NAME  host name used in the Traefik router rule (no default
#                    here, so it must be set)
#   LLM_PORT         container port Traefik forwards to (defaults to 8001,
#                    the value given to INFERENCE_HTTP_PORT below)
services:
  fyp-inference-prod:
    container_name: fyp-inference-prod
    build:
      # Build context is two directories above this compose file.
      context: ../../
      # NOTE(review): a service named "prod" builds from the *dev*
      # Dockerfile - confirm this is intentional.
      dockerfile: compose/dev/inference/Dockerfile
    deploy:
      resources:
        reservations:
          # Reserve every NVIDIA GPU on the host for this container.
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
    environment:
      - INFERENCE_HTTP_HOST=0.0.0.0
      - INFERENCE_HTTP_PORT=8001
      - NVIDIA_VISIBLE_DEVICES=all
      # NOTE(review): file-watch polling looks like a dev/hot-reload
      # setting - confirm it is wanted for this service.
      - WATCHFILES_FORCE_POLLING=true
      - PYTHONPATH=/app
      # Hugging Face cache path; it is the target of the hf_cache bind
      # mount under "volumes" below.
      - HF_HOME=/root/.cache/huggingface
      # Presumably requires the models to already be present in the mounted
      # cache, since Hub access is switched off - TODO confirm.
      - HF_HUB_OFFLINE=1
    volumes:
      # NOTE(review): the whole source tree is bind-mounted over /app,
      # shadowing whatever the image placed there - confirm for prod.
      - ../../:/app
      - ../../models:/app/models
      - ../../hf_cache:/root/.cache/huggingface
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.llm.rule=Host(`${LLM_DOMAIN_NAME}`)"
      - "traefik.http.routers.llm.entrypoints=web"
      # Falls back to 8001 so the label is never rendered with an empty port
      # when LLM_PORT is unset.
      - "traefik.http.services.llm.loadbalancer.server.port=${LLM_PORT:-8001}"
      - "traefik.docker.network=proxy-net"
    networks:
      - proxy-net

networks:
  # Declared external: Compose expects this network to exist already and
  # will not create it.
  proxy-net:
    external: true