---
# Docker Compose definition for the `fyp-inference-prod` inference service.
# The container is attached to the external `proxy-net` network and carries
# Traefik labels that define a router/service pair named `llm`.
services:
  fyp-inference-prod:
    container_name: fyp-inference-prod

    build:
      # Build context is two directories above this compose file.
      context: ../../
      # NOTE(review): a service named "prod" builds from the *dev* Dockerfile
      # path -- confirm this is intentional.
      dockerfile: compose/dev/inference/Dockerfile

    deploy:
      resources:
        reservations:
          devices:
            # Reserve all devices exposed by the nvidia driver, with the
            # `gpu` capability.
            - driver: nvidia
              count: all
              capabilities: [gpu]

    restart: unless-stopped

    environment:
      - INFERENCE_HTTP_HOST=0.0.0.0
      # NOTE(review): the Traefik label below targets ${LLM_PORT}, which is
      # set independently of this value -- verify the two agree.
      - INFERENCE_HTTP_PORT=8001
      - NVIDIA_VISIBLE_DEVICES=all
      # NOTE(review): looks like a development file-watching setting --
      # confirm it is wanted for this service.
      - WATCHFILES_FORCE_POLLING=true
      # Same path as the target of the source bind mount below.
      - PYTHONPATH=/app
      # Same path as the target of the hf_cache bind mount below.
      - HF_HOME=/root/.cache/huggingface
      # NOTE(review): presumably models must already be present in the
      # mounted cache since the hub is set offline -- confirm.
      - HF_HUB_OFFLINE=1

    volumes:
      # Bind-mounts the whole build context directory over /app.
      - ../../:/app
      # ../../models already lands at /app/models via the mount above;
      # kept explicit as in the original.
      - ../../models:/app/models
      - ../../hf_cache:/root/.cache/huggingface

    labels:
      - "traefik.enable=true"
      # ${LLM_DOMAIN_NAME} and ${LLM_PORT} are interpolated by Compose from
      # the environment; neither has a default here.
      - "traefik.http.routers.llm.rule=Host(`${LLM_DOMAIN_NAME}`)"
      - "traefik.http.routers.llm.entrypoints=web"
      - "traefik.http.services.llm.loadbalancer.server.port=${LLM_PORT}"
      - "traefik.docker.network=proxy-net"

    networks:
      - proxy-net

networks:
  # Declared external: Compose expects this network to exist already and
  # does not create it.
  proxy-net:
    external: true