services:
  # GPU-backed inference service, exposed via Traefik labels on the external
  # `proxy-net` network (no host ports are published directly).
  fyp-inference-prod:
    container_name: fyp-inference-prod
    build:
      context: ../../
      # NOTE(review): a service named "prod" builds from the dev Dockerfile —
      # confirm this is intentional.
      dockerfile: compose/dev/inference/Dockerfile
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all NVIDIA GPUs available on the host.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
    environment:
      # Bind address and port; presumably read by the inference server at
      # start-up — verify against the application entry point.
      - INFERENCE_HTTP_HOST=0.0.0.0
      - INFERENCE_HTTP_PORT=8001
      - NVIDIA_VISIBLE_DEVICES=all
      # NOTE(review): file-watch polling looks like a dev-time setting —
      # confirm it is wanted for this service.
      - WATCHFILES_FORCE_POLLING=true
      - PYTHONPATH=/app
      # Hugging Face cache location; matches the hf_cache bind mount below.
      - HF_HOME=/root/.cache/huggingface
      # Offline mode: models must already be present in the mounted cache.
      - HF_HUB_OFFLINE=1
    volumes:
      # The build-context directory is bind-mounted over /app.
      - ../../:/app
      # NOTE(review): ../../models already sits under the mount above, so
      # this entry looks redundant — confirm before removing.
      - ../../models:/app/models
      - ../../hf_cache:/root/.cache/huggingface
    labels:
      - "traefik.enable=true"
      # LLM_DOMAIN_NAME has no default; it must be supplied when Compose runs.
      - "traefik.http.routers.llm.rule=Host(`${LLM_DOMAIN_NAME}`)"
      - "traefik.http.routers.llm.entrypoints=web"
      # Falls back to 8001 (the INFERENCE_HTTP_PORT set above) when LLM_PORT
      # is unset or empty, so the label never carries an empty port.
      - "traefik.http.services.llm.loadbalancer.server.port=${LLM_PORT:-8001}"
      - "traefik.docker.network=proxy-net"
    networks:
      - proxy-net

networks:
  # Pre-existing network that Compose attaches to but does not create.
  proxy-net:
    name: proxy-net
    external: true