Dynavera/compose/dev/inference/Dockerfile

35 lines
No EOL
1.1 KiB
Docker

# ---- Build stage ------------------------------------------------------------
# Uses the CUDA *devel* image so that llama-cpp-python can be compiled from
# source with the CUDA backend. Everything pip installs here lands under
# /usr/local and is copied into the runtime stage; the toolchain itself is
# discarded with this stage.
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS builder
WORKDIR /build

# Build toolchain and Python. The compiler is listed explicitly
# (build-essential) instead of arriving as a "recommended" package of
# python3-pip, and DEBIAN_FRONTEND is set inline so it does not persist in the
# image environment.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        python3-dev \
        python3-pip \
        python3.10 \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# PyTorch wheels from the CUDA 12.4 wheel index (matches the base image's CUDA).
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

# Build-time settings for compiling llama-cpp-python:
#   - the CUDA "stubs" directory is put on the library path so the build can
#     resolve the CUDA driver library without a real driver being present
#     (presumed intent -- confirm if the build host does expose a GPU);
#   - CMAKE_ARGS / FORCE_CMAKE are forwarded to the package's CMake build to
#     enable the CUDA backend and switch the LLaVA build off.
# These ENV values live only in this stage and do not reach the final image.
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH
ENV CMAKE_ARGS="-DGGML_CUDA=on -DLLAVA_BUILD=off"
ENV FORCE_CMAKE=1
RUN pip install --no-cache-dir llama-cpp-python

# The requirements file is copied as late as possible: editing it now only
# re-runs the install below instead of invalidating the cached torch download
# and the (slow) CUDA compile of llama-cpp-python above.
COPY requirements/inference.txt .
RUN pip install --no-cache-dir -r inference.txt
# ---- Runtime stage ----------------------------------------------------------
# Slim CUDA *runtime* image: only the Python interpreter, the packages built in
# the builder stage, and the server module are present.
FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 AS runtime
WORKDIR /app

# --no-install-recommends keeps python3-pip from dragging a full compiler
# toolchain (build-essential, python3-dev) into the runtime image.
# libgomp1 is installed explicitly because the source-built llama-cpp-python
# is expected to need the OpenMP runtime, which previously was only present as
# a side effect of that toolchain.
# The `python` symlink is required by the CMD below, which invokes `python`.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        libgomp1 \
        python3-pip \
        python3.10 \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3 /usr/bin/python

# Bring over everything pip installed in the builder: the site packages and
# the console scripts that were written to /usr/local/bin.
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

COPY gpu_server.py .

# Unbuffered output so logs appear immediately; /app on PYTHONPATH so the
# `gpu_server` module named in CMD is importable.
ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/app

# NOTE(review): the container still runs as root. Switching to a dedicated
# non-root USER is recommended once it is confirmed that any mounted
# model/cache volumes are readable (and writable, if needed) by that user.

# Documentation only: the server listens on 8001 (see --port in CMD).
EXPOSE 8001

# Exec form so uvicorn receives signals from `docker stop` directly.
CMD ["python", "-m", "uvicorn", "gpu_server:app", "--host", "0.0.0.0", "--port", "8001"]