2026-02-26 01:32:04 +00:00
|
|
|
# ---- Build stage ------------------------------------------------------------
# Uses the CUDA 12.4.1 "devel" image as the build environment. Everything
# installed here is only carried forward through explicit COPY --from=builder
# instructions in a later stage.
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS builder

WORKDIR /build

# Python 3.10 with pip and headers, plus cmake and git for source builds of
# Python packages.
# - DEBIAN_FRONTEND is set inline (not via ENV) so package configuration can
#   never block the build on an interactive prompt, without leaking the
#   setting into later instructions.
# - apt lists are removed in the same layer that created them.
# - Recommended packages are intentionally left enabled in this stage.
#   NOTE(review): the build may rely on tools they pull in; confirm before
#   adding --no-install-recommends here.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        cmake \
        git \
        python3-dev \
        python3-pip \
        python3.10 \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
|
|
|
|
# Python dependencies for the builder stage.
#
# Layers are ordered from least to most frequently changing: the requirements
# file is copied only right before the step that reads it, so editing it no
# longer invalidates the cached PyTorch download or the llama-cpp-python build.
# NOTE(review): torch*, llama-cpp-python and the packaging tools are unpinned;
# consider pinning versions for reproducible builds.

# Bring the packaging toolchain up to date before installing anything else.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# PyTorch family from the cu124 wheel index.
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

# Build-time settings consumed by the llama-cpp-python install below:
# - LD_LIBRARY_PATH: prepends the CUDA stub library directory; presumably so
#   the build can resolve the CUDA driver library on a host without a GPU
#   driver (confirm).
# - CMAKE_ARGS: CMake options passed to the package build (GGML CUDA on,
#   LLaVA build off).
# - FORCE_CMAKE: set to 1 alongside CMAKE_ARGS for the source build.
# These only exist in the builder stage; the runtime stage starts from a
# fresh base image and does not inherit them.
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH \
    CMAKE_ARGS="-DGGML_CUDA=on -DLLAVA_BUILD=off" \
    FORCE_CMAKE=1

RUN pip install --no-cache-dir llama-cpp-python

# Application requirements last: this is the layer most likely to change.
COPY requirements/inference.txt .
RUN pip install --no-cache-dir -r inference.txt
|
|
|
|
|
|
|
|
|
|
# ---- Runtime stage ----------------------------------------------------------
# Starts from the CUDA 12.4.1 "runtime" image; nothing from the builder stage
# is present unless copied explicitly with COPY --from=builder.
FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04

WORKDIR /app

# Minimal Python runtime.
# - --no-install-recommends keeps the packages that python3-pip merely
#   recommends (a compiler toolchain and Python headers) out of the image.
# - libgomp1 is listed explicitly so the OpenMP runtime is present by intent
#   rather than as a side effect of that toolchain. NOTE(review): assumed to
#   be needed by the natively built llama-cpp-python; confirm.
# - NOTE(review): if the server JIT-compiles code at runtime (e.g. via
#   torch.compile), a C compiler may need to be added back here.
# - DEBIAN_FRONTEND is set inline so it does not persist in the runtime env.
# - apt lists are removed in the same layer, and `python` is pointed at
#   python3 because the CMD invokes `python`.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        libgomp1 \
        python3-pip \
        python3.10 \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3 /usr/bin/python
|
|
|
|
|
|
|
|
|
|
# Carry the builder stage's installed Python packages into this image,
# keeping the same path on both sides.
COPY --from=builder \
    /usr/local/lib/python3.10/dist-packages/ \
    /usr/local/lib/python3.10/dist-packages/

# Also bring over the builder's /usr/local/bin (presumably the launcher
# scripts that pip installed next to those packages).
COPY --from=builder \
    /usr/local/bin/ \
    /usr/local/bin/
|
|
|
|
|
|
2026-02-27 11:42:26 +00:00
|
|
|
# Dedicated unprivileged account for the server process. A fixed numeric
# UID/GID lets orchestrators verify the container is non-root. The home
# directory is created so libraries that write per-user caches have a
# writable location, and the working directory is handed to the new user.
# NOTE(review): volumes mounted into the container must be readable by
# UID 10001.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --create-home app \
    && chown app:app /app

# Application entry module, copied into the working directory.
COPY --chown=app:app gpu_server.py .

# PYTHONUNBUFFERED=1 makes Python write stdout/stderr unbuffered, so log lines
# reach the container log immediately. PYTHONPATH=/app puts the working
# directory on the module search path.
ENV PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app

# Documents the port the server listens on (see --port below).
EXPOSE 8001

# Drop root before starting the service.
USER app

# Exec form: uvicorn serves the `app` object from gpu_server on all
# interfaces, port 8001.
CMD ["python", "-m", "uvicorn", "gpu_server:app", "--host", "0.0.0.0", "--port", "8001"]
|