# runpod/vllm/Dockerfile

FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04
WORKDIR /app
# Install Python and system dependencies
# (pip3 and the python3 invoked in CMD resolve to Ubuntu 22.04's default
# interpreter, so install the default python3 rather than a versioned
# interpreter that would go unused)
RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    git \
    && rm -rf /var/lib/apt/lists/*
# Upgrade pip
RUN pip3 install --no-cache-dir --upgrade pip
# Install vLLM and dependencies
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt
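# (requirements.txt is not shown in this file; it is assumed to pin at least
# vllm plus whatever HTTP framework server.py uses to serve requests)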
# Copy application code
COPY server.py .
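# (server.py is not shown here; it is assumed to start a vLLM engine and
# expose it over HTTP on the host/port configured below)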
# Create directory for model cache
RUN mkdir -p /workspace/huggingface_cache
# Point the Hugging Face cache at /workspace so downloaded model weights can
# persist on a mounted volume (RunPod mounts its persistent volume at /workspace)
ENV HF_HOME=/workspace/huggingface_cache
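# Host and port the server binds to (assumed to be read by server.py;
# these are not standard vLLM environment variables)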
ENV VLLM_HOST=0.0.0.0
ENV VLLM_PORT=8000
# Expose port
EXPOSE 8000
# Run the server
CMD ["python3", "server.py"]
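# Example local usage (the image tag and build context below are illustrative,
# not part of the repo; on RunPod the platform supplies the GPU and mounts the
# persistent /workspace volume itself):
#   docker build -t vllm-server -f runpod/vllm/Dockerfile runpod/vllm
#   docker run --gpus all -p 8000:8000 vllm-server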