- Multi-modal AI infrastructure for RunPod RTX 4090
- Automatic model orchestration (text, image, music)
- Text: vLLM + Qwen 2.5 7B Instruct
- Image: Flux.1 Schnell via OpenEDAI
- Music: MusicGen Medium via AudioCraft
- Cost-optimized sequential loading on a single GPU (see the sketch after this list)
- Template preparation scripts for rapid deployment
- Comprehensive documentation (README, DEPLOYMENT, TEMPLATE)
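The orchestration layer itself is not shown on this page. As a rough sketch of the sequential-loading idea, assuming each backend exposes a loader and a generate call (the job tuple shape below is illustrative, not the project's actual interface): only one model is resident in the 4090's VRAM at a time, and its memory is released before the next backend is loaded.

import gc
import torch

def run_sequentially(jobs):
    """Run (load_fn, generate_fn, prompt) jobs one at a time so that only a
    single model occupies the GPU's VRAM at any moment."""
    results = []
    for load_fn, generate_fn, prompt in jobs:
        model = load_fn()                      # bring the next backend onto the GPU
        results.append(generate_fn(model, prompt))
        del model                              # drop the reference ...
        gc.collect()
        torch.cuda.empty_cache()               # ... and free its VRAM before the next load
    return results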
Dockerfile
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04

WORKDIR /app

# Install Python and system dependencies
RUN apt-get update && apt-get install -y \
    python3.10 \
    python3-pip \
    ffmpeg \
    git \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip
RUN pip3 install --no-cache-dir --upgrade pip

# Install PyTorch with CUDA support
RUN pip3 install --no-cache-dir torch==2.1.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121

# Copy requirements and install dependencies
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy application code
COPY server.py .

# Create directory for model cache
RUN mkdir -p /app/models

# Environment variables
ENV HF_HOME=/app/models
ENV TORCH_HOME=/app/models
ENV MODEL_NAME=facebook/musicgen-medium

# Expose port
EXPOSE 8000

# Run the server
CMD ["python3", "server.py"]
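server.py is copied into the image and launched by the CMD above, but its contents are not shown on this page. A minimal sketch of what such an entrypoint could look like, assuming the MusicGen backend is AudioCraft exposed over HTTP on port 8000 (the /generate route, request fields, and lazy loading are illustrative assumptions, not the project's actual code):

import os

import torch
import uvicorn
from fastapi import FastAPI
from fastapi.responses import Response
from pydantic import BaseModel

from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write

# Illustrative sketch only; the real server.py is not included on this page.
MODEL_NAME = os.environ.get("MODEL_NAME", "facebook/musicgen-medium")

app = FastAPI()
model = None  # loaded lazily so container start-up stays fast


class MusicRequest(BaseModel):
    prompt: str
    duration: float = 10.0  # seconds of audio to generate


@app.post("/generate")
def generate(req: MusicRequest):
    global model
    if model is None:
        # Weights are cached under /app/models via HF_HOME / TORCH_HOME.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = MusicGen.get_pretrained(MODEL_NAME, device=device)
    model.set_generation_params(duration=req.duration)
    wav = model.generate([req.prompt])  # tensor of shape [batch, channels, samples]
    # audio_write appends the .wav suffix itself
    audio_write("/tmp/out", wav[0].cpu(), model.sample_rate, strategy="loudness")
    with open("/tmp/out.wav", "rb") as f:
        return Response(content=f.read(), media_type="audio/wav")


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)

Once the image is built, the container would be started with GPU access and port 8000 published, for example through RunPod's template settings or docker run --gpus all -p 8000:8000.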