---
# ============================================================================
# vLLM Model Configuration
# ============================================================================
#
# This configuration file defines all available vLLM models for download.
# Models are organized by category: text generation and text embeddings.
#
# Each model entry contains:
#   - repo_id: HuggingFace repository identifier
#   - description: Human-readable description
#   - size_gb: Approximate size in gigabytes
#   - essential: Whether this is an essential model (true/false)
#   - category: Model category (text_generation/embedding)
#
# ============================================================================

# Global download settings shared by all models.
settings:
  cache_dir: /workspace/huggingface_cache
  parallel_downloads: 1
  retry_attempts: 3
  timeout_seconds: 3600

# Model categories
model_categories:
  # ==========================================================================
  # TEXT GENERATION MODELS (vLLM)
  # ==========================================================================
  text_generation_models:
    - repo_id: Qwen/Qwen2.5-7B-Instruct
      description: Qwen 2.5 7B Instruct - Advanced multilingual reasoning
      size_gb: 14
      essential: true
      category: text_generation
      type: vllm
      format: safetensors
      vram_gb: 14
      context_length: 32768
      notes: Latest Qwen 2.5 model with enhanced reasoning capabilities
      files:
        - source: "model.safetensors"
          dest: "model.safetensors"

    - repo_id: meta-llama/Llama-3.1-8B-Instruct
      description: Llama 3.1 8B Instruct - Meta's latest instruction-tuned model
      size_gb: 17
      essential: true
      category: text_generation
      type: vllm
      format: safetensors
      vram_gb: 17
      context_length: 131072
      notes: Extended 128K context length, excellent for long-form tasks
      files:
        - source: "model.safetensors"
          dest: "model.safetensors"

  # ==========================================================================
  # TEXT EMBEDDING MODELS (vLLM)
  # ==========================================================================
  embedding_models:
    - repo_id: BAAI/bge-large-en-v1.5
      description: BGE Large English v1.5 - High-quality embeddings for RAG
      size_gb: 1.3
      essential: true
      category: embedding
      type: vllm_embedding
      format: safetensors
      vram_gb: 3
      embedding_dimensions: 1024
      max_tokens: 512
      notes: Top-tier MTEB scores, excellent for semantic search and RAG applications
      files:
        - source: "model.safetensors"
          dest: "model.safetensors"

# ============================================================================
# STORAGE & VRAM SUMMARIES
# ============================================================================
storage_requirements:
  text_generation: 31  # Qwen 2.5 7B + Llama 3.1 8B
  embedding: 1.3  # BGE Large
  total: 32.3  # Total essential storage

vram_requirements:
  # For 24GB GPU (RTX 4090)
  simultaneous_loadable:
    - name: Qwen 2.5 7B Only
      models: ["Qwen 2.5 7B Instruct"]
      vram_used: 14
      remaining: 10
    - name: Llama 3.1 8B Only
      models: ["Llama 3.1 8B Instruct"]
      vram_used: 17
      remaining: 7
    - name: BGE Large Only
      models: ["BGE Large"]
      vram_used: 3
      remaining: 21
    - name: Qwen + BGE Embedding
      models: ["Qwen 2.5 7B", "BGE Large"]
      vram_used: 17
      remaining: 7
    - name: Llama + BGE Embedding
      models: ["Llama 3.1 8B", "BGE Large"]
      vram_used: 20
      remaining: 4

# ============================================================================
# METADATA
# ============================================================================
metadata:
  # Quoted so YAML keeps it a string, not a float-like scalar.
  version: "1.0.0"
  # Quoted so parsers keep it a string, not an implicit date object.
  last_updated: "2025-11-25"
  compatible_with:
    - "vLLM >= 0.6.0"
    - "Python >= 3.10"
    - "HuggingFace Hub >= 0.20.0"
  maintainer: Valknar
  repository: "https://github.com/yourusername/runpod"