---
# Model Registry for AI Orchestrator
# Add new models by appending to this file

models:
  # Text Generation Models
  qwen-2.5-7b:
    type: text
    framework: vllm
    docker_service: vllm-qwen
    port: 8001
    vram_gb: 14
    startup_time_seconds: 120
    endpoint: /v1/chat/completions
    description: "Qwen 2.5 7B Instruct - Fast text generation, no authentication required"

  # Image Generation Models
  flux-schnell:
    type: image
    framework: openedai-images
    docker_service: flux
    port: 8002
    vram_gb: 14
    startup_time_seconds: 60
    endpoint: /v1/images/generations
    description: "Flux.1 Schnell - Fast high-quality image generation (4-5 sec/image)"

  # Music Generation Models
  musicgen-medium:
    type: audio
    framework: audiocraft
    docker_service: musicgen
    port: 8003
    vram_gb: 11
    startup_time_seconds: 45
    endpoint: /v1/audio/generations
    description: "MusicGen Medium - Text-to-music generation (60-90 sec for 30s audio)"

  # Example: Add more models easily by uncommenting and customizing below
  #
  # Future Text Models:
  # llama-3.1-8b:
  #   type: text
  #   framework: vllm
  #   docker_service: vllm-llama
  #   port: 8004
  #   vram_gb: 17
  #   startup_time_seconds: 120
  #   endpoint: /v1/chat/completions
  #   description: "Llama 3.1 8B Instruct - Meta's latest model"
  #
  # Future Image Models:
  # sdxl:
  #   type: image
  #   framework: openedai-images
  #   docker_service: sdxl
  #   port: 8005
  #   vram_gb: 10
  #   startup_time_seconds: 45
  #   endpoint: /v1/images/generations
  #   description: "Stable Diffusion XL - High quality image generation"
  #
  # Future Audio Models:
  # whisper-large:
  #   type: audio
  #   framework: faster-whisper
  #   docker_service: whisper
  #   port: 8006
  #   vram_gb: 3
  #   startup_time_seconds: 30
  #   endpoint: /v1/audio/transcriptions
  #   description: "Whisper Large v3 - Speech-to-text transcription"
  #
  # xtts-v2:
  #   type: audio
  #   framework: openedai-speech
  #   docker_service: tts
  #   port: 8007
  #   vram_gb: 3
  #   startup_time_seconds: 30
  #   endpoint: /v1/audio/speech
  #   description: "XTTS v2 - High-quality text-to-speech with voice cloning"

# Configuration
config:
  gpu_memory_total_gb: 24
  allow_concurrent_loading: false  # Sequential loading only
  model_switch_timeout_seconds: 300  # 5 minutes max for model switching
  health_check_interval_seconds: 10
  default_model: qwen-2.5-7b