Files
runpod/model-orchestrator/models.yaml
Sebastian Krüger 57b706abe6 fix: correct vLLM service port to 8000
- Updated qwen-2.5-7b port from 8001 to 8000 in models.yaml
- Matches actual vLLM server default port configuration
- Tested and verified: orchestrator successfully loaded model and generated response
2025-11-21 16:28:54 +01:00

90 lines
2.4 KiB
YAML

# Model Registry for AI Orchestrator
# Add new models by appending to this file
models:
  # Each key below is a model id; `config.default_model` refers to one of them.
  # Every entry carries the same fields: type, framework, service_script, port,
  # vram_gb, startup_time_seconds, endpoint, description.
  # NOTE(review): field meanings are inferred from their names — confirm
  # against the orchestrator code before relying on them.

  # Text Generation Models
  qwen-2.5-7b:
    type: text
    framework: vllm
    service_script: models/vllm/server.py
    port: 8000
    vram_gb: 14
    startup_time_seconds: 120
    endpoint: /v1/chat/completions
    description: "Qwen 2.5 7B Instruct - Fast text generation, no authentication required"

  # Image Generation Models
  flux-schnell:
    type: image
    framework: openedai-images
    service_script: models/flux/server.py
    port: 8002
    vram_gb: 14
    startup_time_seconds: 60
    endpoint: /v1/images/generations
    description: "Flux.1 Schnell - Fast high-quality image generation (4-5 sec/image)"

  # Music Generation Models
  musicgen-medium:
    type: audio
    framework: audiocraft
    service_script: models/musicgen/server.py
    port: 8003
    vram_gb: 11
    startup_time_seconds: 45
    endpoint: /v1/audio/generations
    description: "MusicGen Medium - Text-to-music generation (60-90 sec for 30s audio)"
# Example: Add more models easily by uncommenting and customizing below.
# Uncommented entries must sit under `models:` at the same indentation as the
# existing model entries, with their fields nested one level deeper.
# NOTE(review): the active entries use `service_script`, while these templates
# use `docker_service` — confirm which key the orchestrator expects.
#
# Future Text Models:
#   llama-3.1-8b:
#     type: text
#     framework: vllm
#     docker_service: vllm-llama
#     port: 8004
#     vram_gb: 17
#     startup_time_seconds: 120
#     endpoint: /v1/chat/completions
#     description: "Llama 3.1 8B Instruct - Meta's latest model"
#
# Future Image Models:
#   sdxl:
#     type: image
#     framework: openedai-images
#     docker_service: sdxl
#     port: 8005
#     vram_gb: 10
#     startup_time_seconds: 45
#     endpoint: /v1/images/generations
#     description: "Stable Diffusion XL - High quality image generation"
#
# Future Audio Models:
#   whisper-large:
#     type: audio
#     framework: faster-whisper
#     docker_service: whisper
#     port: 8006
#     vram_gb: 3
#     startup_time_seconds: 30
#     endpoint: /v1/audio/transcriptions
#     description: "Whisper Large v3 - Speech-to-text transcription"
#
#   xtts-v2:
#     type: audio
#     framework: openedai-speech
#     docker_service: tts
#     port: 8007
#     vram_gb: 3
#     startup_time_seconds: 30
#     endpoint: /v1/audio/speech
#     description: "XTTS v2 - High-quality text-to-speech with voice cloning"
# Configuration
# Orchestrator-wide settings; these keys must be nested under `config:`.
config:
  gpu_memory_total_gb: 24
  allow_concurrent_loading: false  # Sequential loading only
  model_switch_timeout_seconds: 300  # 5 minutes max for model switching
  health_check_interval_seconds: 10
  # Must match a model id defined under `models:` (qwen-2.5-7b is defined there).
  default_model: qwen-2.5-7b