Add Llama 3.1 8B model to orchestrator

This commit is contained in:
2025-11-21 21:29:57 +01:00
parent c4426ccc58
commit 7edf17551a

View File

@@ -13,6 +13,16 @@ models:
endpoint: /v1/chat/completions
description: "Qwen 2.5 7B Instruct - Fast text generation, no authentication required"
# Llama 3.1 8B Instruct: a text model served through the shared vLLM server
# script. The settings must stay nested under the model name; written at the
# same indent they become siblings of it, leaving this entry empty and
# colliding with the `endpoint`/`description` keys of the preceding entry.
llama-3.1-8b:
  type: text
  framework: vllm
  service_script: models/vllm/server.py
  port: 8001
  # NOTE(review): presumably the GPU memory to reserve, in GB - confirm that
  # 17 covers the weights plus KV cache at the configured context length.
  vram_gb: 17
  # NOTE(review): presumably how long the orchestrator waits for the service
  # to become ready - verify against the consumer.
  startup_time_seconds: 120
  endpoint: /v1/chat/completions
  # Avoid "latest" here: the claim goes stale as newer Llama releases ship.
  # NOTE(review): the official meta-llama weights on Hugging Face are gated;
  # confirm the service has credentials to download them.
  description: "Llama 3.1 8B Instruct - Meta's instruction-tuned text model"
# Example: to add more models, uncomment and customize the entries below
# Future Text Models: