Add a llama-3.1-8b entry to the model-orchestrator model registry.

The new entry mirrors the fields of the Qwen entry shown in the hunk context
(same /v1/chat/completions endpoint) and is inserted before the
"Example: Add more models" comment block.

NOTE(review): line breaks and indentation were restored from a flattened copy
of this patch. Indentation (2 spaces per level, comment lines at entry level)
is reconstructed, not observed; re-check it against the target file or apply
with whitespace tolerance.
NOTE(review): port 8001 - only part of the file is visible here; confirm no
other entry that can run at the same time already uses this port.
NOTE(review): the Qwen description states "no authentication required"; the
new description says nothing about access. Llama 3.1 weights are presumably
gated upstream - confirm whether a token is needed and document it.
NOTE(review): "Meta's latest model" will go stale; consider a neutral wording
in a follow-up change.

diff --git a/model-orchestrator/models.yaml b/model-orchestrator/models.yaml
index ac5d2f1..c78a529 100644
--- a/model-orchestrator/models.yaml
+++ b/model-orchestrator/models.yaml
@@ -13,6 +13,16 @@ models:
     endpoint: /v1/chat/completions
     description: "Qwen 2.5 7B Instruct - Fast text generation, no authentication required"
 
+  llama-3.1-8b:
+    type: text
+    framework: vllm
+    service_script: models/vllm/server.py
+    port: 8001
+    vram_gb: 17
+    startup_time_seconds: 120
+    endpoint: /v1/chat/completions
+    description: "Llama 3.1 8B Instruct - Meta's latest model"
+
   # Example: Add more models easily by uncommenting and customizing below
 
   # Future Text Models: