Add Llama 3.1 8B model to orchestrator
@@ -13,6 +13,16 @@ models:
     endpoint: /v1/chat/completions
     description: "Qwen 2.5 7B Instruct - Fast text generation, no authentication required"
 
+  llama-3.1-8b:
+    type: text
+    framework: vllm
+    service_script: models/vllm/server.py
+    port: 8001
+    vram_gb: 17
+    startup_time_seconds: 120
+    endpoint: /v1/chat/completions
+    description: "Llama 3.1 8B Instruct - Meta's latest model"
+
 # Example: Add more models easily by uncommenting and customizing below
 
 # Future Text Models:
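For reference, here is a minimal sketch of how a client could exercise the new entry once the orchestrator has started it: vLLM exposes the OpenAI-compatible chat completions API at the configured port 8001. The base URL and the model identifier below are assumptions (the identifier a request must send depends on how models/vllm/server.py registers the model with vLLM), so treat this as an illustration rather than the project's actual client code.

    import requests

    # Hypothetical client for the new llama-3.1-8b entry.
    # Assumptions: the vLLM server is reachable on localhost at the configured
    # port 8001, and it accepts the orchestrator key "llama-3.1-8b" as the model
    # name (the real name may be the underlying Hugging Face model id instead).
    BASE_URL = "http://localhost:8001"
    MODEL_ID = "llama-3.1-8b"

    def chat(prompt: str) -> str:
        """Send a single-turn request to the OpenAI-compatible endpoint."""
        resp = requests.post(
            f"{BASE_URL}/v1/chat/completions",
            json={
                "model": MODEL_ID,
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": 256,
            },
            timeout=120,
        )
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]

    if __name__ == "__main__":
        print(chat("Say hello in one sentence."))

If the request is sent before the model finishes loading, expect connection errors for roughly the startup_time_seconds window (120 s) declared in the config.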