---
# vLLM OpenAI-compatible server configuration (for `vllm serve --config <file>`).
# NOTE(review): the original file had all keys collapsed onto a single line,
# which is not parseable YAML; restored to one key per line.

# Hugging Face model ID to serve.
model: "meta-llama/Llama-3.1-8B-Instruct"

# HTTP bind address and port ("0.0.0.0" listens on all interfaces).
host: "0.0.0.0"
port: 8001
uvicorn-log-level: "info"

# Fraction of each GPU's memory vLLM may allocate for weights + KV cache.
gpu-memory-utilization: 0.70
# Maximum context length (prompt + generated tokens).
max-model-len: 16384
# "auto" lets vLLM pick the dtype from the model's config (e.g. bf16/fp16).
dtype: "auto"
# false keeps CUDA graph capture enabled for better throughput.
enforce-eager: false

# OpenAI-style automatic tool/function calling, parsed with the
# Llama-3 JSON tool-call parser.
enable-auto-tool-choice: true
tool-call-parser: "llama3_json"