31 lines
644 B
JSON
31 lines
644 B
JSON
{
  "host": "0.0.0.0",
  "port": 8080,
  "api_keys": ["your-secret-key"],
  "poll_interval": 5,
  "slot_wait_timeout": 30,
  "session_idle_ttl": 300,
  "default_slot_capacity": 1,
  "default_max_models": 1,
  "model_affinity_sched_bonus": 10,
  "queue_aging_equalization": 30.0,
  "model_unload_delay": 3.0,
  "model_limits": {
    "my-very-large-model": 1
  },
  "backends": [
    {
      "url": "http://localhost:8081",
      "api_key": null,
      "model_ids": [],
      "max_models": 1
    },
    {
      "url": "http://localhost:8082",
      "api_key": "backend-secret",
      "model_ids": ["llama3"],
      "max_models": null
    }
  ]
}