Add per-model limits

This commit is contained in:
2026-05-17 23:07:21 +02:00
parent 02d9747f68
commit 1dbb1c7f6f
8 changed files with 154 additions and 6 deletions

View File

@@ -5,16 +5,24 @@
 "poll_interval": 5,
 "slot_wait_timeout": 30,
 "session_idle_ttl": 300,
+"default_slot_capacity": 1,
+"default_max_models": 1,
+"max_queue_skip": 4,
+"model_limits": {
+"my-very-large-model": 1
+},
 "backends": [
 {
 "url": "http://localhost:8081",
 "api_key": null,
-"model_ids": []
+"model_ids": [],
+"max_models": 1
 },
 {
 "url": "http://localhost:8082",
 "api_key": "backend-secret",
-"model_ids": ["llama3"]
+"model_ids": ["llama3"],
+"max_models": null
 }
 ]
 }