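improve cache: track per-backend session hits and misses, always look up the preferred backend, and hash only the last stored message when matching conversation prefixes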

This commit is contained in:
2026-05-18 01:02:57 +02:00
parent 5211b2f1a0
commit 7cf16dcace
4 changed files with 16 additions and 12 deletions

View File

@@ -5,7 +5,8 @@
"Bash(python -m pytest --tb=short -q)",
"Bash(python -m pytest tests/test_slot_tracker.py -v)",
"Bash(python -m pytest tests/test_scheduler.py -v)",
"Bash(python -m pytest tests/test_monitor.py::TestMonitorEndpoints::test_monitor_data_structure -v)"
"Bash(python -m pytest tests/test_monitor.py::TestMonitorEndpoints::test_monitor_data_structure -v)",
"Bash(python -m pytest -q)"
]
}
}

View File

@@ -23,6 +23,8 @@ class ProxyStats:
model_requests: dict[str, int] = field(default_factory=dict)
model_tokens: dict[str, int] = field(default_factory=dict)
backend_requests: dict[str, int] = field(default_factory=dict)
backend_session_hits: dict[str, int] = field(default_factory=dict)
backend_session_misses: dict[str, int] = field(default_factory=dict)
def increment_requests(self) -> None:
self.total_requests += 1
@@ -37,13 +39,16 @@ class ProxyStats:
def record_backend(self, url: str) -> None:
self.backend_requests[url] = self.backend_requests.get(url, 0) + 1
def record_session(self, had_session: bool, preferred_url: str | None, actual_url: str) -> None:
if had_session and preferred_url:
def record_session(self, preferred_url: str | None, actual_url: str) -> None:
if preferred_url:
if actual_url == preferred_url:
self.session_hits += 1
self.backend_session_hits[actual_url] = self.backend_session_hits.get(actual_url, 0) + 1
else:
self.session_misses += 1
elif not had_session:
# Count miss against the preferred backend (the one that was expected but missed).
self.backend_session_misses[preferred_url] = self.backend_session_misses.get(preferred_url, 0) + 1
else:
self.new_sessions += 1
def session_hit_rate(self) -> int | None:
@@ -287,8 +292,8 @@ def build_router(
backend_stats = {
url: {
"requests": count,
"session_hits": 0,
"session_misses": 0,
"session_hits": stats.backend_session_hits.get(url, 0),
"session_misses": stats.backend_session_misses.get(url, 0),
}
for url, count in stats.backend_requests.items()
}

View File

@@ -281,9 +281,7 @@ async def _inference_endpoint(
if not incoming_session_id:
await _recover_session_affinity(session_id, body.get("messages") or [], session_store)
preferred_url: str | None = None
if incoming_session_id:
preferred_url = await session_store.get_preferred_backend(session_id)
preferred_url = await session_store.get_preferred_backend(session_id)
result = await _dispatch_entry(
request_queue, stats, config, slot_tracker, scheduler, model_id, session_id, body
@@ -319,7 +317,7 @@ async def _inference_endpoint(
stats.record_model(model_id, _estimate_tokens(body))
stats.record_backend(backend.url)
stats.record_session(bool(incoming_session_id), preferred_url, backend.url)
stats.record_session(preferred_url, backend.url)
if model_id:
messages = body.get("messages", [])

View File

@@ -62,7 +62,7 @@ class SessionStore:
session.model_id = model_id
if messages is not None:
session.last_message_index = len(messages)
session.prefix_hash = compute_prefix_hash(messages)
session.prefix_hash = compute_prefix_hash([messages[-1]])
if preferred_backend is not None:
session.preferred_backend = preferred_backend
session.touch()
@@ -105,7 +105,7 @@ class SessionStore:
k = s.last_message_index
if s.is_expired(self._ttl) or not s.preferred_backend or not s.prefix_hash or k == 0 or k > len(messages):
return 0
return k if compute_prefix_hash(messages[:k]) == s.prefix_hash else 0
return k if compute_prefix_hash([messages[k - 1]]) == s.prefix_hash else 0
async def find_by_prefix(self, messages: list[dict]) -> str | None:
"""Return the preferred backend whose stored conversation is a prefix of messages.