Improve cache: track per-backend session hits/misses and hash only the last message for prefix matching
This commit is contained in:
@@ -5,7 +5,8 @@
|
||||
"Bash(python -m pytest --tb=short -q)",
|
||||
"Bash(python -m pytest tests/test_slot_tracker.py -v)",
|
||||
"Bash(python -m pytest tests/test_scheduler.py -v)",
|
||||
"Bash(python -m pytest tests/test_monitor.py::TestMonitorEndpoints::test_monitor_data_structure -v)"
|
||||
"Bash(python -m pytest tests/test_monitor.py::TestMonitorEndpoints::test_monitor_data_structure -v)",
|
||||
"Bash(python -m pytest -q)"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,6 +23,8 @@ class ProxyStats:
|
||||
model_requests: dict[str, int] = field(default_factory=dict)
|
||||
model_tokens: dict[str, int] = field(default_factory=dict)
|
||||
backend_requests: dict[str, int] = field(default_factory=dict)
|
||||
backend_session_hits: dict[str, int] = field(default_factory=dict)
|
||||
backend_session_misses: dict[str, int] = field(default_factory=dict)
|
||||
|
||||
def increment_requests(self) -> None:
|
||||
self.total_requests += 1
|
||||
@@ -37,13 +39,16 @@ class ProxyStats:
|
||||
def record_backend(self, url: str) -> None:
|
||||
self.backend_requests[url] = self.backend_requests.get(url, 0) + 1
|
||||
|
||||
def record_session(self, had_session: bool, preferred_url: str | None, actual_url: str) -> None:
|
||||
if had_session and preferred_url:
|
||||
def record_session(self, preferred_url: str | None, actual_url: str) -> None:
|
||||
if preferred_url:
|
||||
if actual_url == preferred_url:
|
||||
self.session_hits += 1
|
||||
self.backend_session_hits[actual_url] = self.backend_session_hits.get(actual_url, 0) + 1
|
||||
else:
|
||||
self.session_misses += 1
|
||||
elif not had_session:
|
||||
# Count miss against the preferred backend (the one that was expected but missed).
|
||||
self.backend_session_misses[preferred_url] = self.backend_session_misses.get(preferred_url, 0) + 1
|
||||
else:
|
||||
self.new_sessions += 1
|
||||
|
||||
def session_hit_rate(self) -> int | None:
|
||||
@@ -287,8 +292,8 @@ def build_router(
|
||||
backend_stats = {
|
||||
url: {
|
||||
"requests": count,
|
||||
"session_hits": 0,
|
||||
"session_misses": 0,
|
||||
"session_hits": stats.backend_session_hits.get(url, 0),
|
||||
"session_misses": stats.backend_session_misses.get(url, 0),
|
||||
}
|
||||
for url, count in stats.backend_requests.items()
|
||||
}
|
||||
|
||||
@@ -281,8 +281,6 @@ async def _inference_endpoint(
|
||||
if not incoming_session_id:
|
||||
await _recover_session_affinity(session_id, body.get("messages") or [], session_store)
|
||||
|
||||
preferred_url: str | None = None
|
||||
if incoming_session_id:
|
||||
preferred_url = await session_store.get_preferred_backend(session_id)
|
||||
|
||||
result = await _dispatch_entry(
|
||||
@@ -319,7 +317,7 @@ async def _inference_endpoint(
|
||||
|
||||
stats.record_model(model_id, _estimate_tokens(body))
|
||||
stats.record_backend(backend.url)
|
||||
stats.record_session(bool(incoming_session_id), preferred_url, backend.url)
|
||||
stats.record_session(preferred_url, backend.url)
|
||||
|
||||
if model_id:
|
||||
messages = body.get("messages", [])
|
||||
|
||||
@@ -62,7 +62,7 @@ class SessionStore:
|
||||
session.model_id = model_id
|
||||
if messages is not None:
|
||||
session.last_message_index = len(messages)
|
||||
session.prefix_hash = compute_prefix_hash(messages)
|
||||
session.prefix_hash = compute_prefix_hash([messages[-1]])
|
||||
if preferred_backend is not None:
|
||||
session.preferred_backend = preferred_backend
|
||||
session.touch()
|
||||
@@ -105,7 +105,7 @@ class SessionStore:
|
||||
k = s.last_message_index
|
||||
if s.is_expired(self._ttl) or not s.preferred_backend or not s.prefix_hash or k == 0 or k > len(messages):
|
||||
return 0
|
||||
return k if compute_prefix_hash(messages[:k]) == s.prefix_hash else 0
|
||||
return k if compute_prefix_hash([messages[k - 1]]) == s.prefix_hash else 0
|
||||
|
||||
async def find_by_prefix(self, messages: list[dict]) -> str | None:
|
||||
"""Return the preferred backend whose stored conversation is a prefix of messages.
|
||||
|
||||
Reference in New Issue
Block a user