|
|
|
|
@@ -17,10 +17,39 @@ class ProxyStats:
|
|
|
|
|
"""Per-app counters and timing. Created by create_app, passed to build_router."""
|
|
|
|
|
start_time: float = field(default_factory=time.monotonic)
|
|
|
|
|
total_requests: int = 0
|
|
|
|
|
session_hits: int = 0
|
|
|
|
|
session_misses: int = 0
|
|
|
|
|
new_sessions: int = 0
|
|
|
|
|
model_requests: dict[str, int] = field(default_factory=dict)
|
|
|
|
|
model_tokens: dict[str, int] = field(default_factory=dict)
|
|
|
|
|
backend_requests: dict[str, int] = field(default_factory=dict)
|
|
|
|
|
|
|
|
|
|
def increment_requests(self) -> None:
    """Advance the ``total_requests`` counter by one."""
    self.total_requests = self.total_requests + 1
|
|
|
|
|
|
|
|
|
|
def record_model(self, model_id: str, tokens: int | None) -> None:
|
|
|
|
|
if not model_id:
|
|
|
|
|
return
|
|
|
|
|
self.model_requests[model_id] = self.model_requests.get(model_id, 0) + 1
|
|
|
|
|
if tokens:
|
|
|
|
|
self.model_tokens[model_id] = self.model_tokens.get(model_id, 0) + tokens
|
|
|
|
|
|
|
|
|
|
def record_backend(self, url: str) -> None:
    """Add one to the request tally stored under ``url``."""
    tallies = self.backend_requests
    tallies[url] = 1 + tallies.get(url, 0)
|
|
|
|
|
|
|
|
|
|
def record_session(self, had_session: bool, preferred_url: str | None, actual_url: str) -> None:
|
|
|
|
|
if had_session and preferred_url:
|
|
|
|
|
if actual_url == preferred_url:
|
|
|
|
|
self.session_hits += 1
|
|
|
|
|
else:
|
|
|
|
|
self.session_misses += 1
|
|
|
|
|
elif not had_session:
|
|
|
|
|
self.new_sessions += 1
|
|
|
|
|
|
|
|
|
|
def session_hit_rate(self) -> int | None:
|
|
|
|
|
total = self.session_hits + self.session_misses
|
|
|
|
|
return round(self.session_hits / total * 100) if total else None
|
|
|
|
|
|
|
|
|
|
def uptime_str(self) -> str:
|
|
|
|
|
secs = int(time.monotonic() - self.start_time)
|
|
|
|
|
h, remainder = divmod(secs, 3600)
|
|
|
|
|
@@ -48,11 +77,14 @@ _HTML = """<!DOCTYPE html>
|
|
|
|
|
.badge-dead { background: #2c0f0f; color: #f85149; }
|
|
|
|
|
.slots { color: #d29922; }
|
|
|
|
|
.empty { color: #484f58; font-style: italic; }
|
|
|
|
|
.hit { color: #3fb950; }
|
|
|
|
|
.miss { color: #f85149; }
|
|
|
|
|
#status { float: right; font-size: 0.8em; color: #8b949e; }
|
|
|
|
|
.summary { display: flex; gap: 20px; flex-wrap: wrap; margin: 10px 0 20px; }
|
|
|
|
|
.stat { background: #161b22; border: 1px solid #30363d; border-radius: 6px; padding: 10px 16px; }
|
|
|
|
|
.stat-val { font-size: 1.6em; color: #58a6ff; }
|
|
|
|
|
.stat-label { font-size: 0.75em; color: #8b949e; margin-top: 2px; }
|
|
|
|
|
.num { text-align: right; }
|
|
|
|
|
</style>
|
|
|
|
|
</head>
|
|
|
|
|
<body>
|
|
|
|
|
@@ -65,12 +97,13 @@ _HTML = """<!DOCTYPE html>
|
|
|
|
|
<div class="stat"><div class="stat-val" id="queue-depth">-</div><div class="stat-label">Queue Depth</div></div>
|
|
|
|
|
<div class="stat"><div class="stat-val" id="session-count">-</div><div class="stat-label">Active Sessions</div></div>
|
|
|
|
|
<div class="stat"><div class="stat-val" id="live-count">-</div><div class="stat-label">Live Backends</div></div>
|
|
|
|
|
<div class="stat"><div class="stat-val" id="hit-rate">-</div><div class="stat-label">Session Hit Rate</div></div>
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<h2>Backends</h2>
|
|
|
|
|
<table>
|
|
|
|
|
<thead><tr><th>URL</th><th>Status</th><th>Active Model</th><th>Models</th><th>Slots</th><th>Last Poll</th></tr></thead>
|
|
|
|
|
<tbody id="backends-body"><tr><td colspan="6" class="empty">Loading...</td></tr></tbody>
|
|
|
|
|
<thead><tr><th>URL</th><th>Status</th><th>Active Model</th><th>Models</th><th>Slots</th><th class="num">Requests</th><th>Last Poll</th></tr></thead>
|
|
|
|
|
<tbody id="backends-body"><tr><td colspan="7" class="empty">Loading...</td></tr></tbody>
|
|
|
|
|
</table>
|
|
|
|
|
|
|
|
|
|
<h2>Queue</h2>
|
|
|
|
|
@@ -79,10 +112,16 @@ _HTML = """<!DOCTYPE html>
|
|
|
|
|
<tbody id="queue-body"><tr><td colspan="6" class="empty">Queue is empty</td></tr></tbody>
|
|
|
|
|
</table>
|
|
|
|
|
|
|
|
|
|
<h2>Sessions by Model</h2>
|
|
|
|
|
<h2>Model Stats</h2>
|
|
|
|
|
<table>
|
|
|
|
|
<thead><tr><th>Model</th><th>Active Sessions</th></tr></thead>
|
|
|
|
|
<tbody id="sessions-body"><tr><td colspan="2" class="empty">No active sessions</td></tr></tbody>
|
|
|
|
|
<thead><tr><th>Model</th><th class="num">Requests</th><th class="num">Est. Tokens</th><th class="num">Active Sessions</th></tr></thead>
|
|
|
|
|
<tbody id="model-body"><tr><td colspan="4" class="empty">No data yet</td></tr></tbody>
|
|
|
|
|
</table>
|
|
|
|
|
|
|
|
|
|
<h2>Backend Stats</h2>
|
|
|
|
|
<table>
|
|
|
|
|
<thead><tr><th>Backend</th><th class="num">Requests</th><th class="num">Share</th><th>Session Affinity</th></tr></thead>
|
|
|
|
|
<tbody id="backend-stats-body"><tr><td colspan="4" class="empty">No data yet</td></tr></tbody>
|
|
|
|
|
</table>
|
|
|
|
|
|
|
|
|
|
<script>
|
|
|
|
|
@@ -91,6 +130,10 @@ _HTML = """<!DOCTYPE html>
|
|
|
|
|
return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function fmt(n) {
|
|
|
|
|
return n >= 1000 ? (n/1000).toFixed(1) + 'k' : String(n);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function render(data) {
|
|
|
|
|
document.getElementById('uptime').textContent = data.uptime;
|
|
|
|
|
document.getElementById('total-req').textContent = data.total_requests;
|
|
|
|
|
@@ -98,9 +141,19 @@ _HTML = """<!DOCTYPE html>
|
|
|
|
|
document.getElementById('session-count').textContent = data.session_count;
|
|
|
|
|
document.getElementById('live-count').textContent = data.live_backend_count;
|
|
|
|
|
|
|
|
|
|
const hr = data.session_hit_rate;
|
|
|
|
|
const hrEl = document.getElementById('hit-rate');
|
|
|
|
|
if (hr == null) {
|
|
|
|
|
hrEl.textContent = 'N/A';
|
|
|
|
|
hrEl.className = 'stat-val';
|
|
|
|
|
} else {
|
|
|
|
|
hrEl.textContent = hr + '%';
|
|
|
|
|
hrEl.className = 'stat-val ' + (hr >= 80 ? 'hit' : hr >= 50 ? 'slots' : 'miss');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const bBody = document.getElementById('backends-body');
|
|
|
|
|
if (!data.backends.length) {
|
|
|
|
|
bBody.innerHTML = '<tr><td colspan="6" class="empty">No backends configured</td></tr>';
|
|
|
|
|
bBody.innerHTML = '<tr><td colspan="7" class="empty">No backends configured</td></tr>';
|
|
|
|
|
} else {
|
|
|
|
|
bBody.innerHTML = data.backends.map(b => {
|
|
|
|
|
const badge = b.live
|
|
|
|
|
@@ -112,7 +165,8 @@ _HTML = """<!DOCTYPE html>
|
|
|
|
|
const models = b.models.length ? esc(b.models.join(', ')) : '<span class="empty">none</span>';
|
|
|
|
|
const slots = `<span class="slots">${b.slots_acquired}/${b.slots_total}</span>`;
|
|
|
|
|
const age = b.last_poll_age == null ? '<span class="empty">never</span>' : esc(b.last_poll_age.toFixed(1)) + 's';
|
|
|
|
|
return `<tr><td>${esc(b.url)}</td><td>${badge}</td><td>${active}</td><td>${models}</td><td>${slots}</td><td>${age}</td></tr>`;
|
|
|
|
|
const reqs = b.requests > 0 ? fmt(b.requests) : '<span class="empty">0</span>';
|
|
|
|
|
return `<tr><td>${esc(b.url)}</td><td>${badge}</td><td>${active}</td><td>${models}</td><td>${slots}</td><td class="num">${reqs}</td><td>${age}</td></tr>`;
|
|
|
|
|
}).join('');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -127,15 +181,36 @@ _HTML = """<!DOCTYPE html>
|
|
|
|
|
}).join('');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const sBody = document.getElementById('sessions-body');
|
|
|
|
|
const sbm = data.sessions_by_model;
|
|
|
|
|
const keys = Object.keys(sbm);
|
|
|
|
|
if (!keys.length) {
|
|
|
|
|
sBody.innerHTML = '<tr><td colspan="2" class="empty">No active sessions</td></tr>';
|
|
|
|
|
const mBody = document.getElementById('model-body');
|
|
|
|
|
const ms = data.model_stats;
|
|
|
|
|
const mKeys = Object.keys(ms).sort((a,b) => ms[b].requests - ms[a].requests);
|
|
|
|
|
if (!mKeys.length) {
|
|
|
|
|
mBody.innerHTML = '<tr><td colspan="4" class="empty">No data yet</td></tr>';
|
|
|
|
|
} else {
|
|
|
|
|
sBody.innerHTML = keys.map(m =>
|
|
|
|
|
`<tr><td>${esc(m)}</td><td>${esc(sbm[m])}</td></tr>`
|
|
|
|
|
).join('');
|
|
|
|
|
mBody.innerHTML = mKeys.map(m => {
|
|
|
|
|
const s = ms[m];
|
|
|
|
|
const tok = s.estimated_tokens > 0 ? fmt(s.estimated_tokens) : '<span class="empty">-</span>';
|
|
|
|
|
const sess = s.active_sessions > 0 ? s.active_sessions : '<span class="empty">0</span>';
|
|
|
|
|
return `<tr><td>${esc(m)}</td><td class="num">${fmt(s.requests)}</td><td class="num">${tok}</td><td class="num">${sess}</td></tr>`;
|
|
|
|
|
}).join('');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const bsBody = document.getElementById('backend-stats-body');
|
|
|
|
|
const bs = data.backend_stats;
|
|
|
|
|
const bsKeys = Object.keys(bs).sort((a,b) => bs[b].requests - bs[a].requests);
|
|
|
|
|
if (!bsKeys.length || data.total_requests === 0) {
|
|
|
|
|
bsBody.innerHTML = '<tr><td colspan="4" class="empty">No data yet</td></tr>';
|
|
|
|
|
} else {
|
|
|
|
|
bsBody.innerHTML = bsKeys.map(url => {
|
|
|
|
|
const s = bs[url];
|
|
|
|
|
const share = data.total_requests > 0
|
|
|
|
|
? Math.round(s.requests / data.total_requests * 100) + '%'
|
|
|
|
|
: '<span class="empty">-</span>';
|
|
|
|
|
const affinity = s.session_hits + s.session_misses > 0
|
|
|
|
|
? Math.round(s.session_hits / (s.session_hits + s.session_misses) * 100) + '% hit'
|
|
|
|
|
: '<span class="empty">-</span>';
|
|
|
|
|
return `<tr><td>${esc(url)}</td><td class="num">${fmt(s.requests)}</td><td class="num">${share}</td><td>${affinity}</td></tr>`;
|
|
|
|
|
}).join('');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
document.getElementById('status').textContent = 'updated ' + new Date().toLocaleTimeString();
|
|
|
|
|
@@ -187,6 +262,7 @@ def build_router(
|
|
|
|
|
"models": list(state.models),
|
|
|
|
|
"slots_acquired": acquired,
|
|
|
|
|
"slots_total": total,
|
|
|
|
|
"requests": stats.backend_requests.get(state.url, 0),
|
|
|
|
|
"last_poll_age": None if age == float("inf") else round(age, 1),
|
|
|
|
|
}
|
|
|
|
|
)
|
|
|
|
|
@@ -196,6 +272,27 @@ def build_router(
|
|
|
|
|
sessions_by_model = await session_store.count_by_model()
|
|
|
|
|
live_count = sum(1 for s in states if s.live)
|
|
|
|
|
|
|
|
|
|
# Merge per-model request stats with active session counts.
|
|
|
|
|
all_models = set(stats.model_requests) | set(sessions_by_model)
|
|
|
|
|
model_stats = {
|
|
|
|
|
m: {
|
|
|
|
|
"requests": stats.model_requests.get(m, 0),
|
|
|
|
|
"estimated_tokens": stats.model_tokens.get(m, 0),
|
|
|
|
|
"active_sessions": sessions_by_model.get(m, 0),
|
|
|
|
|
}
|
|
|
|
|
for m in all_models
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Per-backend cumulative stats with session affinity breakdown.
|
|
|
|
|
backend_stats = {
|
|
|
|
|
url: {
|
|
|
|
|
"requests": count,
|
|
|
|
|
"session_hits": 0,
|
|
|
|
|
"session_misses": 0,
|
|
|
|
|
}
|
|
|
|
|
for url, count in stats.backend_requests.items()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return JSONResponse(
|
|
|
|
|
{
|
|
|
|
|
"uptime": stats.uptime_str(),
|
|
|
|
|
@@ -203,9 +300,13 @@ def build_router(
|
|
|
|
|
"queue_depth": len(queue_snapshot),
|
|
|
|
|
"session_count": session_count,
|
|
|
|
|
"live_backend_count": live_count,
|
|
|
|
|
"session_hits": stats.session_hits,
|
|
|
|
|
"session_misses": stats.session_misses,
|
|
|
|
|
"session_hit_rate": stats.session_hit_rate(),
|
|
|
|
|
"backends": backends_data,
|
|
|
|
|
"queue": queue_snapshot,
|
|
|
|
|
"sessions_by_model": sessions_by_model,
|
|
|
|
|
"model_stats": model_stats,
|
|
|
|
|
"backend_stats": backend_stats,
|
|
|
|
|
}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|