feat(report): add 24h slow-sql dashboard and metrics api

This commit is contained in:
2026-02-07 14:07:07 +08:00
parent 52dd7ac9e5
commit 6a9858cdec
29 changed files with 427 additions and 57 deletions

View File

@@ -0,0 +1,181 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
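Slow SQL metrics (lightweight, in-memory version).
- Records SQL execution samples that exceeded the slow-query threshold.
- Maintains aggregate statistics over a recent window (24 hours by default).
- Exposes the top SQL statements and a list of the most recent slow SQL.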
"""
from __future__ import annotations
import os
import threading
import time
from collections import deque
def _env_number(name, default, cast):
    """Return environment variable *name* converted with *cast*.

    Falls back to *default* when the variable is unset, blank, or cannot be
    converted, so a malformed setting never aborts the module import.
    """
    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        return default
    try:
        return cast(raw)
    except (TypeError, ValueError):
        return default


# Slow-query threshold in milliseconds (never negative). The code visible here
# only reports it in the snapshot.
# NOTE(review): presumably callers compare against it before recording - confirm.
_SLOW_SQL_THRESHOLD_MS = max(0.0, _env_number("DB_SLOW_QUERY_MS", 120.0, float))
# Retention window for aggregated events, in seconds (at least 10 minutes).
_WINDOW_SECONDS = max(600, _env_number("DB_SLOW_SQL_WINDOW_SECONDS", 86400, int))
# Number of rows in the "top SQL" ranking (at least 5).
_TOP_LIMIT = max(5, _env_number("DB_SLOW_SQL_TOP_LIMIT", 12, int))
# Number of most recent samples kept for the "recent" list (at least 10).
_RECENT_LIMIT = max(10, _env_number("DB_SLOW_SQL_RECENT_LIMIT", 50, int))
# Cap on events kept for aggregation; never smaller than _RECENT_LIMIT.
_MAX_EVENTS = max(_RECENT_LIMIT, _env_number("DB_SLOW_SQL_MAX_EVENTS", 20000, int))
# Maximum length of a stored SQL text (at least 80 characters).
_SQL_MAX_LEN = max(80, _env_number("DB_SLOW_QUERY_SQL_MAX_LEN", 240, int))

# Guards every read and write of _state.
_lock = threading.Lock()
_state = {
    # Wall-clock time at which this module was imported.
    "start_ts": time.time(),
    # Timestamp of the most recently recorded sample (0.0 until one arrives).
    "last_slow_ts": 0.0,
    # Samples inside the retention window, oldest first; trimmed explicitly.
    "events": deque(),
    # Last _RECENT_LIMIT samples; the deque drops older ones by itself.
    "recent": deque(maxlen=_RECENT_LIMIT),
}
def _compact_text(value: str, max_len: int) -> str:
    """Collapse whitespace in *value* and cap the result at *max_len* characters.

    Runs of whitespace (including newlines and tabs) become single spaces and
    leading/trailing whitespace is dropped. A falsy *value* yields ``""``.

    Args:
        value: Text to compact; converted with ``str()``.
        max_len: Maximum length of the returned string.

    Returns:
        The compacted text. When it is longer than *max_len* it is cut and
        ends with ``"..."``; when *max_len* is below 3 there is no room for
        the marker, so the text is simply cut (empty for ``max_len <= 0``).
    """
    text = " ".join(str(value or "").split())
    if len(text) <= max_len:
        return text
    if max_len < 3:
        # A negative slice bound would count from the end of the string and
        # return more than max_len characters, so cut hard instead.
        return text[: max(0, max_len)]
    return f"{text[: max_len - 3]}..."
def _compact_sql(sql: str) -> str:
    """Return *sql* whitespace-collapsed and capped at ``_SQL_MAX_LEN`` chars."""
    raw_sql = str(sql) if sql else ""
    return _compact_text(raw_sql, _SQL_MAX_LEN)
def _compact_params(params_info: str) -> str:
    """Return *params_info* compacted to at most 64 characters.

    A falsy value is replaced by the literal ``"none"`` before compaction.
    """
    params_text = str(params_info) if params_info else "none"
    return _compact_text(params_text, 64)
def _prune_events_locked(now_ts: float) -> None:
    """Trim ``_state["events"]`` first by age, then by size.

    Events are dropped from the left (oldest) end while their ``time`` is
    older than ``now_ts - _WINDOW_SECONDS``; afterwards further events are
    dropped from the left until at most ``_MAX_EVENTS`` remain.

    The visible callers invoke this while holding ``_lock``; the function
    itself does not acquire it.

    Args:
        now_ts: Reference timestamp (seconds) the window is measured from.
    """
    window = _state["events"]
    oldest_allowed = now_ts - float(_WINDOW_SECONDS)

    # Age-based trimming stops at the first event still inside the window.
    while window:
        head_ts = float(window[0].get("time", 0.0) or 0.0)
        expired = head_ts < oldest_allowed
        if not expired:
            break
        window.popleft()

    # Size-based trimming: drop however many events exceed the cap.
    surplus = len(window) - int(_MAX_EVENTS)
    for _ in range(max(0, surplus)):
        window.popleft()
def record_slow_sql(*, sql: str, duration_ms: float, params_info: str = "none") -> None:
    """Store one slow-SQL sample in the shared in-memory state.

    The sample is appended both to the windowed event deque (after pruning
    it) and to the bounded "recent" deque, and ``last_slow_ts`` is updated.
    This function does not compare *duration_ms* with any threshold.

    Args:
        sql: SQL text; stored whitespace-collapsed and length-capped.
        duration_ms: Execution time in milliseconds; negative or falsy values
            are stored as 0.0, and the stored value is rounded to 2 decimals.
        params_info: Short description of the bound parameters; stored
            compacted, with ``"none"`` substituted for a falsy value.
    """
    elapsed_ms = max(0.0, float(duration_ms or 0.0))
    recorded_at = time.time()
    sample = {
        "time": recorded_at,
        "sql": _compact_sql(sql),
        "duration_ms": round(elapsed_ms, 2),
        "params": _compact_params(params_info),
    }
    with _lock:
        # Prune before appending so expired events leave the window first.
        _prune_events_locked(recorded_at)
        for bucket_name in ("events", "recent"):
            _state[bucket_name].append(sample)
        _state["last_slow_ts"] = recorded_at
def get_slow_sql_metrics_snapshot() -> dict:
    """Summarize the slow-SQL samples currently inside the retention window.

    The shared deques are pruned and copied while holding ``_lock``; all
    aggregation then runs on those copies after the lock is released.

    Returns:
        A dict with the configured limits (``window_seconds``, ``top_limit``,
        ``recent_limit``, ``slow_threshold_ms``), overall statistics
        (``since_ts``, ``total_slow_queries``, ``unique_sql``,
        ``avg_duration_ms``, ``max_duration_ms``, ``last_slow_ts``), the
        ranked ``top_sql`` rows and the ``recent_slow_sql`` samples.
        Timestamps are truncated to whole seconds and durations are rounded
        to two decimals.
    """
    snapshot_ts = time.time()
    with _lock:
        _prune_events_locked(snapshot_ts)
        window_events = list(_state["events"])
        recent_events = list(_state["recent"])
        last_slow_ts = float(_state.get("last_slow_ts") or 0.0)

    # Group the windowed events by their (already compacted) SQL text.
    per_sql: dict[str, dict] = {}
    duration_sum = 0.0
    duration_peak = 0.0
    for event in window_events:
        statement = str(event.get("sql") or "-")
        elapsed = float(event.get("duration_ms") or 0.0)
        seen_at = float(event.get("time") or 0.0)
        params_text = str(event.get("params") or "none")

        duration_sum += elapsed
        duration_peak = max(duration_peak, elapsed)

        stats = per_sql.setdefault(
            statement,
            {
                "sql": statement,
                "count": 0,
                "total_ms": 0.0,
                "max_ms": 0.0,
                "last_ts": 0.0,
                "params": params_text,
            },
        )
        stats["count"] += 1
        stats["total_ms"] += elapsed
        if elapsed > stats["max_ms"]:
            # Keep the parameters of the slowest execution as the sample.
            stats["max_ms"] = elapsed
            stats["params"] = params_text
        if seen_at >= stats["last_ts"]:
            stats["last_ts"] = seen_at

    def _rank_key(row: dict) -> tuple:
        # Most frequent first, ties broken by slowest run, then total time.
        return (row["count"], row["max_ms"], row["total_ms"])

    ranked = list(per_sql.values())
    ranked.sort(key=_rank_key, reverse=True)
    del ranked[_TOP_LIMIT:]

    top_sql = [
        {
            "rank": position,
            "sql": row["sql"] or "-",
            "count": row["count"],
            "avg_ms": round(row["total_ms"] / row["count"], 2) if row["count"] > 0 else 0.0,
            "max_ms": round(row["max_ms"], 2),
            "last_ts": int(row["last_ts"]),
            "sample_params": row["params"] or "none",
        }
        for position, row in enumerate(ranked, start=1)
    ]

    # The "recent" deque is bounded by count only, so filter it by age here.
    oldest_allowed = snapshot_ts - float(_WINDOW_SECONDS)
    recent = []
    for event in recent_events:
        event_ts = float(event.get("time") or 0.0)
        if event_ts >= oldest_allowed:
            recent.append(
                {
                    "time": int(event_ts),
                    "sql": str(event.get("sql") or "-"),
                    "duration_ms": round(float(event.get("duration_ms") or 0.0), 2),
                    "params": str(event.get("params") or "none"),
                }
            )

    total_events = len(window_events)
    if total_events > 0:
        avg_duration_ms = round(duration_sum / total_events, 2)
        since_ts = int(float(window_events[0].get("time") or 0.0))
    else:
        avg_duration_ms = 0.0
        since_ts = 0

    return {
        "since_ts": since_ts,
        "window_seconds": _WINDOW_SECONDS,
        "top_limit": _TOP_LIMIT,
        "recent_limit": _RECENT_LIMIT,
        "slow_threshold_ms": _SLOW_SQL_THRESHOLD_MS,
        "total_slow_queries": total_events,
        "unique_sql": len(per_sql),
        "avg_duration_ms": avg_duration_ms,
        "max_duration_ms": round(duration_peak, 2),
        "last_slow_ts": int(last_slow_ts) if last_slow_ts > 0 else 0,
        "top_sql": top_sql,
        "recent_slow_sql": recent,
    }