perf(stability): add request metrics and resilient API retries

This commit is contained in:
2026-02-07 11:58:21 +08:00
parent 04b94d7fb2
commit a50294933b
38 changed files with 447 additions and 97 deletions

79
app.py
View File

@@ -17,8 +17,9 @@ import os
import signal
import sys
import threading
import time
from flask import Flask, jsonify, redirect, request, send_from_directory, session, url_for
from flask import Flask, g, jsonify, redirect, request, send_from_directory, session, url_for
from flask_login import LoginManager, current_user
from flask_socketio import SocketIO
@@ -35,6 +36,7 @@ from routes import register_blueprints
from security import init_security_middleware
from services.checkpoints import init_checkpoint_manager
from services.maintenance import start_cleanup_scheduler, start_kdocs_monitor
from services.request_metrics import record_request_metric
from services.models import User
from services.runtime import init_runtime
from services.scheduler import scheduled_task_worker
@@ -98,6 +100,20 @@ init_logging(log_level=config.LOG_LEVEL, log_file=config.LOG_FILE)
# Logger for this module, obtained under the name "app".
logger = get_logger("app")
# Hand the SocketIO instance and the logger to services.runtime.
init_runtime(socketio=socketio, logger=logger)
def _env_non_negative_float(name: str, default: float = 0.0) -> float:
    """Read environment variable *name* as a float clamped to ``>= 0``.

    Unset, blank, or unparsable values yield *default* instead of raising,
    so a typo in an optional tuning knob cannot abort module import.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is missing or invalid.

    Returns:
        The parsed value, with negatives and NaN clamped to ``0.0``.
    """
    raw = str(os.environ.get(name, "") or "").strip()
    if not raw:
        return default
    try:
        value = float(raw)
    except ValueError:
        return default
    # ``value > 0.0`` is False for negatives and NaN, so both clamp to 0.0.
    return value if value > 0.0 else 0.0


# Opt-in switch for the "[API-DIAG]" warnings emitted after API responses.
_API_DIAGNOSTIC_LOG = str(os.environ.get("API_DIAGNOSTIC_LOG", "0")).strip().lower() in {
    "1",
    "true",
    "yes",
    "on",
}
# Slow-request threshold in milliseconds; 0 disables the slow-request check.
_API_DIAGNOSTIC_SLOW_MS = _env_non_negative_float("API_DIAGNOSTIC_SLOW_MS")
def _is_api_or_health_path(path: str) -> bool:
    """Return True when *path* is the health endpoint or lives under an API prefix.

    A path matches if it equals ``/health`` exactly or starts with ``/api/``
    or ``/yuyx/api/``. Falsy input (``None``, ``""``) is treated as an empty
    path and never matches.
    """
    candidate = str(path or "")
    if candidate == "/health":
        return True
    return candidate.startswith(("/api/", "/yuyx/api/"))
# Initialize the security middleware (must be registered before other middleware/blueprints).
init_security_middleware(app)
@@ -131,6 +147,11 @@ def unauthorized():
return redirect(url_for("pages.login_page", next=request.url))
@app.before_request
def track_request_start_time():
    """Stamp the request context with its start time.

    Stores a ``time.perf_counter()`` reading on ``g.request_start_perf``;
    ``_record_request_metric_after_response`` reads it back to compute the
    request duration.
    """
    started_at = time.perf_counter()
    g.request_start_perf = started_at
@app.before_request
def enforce_csrf_protection():
if request.method in {"GET", "HEAD", "OPTIONS"}:
@@ -148,20 +169,52 @@ def enforce_csrf_protection():
return jsonify({"error": "CSRF token missing or invalid"}), 403
def _record_request_metric_after_response(response) -> None:
    """Record duration/status metrics for the current request (best effort).

    Reads the start timestamp from ``g.request_start_perf``, computes the
    elapsed time in milliseconds and forwards it to ``record_request_metric``
    together with the request path, method and response status. When
    ``_API_DIAGNOSTIC_LOG`` is enabled, API/health requests that are slow
    (``_API_DIAGNOSTIC_SLOW_MS`` > 0 and reached) or that returned a 5xx
    status are additionally logged as ``[API-DIAG]`` warnings.

    Failures inside the metrics path are logged at debug level and not
    re-raised, so they do not alter the response being returned.

    Args:
        response: The outgoing response; only its ``status_code`` attribute
            is read (a missing or falsy value is recorded as 0).
    """
    try:
        started = float(getattr(g, "request_start_perf", 0.0) or 0.0)
        if started <= 0:
            # No start time was recorded for this request; nothing to measure.
            return

        duration_ms = max(0.0, (time.perf_counter() - started) * 1000.0)
        path = request.path or "/"
        method = request.method or "GET"
        status_code = int(getattr(response, "status_code", 0) or 0)
        is_api = _is_api_or_health_path(path)

        record_request_metric(
            path=path,
            method=method,
            status_code=status_code,
            duration_ms=duration_ms,
            is_api=is_api,
        )

        if _API_DIAGNOSTIC_LOG and is_api:
            is_slow = _API_DIAGNOSTIC_SLOW_MS > 0 and duration_ms >= _API_DIAGNOSTIC_SLOW_MS
            is_server_error = status_code >= 500
            if is_slow or is_server_error:
                logger.warning(
                    f"[API-DIAG] {method} {path} -> {status_code} ({duration_ms:.1f}ms)"
                )
    except Exception as exc:
        # Metrics are diagnostic only and must not break the response, but a
        # failure here should still leave a trace instead of vanishing.
        # Debug level keeps a persistent failure from flooding the log.
        logger.debug(f"[API-DIAG] failed to record request metric: {exc!r}")
@app.after_request
def ensure_csrf_cookie(response):
    """Attach the CSRF cookie to non-static responses and record request metrics.

    For every request whose path does not start with ``/static/``, the
    ``csrf_token`` cookie is set from the session value, or from
    ``generate_csrf_token()`` when the session has none. Request metrics are
    recorded for all requests, static ones included.

    Args:
        response: The outgoing response object.

    Returns:
        The same response object, with the cookie set where applicable.
    """
    if not request.path.startswith("/static/"):
        token = session.get("csrf_token")
        if not token:
            token = generate_csrf_token()
        # NOTE(review): httponly=False presumably lets client-side script read
        # the token and send it back with requests — confirm against the frontend.
        response.set_cookie(
            "csrf_token",
            token,
            httponly=False,
            secure=bool(config.SESSION_COOKIE_SECURE),
            samesite=config.SESSION_COOKIE_SAMESITE,
        )

    # Runs outside the static-path check so static requests are measured too.
    _record_request_metric_after_response(response)
    return response