perf(db): tune sqlite pool and add maintenance scheduler

This commit is contained in:
2026-02-07 12:53:43 +08:00
parent d77e439712
commit ff67a9bbab
5 changed files with 170 additions and 11 deletions

View File

@@ -18,6 +18,17 @@ SESSION_COOKIE_SECURE=false # 使用HTTPS时设为true
# ==================== 数据库配置 ==================== # ==================== 数据库配置 ====================
DB_FILE=data/app_data.db DB_FILE=data/app_data.db
DB_POOL_SIZE=5 DB_POOL_SIZE=5
DB_CONNECT_TIMEOUT_SECONDS=10
DB_BUSY_TIMEOUT_MS=10000
DB_CACHE_SIZE_KB=8192
DB_WAL_AUTOCHECKPOINT_PAGES=1000
DB_MMAP_SIZE_MB=256
DB_LOCK_RETRY_COUNT=3
DB_LOCK_RETRY_BASE_MS=50
DB_PRAGMA_OPTIMIZE_INTERVAL_SECONDS=21600
DB_ANALYZE_INTERVAL_SECONDS=86400
DB_WAL_CHECKPOINT_INTERVAL_SECONDS=43200
DB_WAL_CHECKPOINT_MODE=PASSIVE
# ==================== 并发控制配置 ==================== # ==================== 并发控制配置 ====================
MAX_CONCURRENT_GLOBAL=2 MAX_CONCURRENT_GLOBAL=2

3
app.py
View File

@@ -35,7 +35,7 @@ from realtime.status_push import status_push_worker
from routes import register_blueprints from routes import register_blueprints
from security import init_security_middleware from security import init_security_middleware
from services.checkpoints import init_checkpoint_manager from services.checkpoints import init_checkpoint_manager
from services.maintenance import start_cleanup_scheduler, start_kdocs_monitor from services.maintenance import start_cleanup_scheduler, start_database_maintenance_scheduler, start_kdocs_monitor
from services.request_metrics import record_request_metric from services.request_metrics import record_request_metric
from services.models import User from services.models import User
from services.runtime import init_runtime from services.runtime import init_runtime
@@ -407,6 +407,7 @@ if __name__ == "__main__":
_init_optional_email_service() _init_optional_email_service()
start_cleanup_scheduler() start_cleanup_scheduler()
start_database_maintenance_scheduler()
start_kdocs_monitor() start_kdocs_monitor()
_load_and_apply_scheduler_limits() _load_and_apply_scheduler_limits()

View File

@@ -126,6 +126,17 @@ class Config:
# ==================== 数据库配置 ==================== # ==================== 数据库配置 ====================
DB_FILE = os.environ.get("DB_FILE", "data/app_data.db") DB_FILE = os.environ.get("DB_FILE", "data/app_data.db")
DB_POOL_SIZE = int(os.environ.get("DB_POOL_SIZE", "5")) DB_POOL_SIZE = int(os.environ.get("DB_POOL_SIZE", "5"))
DB_CONNECT_TIMEOUT_SECONDS = int(os.environ.get("DB_CONNECT_TIMEOUT_SECONDS", "10"))
DB_BUSY_TIMEOUT_MS = int(os.environ.get("DB_BUSY_TIMEOUT_MS", "10000"))
DB_CACHE_SIZE_KB = int(os.environ.get("DB_CACHE_SIZE_KB", "8192"))
DB_WAL_AUTOCHECKPOINT_PAGES = int(os.environ.get("DB_WAL_AUTOCHECKPOINT_PAGES", "1000"))
DB_MMAP_SIZE_MB = int(os.environ.get("DB_MMAP_SIZE_MB", "256"))
DB_LOCK_RETRY_COUNT = int(os.environ.get("DB_LOCK_RETRY_COUNT", "3"))
DB_LOCK_RETRY_BASE_MS = int(os.environ.get("DB_LOCK_RETRY_BASE_MS", "50"))
DB_PRAGMA_OPTIMIZE_INTERVAL_SECONDS = int(os.environ.get("DB_PRAGMA_OPTIMIZE_INTERVAL_SECONDS", "21600"))
DB_ANALYZE_INTERVAL_SECONDS = int(os.environ.get("DB_ANALYZE_INTERVAL_SECONDS", "86400"))
DB_WAL_CHECKPOINT_INTERVAL_SECONDS = int(os.environ.get("DB_WAL_CHECKPOINT_INTERVAL_SECONDS", "43200"))
DB_WAL_CHECKPOINT_MODE = os.environ.get("DB_WAL_CHECKPOINT_MODE", "PASSIVE")
# ==================== 浏览器配置 ==================== # ==================== 浏览器配置 ====================
SCREENSHOTS_DIR = os.environ.get("SCREENSHOTS_DIR", "截图") SCREENSHOTS_DIR = os.environ.get("SCREENSHOTS_DIR", "截图")
@@ -249,6 +260,20 @@ class Config:
if cls.DB_POOL_SIZE < 1: if cls.DB_POOL_SIZE < 1:
errors.append("DB_POOL_SIZE必须大于0") errors.append("DB_POOL_SIZE必须大于0")
if cls.DB_CONNECT_TIMEOUT_SECONDS < 1:
errors.append("DB_CONNECT_TIMEOUT_SECONDS必须大于0")
if cls.DB_BUSY_TIMEOUT_MS < 100:
errors.append("DB_BUSY_TIMEOUT_MS必须至少100毫秒")
if cls.DB_CACHE_SIZE_KB < 1024:
errors.append("DB_CACHE_SIZE_KB建议至少1024")
if cls.DB_WAL_AUTOCHECKPOINT_PAGES < 100:
errors.append("DB_WAL_AUTOCHECKPOINT_PAGES建议至少100")
if cls.DB_MMAP_SIZE_MB < 0:
errors.append("DB_MMAP_SIZE_MB不能为负数")
if cls.DB_LOCK_RETRY_COUNT < 0:
errors.append("DB_LOCK_RETRY_COUNT不能为负数")
if cls.DB_LOCK_RETRY_BASE_MS < 10:
errors.append("DB_LOCK_RETRY_BASE_MS建议至少10毫秒")
# 验证日志配置 # 验证日志配置
if cls.LOG_LEVEL not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: if cls.LOG_LEVEL not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:

View File

@@ -7,12 +7,28 @@
import sqlite3 import sqlite3
import threading import threading
import time
from queue import Empty, Full, Queue from queue import Empty, Full, Queue
from app_config import get_config
from app_logger import get_logger from app_logger import get_logger
logger = get_logger("database") logger = get_logger("database")
config = get_config()

# Pool tuning values, read once at import time from the application config.
# Every value is coerced to int and clamped to a floor, so a missing attribute
# falls back to the default and an out-of-range value is raised to the floor.
DB_CONNECT_TIMEOUT_SECONDS = max(1, int(getattr(config, "DB_CONNECT_TIMEOUT_SECONDS", 10)))
# The floor is 100 ms, the same lower bound the Config validation checks for
# DB_BUSY_TIMEOUT_MS, so any value that passes validation is honored as-is
# instead of being silently raised to a larger minimum.
DB_BUSY_TIMEOUT_MS = max(100, int(getattr(config, "DB_BUSY_TIMEOUT_MS", 10000)))
DB_CACHE_SIZE_KB = max(1024, int(getattr(config, "DB_CACHE_SIZE_KB", 8192)))
DB_WAL_AUTOCHECKPOINT_PAGES = max(100, int(getattr(config, "DB_WAL_AUTOCHECKPOINT_PAGES", 1000)))
# 0 is allowed and means the mmap_size pragma is not issued at all.
DB_MMAP_SIZE_MB = max(0, int(getattr(config, "DB_MMAP_SIZE_MB", 256)))
# 0 is allowed and means a commit is attempted exactly once (no retries).
DB_LOCK_RETRY_COUNT = max(0, int(getattr(config, "DB_LOCK_RETRY_COUNT", 3)))
# Base delay for the commit retry backoff; doubled on each successive attempt.
DB_LOCK_RETRY_BASE_MS = max(10, int(getattr(config, "DB_LOCK_RETRY_BASE_MS", 50)))
def _is_lock_conflict_error(error: sqlite3.OperationalError) -> bool:
    """Tell whether an SQLite error looks like a lock/busy conflict.

    The decision is made purely on the error's text: the message is
    lower-cased and checked for the substrings "locked" or "busy".

    Args:
        error: The exception to inspect. A falsy value (e.g. ``None``) is
            treated as an empty message.

    Returns:
        True if the message contains "locked" or "busy", otherwise False.
    """
    text = str(error or "").lower()
    return any(marker in text for marker in ("locked", "busy"))
class ConnectionPool: class ConnectionPool:
@@ -46,16 +62,29 @@ class ConnectionPool:
def _create_connection(self):
    """Open a new SQLite connection and apply the pool's tuning PRAGMAs.

    The connection is opened on ``self.database`` with
    ``check_same_thread=False`` and the configured connect timeout, and
    uses ``sqlite3.Row`` as its row factory. Each PRAGMA is applied
    independently: a ``sqlite3.DatabaseError`` from one statement is logged
    as a warning and the remaining statements are still attempted.

    Returns:
        The configured ``sqlite3.Connection``.
    """
    conn = sqlite3.connect(
        self.database,
        timeout=DB_CONNECT_TIMEOUT_SECONDS,
        check_same_thread=False,
    )
    conn.row_factory = sqlite3.Row

    pragmas = [
        "PRAGMA foreign_keys=ON",
        "PRAGMA journal_mode=WAL",
        "PRAGMA synchronous=NORMAL",
        f"PRAGMA busy_timeout={DB_BUSY_TIMEOUT_MS}",
        "PRAGMA temp_store=MEMORY",
        # The cache size is passed as a negative number (size given in KB).
        f"PRAGMA cache_size={-DB_CACHE_SIZE_KB}",
        f"PRAGMA wal_autocheckpoint={DB_WAL_AUTOCHECKPOINT_PAGES}",
    ]
    # mmap is optional: a configured size of 0 skips the pragma entirely.
    if DB_MMAP_SIZE_MB > 0:
        mmap_bytes = DB_MMAP_SIZE_MB * 1024 * 1024
        pragmas.append(f"PRAGMA mmap_size={mmap_bytes}")

    for pragma in pragmas:
        try:
            conn.execute(pragma)
        except sqlite3.DatabaseError as exc:
            logger.warning(f"设置数据库参数失败 ({pragma}): {exc}")

    return conn
def _close_connection(self, conn) -> None: def _close_connection(self, conn) -> None:
@@ -198,7 +227,20 @@ class PooledConnection:
def commit(self):
    """Commit the current transaction, retrying on lock conflicts.

    A commit that raises ``sqlite3.OperationalError`` is retried only when
    ``_is_lock_conflict_error`` recognises the error and fewer than
    ``DB_LOCK_RETRY_COUNT`` retries have been used. The wait before each
    retry doubles every time, starting from ``DB_LOCK_RETRY_BASE_MS``.

    Raises:
        sqlite3.OperationalError: If the error is not a lock conflict, or
            the retry budget is exhausted.
    """
    attempt = 0
    while True:
        try:
            self._conn.commit()
        except sqlite3.OperationalError as exc:
            retryable = _is_lock_conflict_error(exc) and attempt < DB_LOCK_RETRY_COUNT
            if not retryable:
                raise
        else:
            return

        # Exponential backoff: base * 2**attempt, converted from ms to seconds.
        delay = (DB_LOCK_RETRY_BASE_MS * (2**attempt)) / 1000.0
        logger.warning(
            f"数据库提交遇到锁冲突,{delay:.3f}s 后重试 "
            f"({attempt + 1}/{DB_LOCK_RETRY_COUNT})"
        )
        time.sleep(delay)
        attempt += 1
def rollback(self): def rollback(self):
"""回滚事务""" """回滚事务"""

View File

@@ -29,6 +29,12 @@ config = get_config()
USER_ACCOUNTS_EXPIRE_SECONDS = int(getattr(config, "USER_ACCOUNTS_EXPIRE_SECONDS", 3600)) USER_ACCOUNTS_EXPIRE_SECONDS = int(getattr(config, "USER_ACCOUNTS_EXPIRE_SECONDS", 3600))
BATCH_TASK_EXPIRE_SECONDS = int(getattr(config, "BATCH_TASK_EXPIRE_SECONDS", 21600)) BATCH_TASK_EXPIRE_SECONDS = int(getattr(config, "BATCH_TASK_EXPIRE_SECONDS", 21600))
PENDING_RANDOM_EXPIRE_SECONDS = int(getattr(config, "PENDING_RANDOM_EXPIRE_SECONDS", 7200)) PENDING_RANDOM_EXPIRE_SECONDS = int(getattr(config, "PENDING_RANDOM_EXPIRE_SECONDS", 7200))
# Database maintenance intervals, read once at import time from the config.
# The optimize interval is clamped to at least 300, so it can never be 0.
DB_PRAGMA_OPTIMIZE_INTERVAL_SECONDS = max(300, int(getattr(config, "DB_PRAGMA_OPTIMIZE_INTERVAL_SECONDS", 21600)))
# Negative values are clamped to 0 for the ANALYZE and WAL checkpoint
# intervals; start_database_maintenance_scheduler only starts those loops
# when the interval is greater than 0.
DB_ANALYZE_INTERVAL_SECONDS = max(0, int(getattr(config, "DB_ANALYZE_INTERVAL_SECONDS", 86400)))
DB_WAL_CHECKPOINT_INTERVAL_SECONDS = max(0, int(getattr(config, "DB_WAL_CHECKPOINT_INTERVAL_SECONDS", 43200)))
# The checkpoint mode is normalised (upper-cased, stripped) and then
# restricted to a fixed whitelist. checkpoint_database_wal interpolates this
# value directly into a PRAGMA statement, so anything outside the whitelist
# (including an empty/None setting) falls back to "PASSIVE".
DB_WAL_CHECKPOINT_MODE = str(getattr(config, "DB_WAL_CHECKPOINT_MODE", "PASSIVE") or "PASSIVE").upper().strip()
if DB_WAL_CHECKPOINT_MODE not in {"PASSIVE", "FULL", "RESTART", "TRUNCATE"}:
    DB_WAL_CHECKPOINT_MODE = "PASSIVE"
# 金山文档离线通知状态:每次掉线只通知一次,恢复在线后重置 # 金山文档离线通知状态:每次掉线只通知一次,恢复在线后重置
_kdocs_offline_notified: bool = False _kdocs_offline_notified: bool = False
@@ -275,6 +281,80 @@ def start_cleanup_scheduler() -> None:
logger.info("内存清理调度器已启动") logger.info("内存清理调度器已启动")
def _execute_db_statement(statement: str, *, commit: bool = False, fetchone: bool = False):
    """Run a single SQL statement on a connection borrowed from ``db_pool``.

    Args:
        statement: The SQL text passed to ``conn.execute``.
        commit: When True, ``conn.commit()`` is called after the statement
            (and after any fetch).
        fetchone: When True, ``conn.fetchone()`` is called after executing
            and its result is returned.

    Returns:
        The value of ``conn.fetchone()`` when ``fetchone`` is True,
        otherwise None.
    """
    # Imported here rather than at module level, as in the original code.
    import db_pool

    row = None
    with db_pool.get_db() as conn:
        conn.execute(statement)
        if fetchone:
            row = conn.fetchone()
        if commit:
            conn.commit()
    return row
def optimize_database_runtime() -> None:
    """Run ``PRAGMA optimize`` and log the outcome at debug level.

    If the statement yields a row, the row is included in the log message as
    a tuple; otherwise only the fact that the pragma ran is logged.
    """
    result = _execute_db_statement("PRAGMA optimize", fetchone=True)
    if not result:
        logger.debug("[DB维护] PRAGMA optimize 已执行")
        return
    logger.debug(f"[DB维护] PRAGMA optimize 已执行: {tuple(result)}")
def analyze_database_stats() -> None:
    """Run ``ANALYZE`` with a commit afterwards, then log completion at info level."""
    _execute_db_statement(statement="ANALYZE", commit=True)
    logger.info("[DB维护] ANALYZE 已完成")
def checkpoint_database_wal() -> None:
    """Run ``PRAGMA wal_checkpoint`` in the configured mode and log the result.

    The mode comes from ``DB_WAL_CHECKPOINT_MODE``. If the pragma yields a
    row, it is logged as a tuple at debug level; otherwise only the fact that
    the checkpoint ran is logged.
    """
    mode = DB_WAL_CHECKPOINT_MODE
    result = _execute_db_statement(f"PRAGMA wal_checkpoint({mode})", fetchone=True)
    if not result:
        logger.debug(f"[DB维护] WAL checkpoint({mode}) 已执行")
        return
    logger.debug(f"[DB维护] WAL checkpoint({mode}) 结果: {tuple(result)}")
def start_database_maintenance_scheduler() -> None:
    """Start the periodic database maintenance loops.

    Each loop is registered through ``_start_daemon_loop``. The
    ``PRAGMA optimize`` loop is always started; the ANALYZE and WAL
    checkpoint loops are started only when their configured interval is
    greater than zero. The effective intervals are logged once at the end.
    """
    # Columns: name, startup_delay, interval_seconds, job, error_tag, enabled.
    schedule = (
        (
            "db-optimize",
            180,
            DB_PRAGMA_OPTIMIZE_INTERVAL_SECONDS,
            optimize_database_runtime,
            "[DB维护] PRAGMA optimize 执行失败",
            True,
        ),
        (
            "db-analyze",
            300,
            DB_ANALYZE_INTERVAL_SECONDS,
            analyze_database_stats,
            "[DB维护] ANALYZE 执行失败",
            DB_ANALYZE_INTERVAL_SECONDS > 0,
        ),
        (
            "db-wal-checkpoint",
            420,
            DB_WAL_CHECKPOINT_INTERVAL_SECONDS,
            checkpoint_database_wal,
            "[DB维护] WAL checkpoint 执行失败",
            DB_WAL_CHECKPOINT_INTERVAL_SECONDS > 0,
        ),
    )

    for name, delay, interval, job, tag, enabled in schedule:
        if not enabled:
            continue
        _start_daemon_loop(
            name,
            startup_delay=delay,
            interval_seconds=interval,
            job=job,
            error_tag=tag,
        )

    summary = (
        "[DB维护] 调度器已启动: "
        f"optimize={DB_PRAGMA_OPTIMIZE_INTERVAL_SECONDS}s, "
        f"analyze={DB_ANALYZE_INTERVAL_SECONDS}s, "
        f"checkpoint={DB_WAL_CHECKPOINT_INTERVAL_SECONDS}s({DB_WAL_CHECKPOINT_MODE})"
    )
    logger.info(summary)
def start_kdocs_monitor() -> None: def start_kdocs_monitor() -> None:
"""启动金山文档状态监控""" """启动金山文档状态监控"""
_start_daemon_loop( _start_daemon_loop(