refactor: optimize structure, stability and runtime performance
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime
|
||||
@@ -10,6 +11,8 @@ from app_config import get_config
|
||||
from app_logger import get_logger
|
||||
from services.state import (
|
||||
cleanup_expired_ip_rate_limits,
|
||||
cleanup_expired_ip_request_rates,
|
||||
cleanup_expired_login_security_state,
|
||||
safe_cleanup_expired_batches,
|
||||
safe_cleanup_expired_captcha,
|
||||
safe_cleanup_expired_pending_random,
|
||||
@@ -31,6 +34,69 @@ PENDING_RANDOM_EXPIRE_SECONDS = int(getattr(config, "PENDING_RANDOM_EXPIRE_SECON
|
||||
_kdocs_offline_notified: bool = False
|
||||
|
||||
|
||||
def _to_int(value, default: int = 0) -> int:
|
||||
try:
|
||||
return int(value)
|
||||
except Exception:
|
||||
return int(default)
|
||||
|
||||
|
||||
def _collect_active_user_ids() -> set[int]:
    """Gather the integer ``user_id`` values found in the task-status entries.

    Iterates the ``(key, info)`` pairs produced by
    ``safe_iter_task_status_items()``. Entries whose ``info`` is not a dict,
    that carry no ``user_id``, or whose ``user_id`` cannot be converted to
    ``int`` are ignored.

    Returns:
        The set of user ids that could be read from the task-status entries.
    """
    collected: set[int] = set()
    for _key, entry in safe_iter_task_status_items():
        if not isinstance(entry, dict):
            continue
        raw_id = entry.get("user_id")
        if raw_id is None:
            continue
        try:
            numeric_id = int(raw_id)
        except Exception:
            # Unparseable ids are skipped rather than aborting the scan.
            continue
        collected.add(numeric_id)
    return collected
|
||||
|
||||
|
||||
def _find_expired_user_cache_ids(current_time: float, active_user_ids: set[int]) -> list[int]:
    """List the user ids whose cached accounts are old enough to be dropped.

    Args:
        current_time: Reference timestamp compared against each last-access
            value (presumably ``time.time()`` seconds; the caller passes that).
        active_user_ids: User ids that must never be reported as expired.

    Returns:
        Ids whose last access is more than ``USER_ACCOUNTS_EXPIRE_SECONDS``
        before ``current_time``, that are not in ``active_user_ids``, and for
        which ``safe_has_user`` returns a truthy value.
    """
    stale_ids = []
    access_records = safe_get_user_accounts_last_access_items() or []
    for raw_id, raw_ts in access_records:
        try:
            uid = int(raw_id)
            accessed_at = float(raw_ts)
        except Exception:
            # Malformed records are ignored instead of failing the whole pass.
            continue

        still_fresh = (current_time - accessed_at) <= USER_ACCOUNTS_EXPIRE_SECONDS
        if still_fresh or uid in active_user_ids:
            continue

        if safe_has_user(uid):
            stale_ids.append(uid)
    return stale_ids
|
||||
|
||||
|
||||
def _find_completed_task_status_ids(current_time: float) -> list[str]:
    """Return the keys of finished task-status entries that are old enough to drop.

    An entry qualifies when its ``status`` is one of the terminal values
    ("已完成", "失败", "已停止") and its ``start_time`` lies more than 600
    seconds before ``current_time``.

    Args:
        current_time: Reference timestamp compared against ``start_time``.

    Returns:
        The keys (as yielded by ``safe_iter_task_status_items()``) of the
        entries that qualify for removal.
    """
    terminal_statuses = ("已完成", "失败", "已停止")
    retention_seconds = 600  # 10 minutes

    completed_task_ids: list[str] = []
    for account_id, status_data in safe_iter_task_status_items():
        if not isinstance(status_data, dict):
            continue
        if status_data.get("status") not in terminal_statuses:
            continue

        try:
            start_time = float(status_data.get("start_time", 0) or 0)
        except (TypeError, ValueError, OverflowError):
            # A malformed start_time is treated like a missing one (0), so the
            # finished entry is still cleaned up instead of raising on every
            # pass and aborting the remaining cleanup steps.
            start_time = 0.0

        if (current_time - start_time) > retention_seconds:
            completed_task_ids.append(account_id)
    return completed_task_ids
|
||||
|
||||
|
||||
def _reap_zombie_processes() -> None:
    """Collect every already-exited child process without blocking.

    Calls ``os.waitpid(-1, os.WNOHANG)`` repeatedly until it reports that no
    exited child is pending (pid ``0``) or that there are no children at all
    (``ChildProcessError``). Any other error stops the loop; it is logged at
    debug level and never propagated.
    """
    while True:
        try:
            pid, _ = os.waitpid(-1, os.WNOHANG)
        except ChildProcessError:
            # No child processes exist: nothing to reap.
            return
        except Exception as e:
            # Best-effort housekeeping: record the failure instead of hiding it.
            logger.debug(f"回收僵尸进程时发生异常: {e}")
            return

        if pid == 0:
            # Children exist, but none has exited yet.
            return
        logger.debug(f"已回收僵尸进程: PID={pid}")
|
||||
|
||||
|
||||
def cleanup_expired_data() -> None:
|
||||
"""定期清理过期数据,防止内存泄漏(逻辑保持不变)。"""
|
||||
current_time = time.time()
|
||||
@@ -43,48 +109,36 @@ def cleanup_expired_data() -> None:
|
||||
if deleted_ips:
|
||||
logger.debug(f"已清理 {deleted_ips} 个过期IP限流记录")
|
||||
|
||||
expired_users = []
|
||||
last_access_items = safe_get_user_accounts_last_access_items()
|
||||
if last_access_items:
|
||||
task_items = safe_iter_task_status_items()
|
||||
active_user_ids = {int(info.get("user_id")) for _, info in task_items if info.get("user_id")}
|
||||
for user_id, last_access in last_access_items:
|
||||
if (current_time - float(last_access)) <= USER_ACCOUNTS_EXPIRE_SECONDS:
|
||||
continue
|
||||
if int(user_id) in active_user_ids:
|
||||
continue
|
||||
if safe_has_user(user_id):
|
||||
expired_users.append(int(user_id))
|
||||
deleted_ip_requests = cleanup_expired_ip_request_rates(current_time)
|
||||
if deleted_ip_requests:
|
||||
logger.debug(f"已清理 {deleted_ip_requests} 个过期IP请求频率记录")
|
||||
|
||||
login_cleanup_stats = cleanup_expired_login_security_state(current_time)
|
||||
login_cleanup_total = sum(int(v or 0) for v in login_cleanup_stats.values())
|
||||
if login_cleanup_total:
|
||||
logger.debug(
|
||||
"已清理登录风控缓存: "
|
||||
f"失败计数={login_cleanup_stats.get('failures', 0)}, "
|
||||
f"限流桶={login_cleanup_stats.get('rate_limits', 0)}, "
|
||||
f"扫描状态={login_cleanup_stats.get('scan_states', 0)}, "
|
||||
f"短时锁={login_cleanup_stats.get('ip_user_locks', 0)}, "
|
||||
f"告警状态={login_cleanup_stats.get('alerts', 0)}"
|
||||
)
|
||||
|
||||
active_user_ids = _collect_active_user_ids()
|
||||
expired_users = _find_expired_user_cache_ids(current_time, active_user_ids)
|
||||
for user_id in expired_users:
|
||||
safe_remove_user_accounts(user_id)
|
||||
if expired_users:
|
||||
logger.debug(f"已清理 {len(expired_users)} 个过期用户账号缓存")
|
||||
|
||||
completed_tasks = []
|
||||
for account_id, status_data in safe_iter_task_status_items():
|
||||
if status_data.get("status") in ["已完成", "失败", "已停止"]:
|
||||
start_time = float(status_data.get("start_time", 0) or 0)
|
||||
if (current_time - start_time) > 600: # 10分钟
|
||||
completed_tasks.append(account_id)
|
||||
for account_id in completed_tasks:
|
||||
completed_task_ids = _find_completed_task_status_ids(current_time)
|
||||
for account_id in completed_task_ids:
|
||||
safe_remove_task_status(account_id)
|
||||
if completed_tasks:
|
||||
logger.debug(f"已清理 {len(completed_tasks)} 个已完成任务状态")
|
||||
if completed_task_ids:
|
||||
logger.debug(f"已清理 {len(completed_task_ids)} 个已完成任务状态")
|
||||
|
||||
try:
|
||||
import os
|
||||
|
||||
while True:
|
||||
try:
|
||||
pid, status = os.waitpid(-1, os.WNOHANG)
|
||||
if pid == 0:
|
||||
break
|
||||
logger.debug(f"已回收僵尸进程: PID={pid}")
|
||||
except ChildProcessError:
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
_reap_zombie_processes()
|
||||
|
||||
deleted_batches = safe_cleanup_expired_batches(BATCH_TASK_EXPIRE_SECONDS, current_time)
|
||||
if deleted_batches:
|
||||
@@ -95,52 +149,39 @@ def cleanup_expired_data() -> None:
|
||||
logger.debug(f"已清理 {deleted_random} 个过期随机延迟任务")
|
||||
|
||||
|
||||
def check_kdocs_online_status() -> None:
|
||||
"""检测金山文档登录状态,如果离线则发送邮件通知管理员(每次掉线只通知一次)"""
|
||||
global _kdocs_offline_notified
|
||||
def _load_kdocs_monitor_config() -> str | None:
    """Read the system config and return the admin alert address, if monitoring applies.

    Returns:
        The stripped ``kdocs_admin_notify_email`` value when the system config
        exists, ``kdocs_enabled`` and ``kdocs_admin_notify_enabled`` are both
        non-zero, and the address is non-empty; otherwise ``None``.
    """
    import database

    system_cfg = database.get_system_config()
    if not system_cfg:
        return None
    if not _to_int(system_cfg.get("kdocs_enabled"), 0):
        return None

    notify_on = _to_int(system_cfg.get("kdocs_admin_notify_enabled"), 0)
    recipient = str(system_cfg.get("kdocs_admin_notify_email") or "").strip()
    return recipient if (notify_on and recipient) else None
|
||||
|
||||
|
||||
def _is_kdocs_offline(status: dict) -> tuple[bool, bool, bool | None]:
|
||||
login_required = bool(status.get("login_required", False))
|
||||
last_login_ok = status.get("last_login_ok")
|
||||
is_offline = login_required or (last_login_ok is False)
|
||||
return is_offline, login_required, last_login_ok
|
||||
|
||||
|
||||
def _send_kdocs_offline_alert(admin_notify_email: str, *, login_required: bool, last_login_ok) -> bool:
|
||||
try:
|
||||
import database
|
||||
from services.kdocs_uploader import get_kdocs_uploader
|
||||
import email_service
|
||||
|
||||
# 获取系统配置
|
||||
cfg = database.get_system_config()
|
||||
if not cfg:
|
||||
return
|
||||
|
||||
# 检查是否启用了金山文档功能
|
||||
kdocs_enabled = int(cfg.get("kdocs_enabled") or 0)
|
||||
if not kdocs_enabled:
|
||||
return
|
||||
|
||||
# 检查是否启用了管理员通知
|
||||
admin_notify_enabled = int(cfg.get("kdocs_admin_notify_enabled") or 0)
|
||||
admin_notify_email = (cfg.get("kdocs_admin_notify_email") or "").strip()
|
||||
if not admin_notify_enabled or not admin_notify_email:
|
||||
return
|
||||
|
||||
# 获取金山文档状态
|
||||
kdocs = get_kdocs_uploader()
|
||||
status = kdocs.get_status()
|
||||
login_required = status.get("login_required", False)
|
||||
last_login_ok = status.get("last_login_ok")
|
||||
|
||||
# 如果需要登录或最后登录状态不是成功
|
||||
is_offline = login_required or (last_login_ok is False)
|
||||
|
||||
if is_offline:
|
||||
# 已经通知过了,不再重复通知
|
||||
if _kdocs_offline_notified:
|
||||
logger.debug("[KDocs监控] 金山文档离线,已通知过,跳过重复通知")
|
||||
return
|
||||
|
||||
# 发送邮件通知
|
||||
try:
|
||||
import email_service
|
||||
|
||||
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
subject = "【金山文档离线告警】需要重新登录"
|
||||
body = f"""
|
||||
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
subject = "【金山文档离线告警】需要重新登录"
|
||||
body = f"""
|
||||
您好,
|
||||
|
||||
系统检测到金山文档上传功能已离线,需要重新扫码登录。
|
||||
@@ -155,58 +196,92 @@ def check_kdocs_online_status() -> None:
|
||||
---
|
||||
此邮件由系统自动发送,请勿直接回复。
|
||||
"""
|
||||
email_service.send_email_async(
|
||||
to_email=admin_notify_email,
|
||||
subject=subject,
|
||||
body=body,
|
||||
email_type="kdocs_offline_alert",
|
||||
)
|
||||
_kdocs_offline_notified = True # 标记为已通知
|
||||
logger.warning(f"[KDocs监控] 金山文档离线,已发送通知邮件到 {admin_notify_email}")
|
||||
except Exception as e:
|
||||
logger.error(f"[KDocs监控] 发送离线通知邮件失败: {e}")
|
||||
else:
|
||||
# 恢复在线,重置通知状态
|
||||
email_service.send_email_async(
|
||||
to_email=admin_notify_email,
|
||||
subject=subject,
|
||||
body=body,
|
||||
email_type="kdocs_offline_alert",
|
||||
)
|
||||
logger.warning(f"[KDocs监控] 金山文档离线,已发送通知邮件到 {admin_notify_email}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"[KDocs监控] 发送离线通知邮件失败: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def check_kdocs_online_status() -> None:
    """Check the KDocs login state and email the admin once per outage.

    The module-level flag ``_kdocs_offline_notified`` records that an alert
    was already sent, so repeated checks during the same outage stay silent.
    The flag is cleared again as soon as the uploader reports being online.
    Any exception is logged and swallowed so the caller's loop keeps running.
    """
    global _kdocs_offline_notified

    try:
        admin_notify_email = _load_kdocs_monitor_config()
        if not admin_notify_email:
            # Monitoring or admin notification is disabled / not configured.
            return

        from services.kdocs_uploader import get_kdocs_uploader

        kdocs = get_kdocs_uploader()
        status = kdocs.get_status() or {}
        is_offline, login_required, last_login_ok = _is_kdocs_offline(status)

        if is_offline:
            if _kdocs_offline_notified:
                logger.debug("[KDocs监控] 金山文档离线,已通知过,跳过重复通知")
                return

            # Mark as notified only when the alert was actually handed off,
            # so a failed send is retried on the next check.
            if _send_kdocs_offline_alert(
                admin_notify_email,
                login_required=login_required,
                last_login_ok=last_login_ok,
            ):
                _kdocs_offline_notified = True
            return

        # Back online: re-arm the alert for the next outage.
        if _kdocs_offline_notified:
            logger.info("[KDocs监控] 金山文档已恢复在线,重置通知状态")
            _kdocs_offline_notified = False
        logger.debug("[KDocs监控] 金山文档状态正常")

    except Exception as e:
        logger.error(f"[KDocs监控] 检测失败: {e}")
|
||||
|
||||
|
||||
def start_cleanup_scheduler() -> None:
|
||||
"""启动定期清理调度器"""
|
||||
|
||||
def cleanup_loop():
|
||||
def _start_daemon_loop(name: str, *, startup_delay: float, interval_seconds: float, job, error_tag: str):
|
||||
def loop():
|
||||
if startup_delay > 0:
|
||||
time.sleep(startup_delay)
|
||||
while True:
|
||||
try:
|
||||
time.sleep(300) # 每5分钟执行一次清理
|
||||
cleanup_expired_data()
|
||||
job()
|
||||
time.sleep(interval_seconds)
|
||||
except Exception as e:
|
||||
logger.error(f"清理任务执行失败: {e}")
|
||||
logger.error(f"{error_tag}: {e}")
|
||||
time.sleep(min(60.0, max(1.0, interval_seconds / 5.0)))
|
||||
|
||||
cleanup_thread = threading.Thread(target=cleanup_loop, daemon=True, name="cleanup-scheduler")
|
||||
cleanup_thread.start()
|
||||
thread = threading.Thread(target=loop, daemon=True, name=name)
|
||||
thread.start()
|
||||
return thread
|
||||
|
||||
|
||||
def start_cleanup_scheduler() -> None:
    """Start the background thread that periodically runs ``cleanup_expired_data``.

    The first run happens after 300 seconds and later runs follow at
    300-second intervals.
    """
    five_minutes = 300
    _start_daemon_loop(
        name="cleanup-scheduler",
        job=cleanup_expired_data,
        startup_delay=five_minutes,
        interval_seconds=five_minutes,
        error_tag="清理任务执行失败",
    )
    logger.info("内存清理调度器已启动")
|
||||
|
||||
|
||||
def start_kdocs_monitor() -> None:
    """Start the background thread that periodically checks the KDocs login state.

    The first check runs 60 seconds after start-up, and later checks follow
    at 300-second intervals. Exactly one monitor thread is started, via
    ``_start_daemon_loop``.
    """
    _start_daemon_loop(
        "kdocs-monitor",
        startup_delay=60,
        interval_seconds=300,
        job=check_kdocs_online_status,
        error_tag="[KDocs监控] 监控任务执行失败",
    )
    logger.info("[KDocs监控] 金山文档状态监控已启动(每5分钟检测一次)")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user