refactor: optimize structure, stability and runtime performance

2026-02-07 00:35:11 +08:00
parent fae21329d7
commit bf29ac1924
44 changed files with 6894 additions and 4792 deletions
--- a/services/screenshots.py
+++ b/services/screenshots.py
@@ -6,12 +6,14 @@ import os
 import shutil
 import subprocess
 import time
+from urllib.parse import urlsplit

 import database
 import email_service
 from api_browser import APIBrowser, get_cookie_jar_path, is_cookie_jar_fresh
 from app_config import get_config
 from app_logger import get_logger
+from app_security import sanitize_filename
 from browser_pool_worker import get_browser_worker_pool
 from services.client_log import log_to_client
 from services.runtime import get_socketio
@@ -194,6 +196,293 @@ def _emit(event: str, data: object, *, room: str | None = None) -> None:
        pass


+def _set_screenshot_running_status(user_id: int, account_id: str) -> None:
+    """Flag the account as currently taking a screenshot and broadcast the change.
+
+    Does nothing when ``safe_get_account`` returns no account for the given ids.
+    """
+    account = safe_get_account(user_id, account_id)
+    if account:
+        account.status = "截图中"
+        task_state = {"status": "运行中", "detail_status": "正在截图"}
+        safe_update_task_status(account_id, task_state)
+        # Push the refreshed account snapshot to the ``user_<id>`` room.
+        _emit("account_update", account.to_dict(), room=f"user_{user_id}")
+
+
+def _get_worker_display_info(browser_instance) -> tuple[str, int]:
+    """Return ``(worker_id, use_count)`` of a screenshot worker for log display.
+
+    Args:
+        browser_instance: Worker descriptor. Only ``dict`` values are inspected
+            (keys ``worker_id`` and ``use_count``); anything else yields the
+            placeholder result.
+
+    Returns:
+        The worker id as a string (``"?"`` when unknown) and the use count as
+        an int (``0`` when missing, falsy, or not convertible to int).
+    """
+    if not isinstance(browser_instance, dict):
+        return "?", 0
+
+    worker_id = str(browser_instance.get("worker_id", "?"))
+    try:
+        use_count = int(browser_instance.get("use_count", 0) or 0)
+    except (TypeError, ValueError, OverflowError):
+        # The value is only shown in a log line; a malformed counter must not
+        # abort the screenshot attempt.
+        use_count = 0
+    return worker_id, use_count
+
+
+def _get_proxy_context(account) -> tuple[dict | None, str | None]:
+    """Return the account's proxy configuration and its ``server`` entry.
+
+    The first element is ``account.proxy_config`` (``None`` when the attribute
+    is absent); the second is ``proxy_config.get("server")``, or ``None`` when
+    the configuration is missing or empty.
+    """
+    settings = getattr(account, "proxy_config", None)
+    if not settings:
+        return settings, None
+    return settings, settings.get("server")
+
+
+def _build_screenshot_targets(browse_type: str) -> tuple[str, str, str]:
+    """Build the URLs and the page script used for a screenshot attempt.
+
+    Args:
+        browse_type: Browse category label. Kept for interface stability; it
+            does not currently influence the result (see the ``bz`` note below).
+
+    Returns:
+        ``(index_url, target_url, run_script)``. ``index_url`` is
+        ``config.ZSGL_INDEX_URL`` or, when that is empty, ``/admin/index.aspx``
+        on the login host. ``target_url`` is the ``/admin/center.aspx`` page on
+        the same host. ``run_script`` is JavaScript that points the
+        ``mainframe`` iframe at ``target_url`` and sets
+        ``window.status='ready'`` once the sidebar nav has links and the iframe
+        document is complete (or immediately when no ``mainframe`` exists).
+    """
+    parsed = urlsplit(config.ZSGL_LOGIN_URL)
+    base = f"{parsed.scheme}://{parsed.netloc}"
+    # Every browse type maps to bz=0. The previous code branched on "注册前" but
+    # assigned 0 on both paths; its comment noted that after a site update
+    # bz=0 is the "应读" list. Restore a branch here if the site diverges again.
+    bz = 0
+
+    target_url = f"{base}/admin/center.aspx?bz={bz}"
+    index_url = config.ZSGL_INDEX_URL or f"{base}/admin/index.aspx"
+    run_script = (
+        "(function(){"
+        "function done(){window.status='ready';}"
+        "function ensureNav(){try{if(typeof loadMenuTree==='function'){loadMenuTree(true);}}catch(e){}}"
+        "function expandMenu(){"
+        "try{var body=document.body;if(body&&body.classList.contains('lay-mini')){body.classList.remove('lay-mini');}}catch(e){}"
+        "try{if(typeof mainPageResize==='function'){mainPageResize();}}catch(e){}"
+        "try{if(typeof toggleMainMenu==='function' && document.body && document.body.classList.contains('lay-mini')){toggleMainMenu();}}catch(e){}"
+        "try{var navRight=document.querySelector('.nav-right');if(navRight){navRight.style.display='block';}}catch(e){}"
+        "try{var mainNav=document.getElementById('main-nav');if(mainNav){mainNav.style.display='block';}}catch(e){}"
+        "}"
+        "function navReady(){"
+        "try{var nav=document.getElementById('sidebar-nav');return nav && nav.querySelectorAll('a').length>0;}catch(e){return false;}"
+        "}"
+        "function frameReady(){"
+        "try{var f=document.getElementById('mainframe');return f && f.contentDocument && f.contentDocument.readyState==='complete';}catch(e){return false;}"
+        "}"
+        "function check(){"
+        "if(navReady() && frameReady()){done();return;}"
+        "setTimeout(check,300);"
+        "}"
+        "var f=document.getElementById('mainframe');"
+        "ensureNav();"
+        "expandMenu();"
+        "if(!f){done();return;}"
+        f"f.src='{target_url}';"
+        "f.onload=function(){ensureNav();expandMenu();setTimeout(check,300);};"
+        "setTimeout(check,5000);"
+        "})();"
+    )
+    return index_url, target_url, run_script
+
+
+def _build_screenshot_output_path(username_prefix: str, account, browse_type: str) -> tuple[str, str]:
+    """Compose the screenshot file name and its path under ``SCREENSHOTS_DIR``.
+
+    The name combines the user prefix, the account remark (or username when
+    the remark is empty), the browse type and a ``get_beijing_now()``
+    timestamp, and is passed through ``sanitize_filename``.
+
+    Returns:
+        ``(filename, full_path)``.
+    """
+    stamp = get_beijing_now().strftime("%Y%m%d_%H%M%S")
+    label = account.remark or account.username
+    filename = sanitize_filename(f"{username_prefix}_{label}_{browse_type}_{stamp}.jpg")
+    full_path = os.path.join(SCREENSHOTS_DIR, filename)
+    return filename, full_path
+
+
+def _ensure_screenshot_login_state(
+    *,
+    account,
+    proxy_config,
+    cookie_path: str,
+    attempt: int,
+    max_retries: int,
+    user_id: int,
+    account_id: str,
+    custom_log,
+) -> str:
+    """Make sure a usable login session exists before taking a screenshot.
+
+    Returns:
+        ``"ok"`` when the cookie jar is fresh or the login refresh succeeds;
+        ``"retry"`` when the refresh fails on a second-or-later attempt and
+        attempts remain (after a 2 second pause); ``"fail"`` otherwise.
+    """
+    if is_cookie_jar_fresh(cookie_path):
+        return "ok"
+
+    def notify(message: str) -> None:
+        log_to_client(message, user_id, account_id)
+
+    notify("正在刷新登录态...")
+    if _ensure_login_cookies(account, proxy_config, custom_log):
+        return "ok"
+
+    # Only second-or-later attempts may ask the caller to retry; a failed
+    # refresh on the first attempt is reported as a failure straight away.
+    is_retry_attempt = attempt > 1
+    if is_retry_attempt:
+        notify("截图登录失败")
+    if is_retry_attempt and attempt < max_retries:
+        notify("将重试...")
+        time.sleep(2)
+        return "retry"
+
+    notify("❌ 截图失败: 登录失败")
+    return "fail"
+
+
+def _take_screenshot_once(
+    *,
+    index_url: str,
+    target_url: str,
+    screenshot_path: str,
+    cookie_path: str,
+    proxy_server: str | None,
+    run_script: str,
+    log_callback,
+) -> str:
+    """Run one screenshot attempt and validate the resulting file.
+
+    The index page (with ``run_script`` and the ``ready`` window status) is
+    tried first; the bare target page is the fallback.
+
+    Returns:
+        ``"failed"`` when neither capture call reports success, ``"success"``
+        when the output file exists and is larger than 1000 bytes, and
+        ``"invalid"`` otherwise (an undersized file is deleted).
+    """
+    jar = cookie_path if is_cookie_jar_fresh(cookie_path) else None
+
+    # (url, run_script, window_status), tried in order until one succeeds.
+    plans = (
+        (index_url, run_script, "ready"),
+        (target_url, None, None),
+    )
+    # ``any`` short-circuits, so the fallback runs only if the first call fails.
+    captured = any(
+        take_screenshot_wkhtmltoimage(
+            url,
+            screenshot_path,
+            cookies_path=jar,
+            proxy_server=proxy_server,
+            run_script=script,
+            window_status=status,
+            log_callback=log_callback,
+        )
+        for url, script, status in plans
+    )
+    if not captured:
+        return "failed"
+
+    if not os.path.exists(screenshot_path):
+        return "invalid"
+    if os.path.getsize(screenshot_path) > 1000:
+        return "success"
+
+    # Undersized output is treated as a broken capture and removed.
+    os.remove(screenshot_path)
+    return "invalid"
+
+
+def _get_result_screenshot_path(result) -> str | None:
+    """Return the screenshot path under ``SCREENSHOTS_DIR`` for a successful result.
+
+    Yields ``None`` when ``result`` is empty, is not marked ``success``, or
+    carries no ``filename``.
+    """
+    if not result or not result.get("success"):
+        return None
+    filename = result.get("filename")
+    if not filename:
+        return None
+    return os.path.join(SCREENSHOTS_DIR, filename)
+
+
+def _enqueue_kdocs_upload_if_needed(user_id: int, account_id: str, account, screenshot_path: str | None) -> None:
+    """Queue a KDocs (Kingsoft Docs) upload of the screenshot when configured.
+
+    The upload is queued only when all of the following hold: a screenshot
+    path is given, the system config has ``kdocs_enabled == 1`` and a
+    non-empty ``kdocs_doc_url``, the user's settings have
+    ``kdocs_auto_upload == 1``, and both a unit (user setting or system
+    default) and the account remark are non-empty.
+
+    Missing unit / remark are reported to the client; when both are missing,
+    both reasons are logged (as the code did before it was extracted into this
+    helper). A rejected enqueue is reported as a full queue.
+    """
+    if not screenshot_path:
+        return
+
+    cfg = database.get_system_config() or {}
+    if int(cfg.get("kdocs_enabled", 0) or 0) != 1:
+        return
+
+    doc_url = (cfg.get("kdocs_doc_url") or "").strip()
+    if not doc_url:
+        return
+
+    user_cfg = database.get_user_kdocs_settings(user_id) or {}
+    if int(user_cfg.get("kdocs_auto_upload", 0) or 0) != 1:
+        return
+
+    unit = (user_cfg.get("kdocs_unit") or cfg.get("kdocs_default_unit") or "").strip()
+    name = (account.remark or "").strip()
+    # Report every missing prerequisite rather than stopping at the first, so
+    # the user can fix both in one go.
+    if not unit:
+        log_to_client("表格上传跳过: 未配置县区", user_id, account_id)
+    if not name:
+        log_to_client("表格上传跳过: 账号备注为空", user_id, account_id)
+    if not (unit and name):
+        return
+
+    # Imported lazily, as in the original code (presumably to avoid an import
+    # cycle or start-up cost; confirm before hoisting to module level).
+    from services.kdocs_uploader import get_kdocs_uploader
+
+    ok = get_kdocs_uploader().enqueue_upload(
+        user_id=user_id,
+        account_id=account_id,
+        unit=unit,
+        name=name,
+        image_path=screenshot_path,
+    )
+    if not ok:
+        log_to_client("表格上传排队失败: 队列已满", user_id, account_id)
+
+
+def _dispatch_screenshot_result(
+    *,
+    user_id: int,
+    account_id: str,
+    source: str,
+    browse_type: str,
+    browse_result: dict,
+    result,
+    account,
+    user_info,
+) -> None:
+    """Forward a screenshot result to the upload, batch and e-mail channels.
+
+    Steps, in order:
+      1. On a successful screenshot, try to queue the KDocs upload; any
+         exception there is logged as a warning and otherwise ignored.
+      2. If ``source`` maps to a batch id, record the result for that batch
+         and stop.
+      3. Otherwise, for sources starting with ``user_scheduled``, send the
+         task-complete e-mail when the user has an address and has
+         notifications enabled.
+    """
+    batch_id = _get_batch_id_from_source(source)
+    screenshot_path = _get_result_screenshot_path(result)
+    account_name = account.remark or account.username
+
+    try:
+        upload_wanted = result and result.get("success") and screenshot_path
+        if upload_wanted:
+            _enqueue_kdocs_upload_if_needed(user_id, account_id, account, screenshot_path)
+    except Exception as kdocs_error:
+        logger.warning(f"表格上传任务提交失败: {kdocs_error}")
+
+    if batch_id:
+        # Batch runs aggregate results; no per-account e-mail is sent here.
+        _batch_task_record_result(
+            batch_id=batch_id,
+            account_name=account_name,
+            screenshot_path=screenshot_path,
+            total_items=browse_result.get("total_items", 0),
+            total_attachments=browse_result.get("total_attachments", 0),
+        )
+        return
+
+    if not source or not source.startswith("user_scheduled"):
+        return
+    if not (user_info and user_info.get("email")):
+        return
+    if not database.get_user_email_notify(user_id):
+        return
+
+    def relay_to_client(msg):
+        return log_to_client(msg, user_id, account_id)
+
+    email_service.send_task_complete_email_async(
+        user_id=user_id,
+        email=user_info["email"],
+        username=user_info["username"],
+        account_name=account_name,
+        browse_type=browse_type,
+        total_items=browse_result.get("total_items", 0),
+        total_attachments=browse_result.get("total_attachments", 0),
+        screenshot_path=screenshot_path,
+        log_callback=relay_to_client,
+    )
+
+
+def _finalize_screenshot_callback_state(user_id: int, account_id: str, account) -> None:
+    """Reset the account to idle once the screenshot callback fires.
+
+    Clears the running flag, restores the "not started" status text, drops the
+    task status entry and broadcasts the updated account to the user's room.
+    """
+    account.is_running, account.status = False, "未开始"
+    safe_remove_task_status(account_id)
+    room_name = f"user_{user_id}"
+    _emit("account_update", account.to_dict(), room=room_name)
+
+
+def _persist_browse_log_after_screenshot(
+    *,
+    user_id: int,
+    account_id: str,
+    account,
+    browse_type: str,
+    source: str,
+    task_start_time,
+    browse_result,
+) -> None:
+    """Write the browse-task log entry once the screenshot step has finished.
+
+    The entry is always recorded with ``status="success"``; its duration is
+    the whole number of seconds elapsed since ``task_start_time`` (a
+    ``time.time()``-style timestamp), and the item / attachment totals come
+    from ``browse_result`` (defaulting to 0).
+    """
+    # Uses the module-level ``time`` import; the former function-local
+    # ``import time as time_module`` alias was redundant.
+    total_elapsed = int(time.time() - task_start_time)
+    database.create_task_log(
+        user_id=user_id,
+        account_id=account_id,
+        username=account.username,
+        browse_type=browse_type,
+        status="success",
+        total_items=browse_result.get("total_items", 0),
+        total_attachments=browse_result.get("total_attachments", 0),
+        duration=total_elapsed,
+        source=source,
+    )
+
+
 def take_screenshot_for_account(
    user_id,
    account_id,
@@ -213,21 +502,21 @@ def take_screenshot_for_account(
    # 标记账号正在截图（防止重复提交截图任务）
    account.is_running = True

+
+    user_info = database.get_user_by_id(user_id)
+    username_prefix = user_info["username"] if user_info else f"user{user_id}"
+
    def screenshot_task(
        browser_instance, user_id, account_id, account, browse_type, source, task_start_time, browse_result
    ):
        """在worker线程中执行的截图任务"""
        # ✅ 获得worker后，立即更新状态为"截图中"
-        acc = safe_get_account(user_id, account_id)
-        if acc:
-            acc.status = "截图中"
-            safe_update_task_status(account_id, {"status": "运行中", "detail_status": "正在截图"})
-            _emit("account_update", acc.to_dict(), room=f"user_{user_id}")
+        _set_screenshot_running_status(user_id, account_id)

        max_retries = 3
-        proxy_config = account.proxy_config if hasattr(account, "proxy_config") else None
-        proxy_server = proxy_config.get("server") if proxy_config else None
+        proxy_config, proxy_server = _get_proxy_context(account)
        cookie_path = get_cookie_jar_path(account.username)
+        index_url, target_url, run_script = _build_screenshot_targets(browse_type)

        for attempt in range(1, max_retries + 1):
            try:
@@ -239,8 +528,7 @@ def take_screenshot_for_account(
                if attempt > 1:
                    log_to_client(f"🔄 第 {attempt} 次截图尝试...", user_id, account_id)

-                worker_id = browser_instance.get("worker_id", "?") if isinstance(browser_instance, dict) else "?"
-                use_count = browser_instance.get("use_count", 0) if isinstance(browser_instance, dict) else 0
+                worker_id, use_count = _get_worker_display_info(browser_instance)
                log_to_client(
                    f"使用Worker-{worker_id}执行截图（已执行{use_count}次）",
                    user_id,
@@ -250,99 +538,39 @@ def take_screenshot_for_account(
                def custom_log(message: str):
                    log_to_client(message, user_id, account_id)

-                # 智能登录状态检查：只在必要时才刷新登录
-                should_refresh_login = not is_cookie_jar_fresh(cookie_path)
-                if should_refresh_login and attempt > 1:
-                    # 重试时刷新登录（attempt > 1 表示第2次及以后的尝试）
-                    log_to_client("正在刷新登录态...", user_id, account_id)
-                    if not _ensure_login_cookies(account, proxy_config, custom_log):
-                        log_to_client("截图登录失败", user_id, account_id)
-                        if attempt < max_retries:
-                            log_to_client("将重试...", user_id, account_id)
-                            time.sleep(2)
-                            continue
-                        log_to_client("❌ 截图失败: 登录失败", user_id, account_id)
-                        return {"success": False, "error": "登录失败"}
-                elif should_refresh_login:
-                    # 首次尝试时快速检查登录状态
-                    log_to_client("正在刷新登录态...", user_id, account_id)
-                    if not _ensure_login_cookies(account, proxy_config, custom_log):
-                        log_to_client("❌ 截图失败: 登录失败", user_id, account_id)
-                        return {"success": False, "error": "登录失败"}
+                login_state = _ensure_screenshot_login_state(
+                    account=account,
+                    proxy_config=proxy_config,
+                    cookie_path=cookie_path,
+                    attempt=attempt,
+                    max_retries=max_retries,
+                    user_id=user_id,
+                    account_id=account_id,
+                    custom_log=custom_log,
+                )
+                if login_state == "retry":
+                    continue
+                if login_state == "fail":
+                    return {"success": False, "error": "登录失败"}

                log_to_client(f"导航到 '{browse_type}' 页面...", user_id, account_id)

-                from urllib.parse import urlsplit
-
-                parsed = urlsplit(config.ZSGL_LOGIN_URL)
-                base = f"{parsed.scheme}://{parsed.netloc}"
-                if "注册前" in str(browse_type):
-                    bz = 0
-                else:
-                    bz = 0  # 应读（网站更新后 bz=0 为应读）
-                target_url = f"{base}/admin/center.aspx?bz={bz}"
-                index_url = config.ZSGL_INDEX_URL or f"{base}/admin/index.aspx"
-                run_script = (
-                    "(function(){"
-                    "function done(){window.status='ready';}"
-                    "function ensureNav(){try{if(typeof loadMenuTree==='function'){loadMenuTree(true);}}catch(e){}}"
-                    "function expandMenu(){"
-                    "try{var body=document.body;if(body&&body.classList.contains('lay-mini')){body.classList.remove('lay-mini');}}catch(e){}"
-                    "try{if(typeof mainPageResize==='function'){mainPageResize();}}catch(e){}"
-                    "try{if(typeof toggleMainMenu==='function' && document.body && document.body.classList.contains('lay-mini')){toggleMainMenu();}}catch(e){}"
-                    "try{var navRight=document.querySelector('.nav-right');if(navRight){navRight.style.display='block';}}catch(e){}"
-                    "try{var mainNav=document.getElementById('main-nav');if(mainNav){mainNav.style.display='block';}}catch(e){}"
-                    "}"
-                    "function navReady(){"
-                    "try{var nav=document.getElementById('sidebar-nav');return nav && nav.querySelectorAll('a').length>0;}catch(e){return false;}"
-                    "}"
-                    "function frameReady(){"
-                    "try{var f=document.getElementById('mainframe');return f && f.contentDocument && f.contentDocument.readyState==='complete';}catch(e){return false;}"
-                    "}"
-                    "function check(){"
-                    "if(navReady() && frameReady()){done();return;}"
-                    "setTimeout(check,300);"
-                    "}"
-                    "var f=document.getElementById('mainframe');"
-                    "ensureNav();"
-                    "expandMenu();"
-                    "if(!f){done();return;}"
-                    f"f.src='{target_url}';"
-                    "f.onload=function(){ensureNav();expandMenu();setTimeout(check,300);};"
-                    "setTimeout(check,5000);"
-                    "})();"
-                )
-
-                timestamp = get_beijing_now().strftime("%Y%m%d_%H%M%S")
-
-                user_info = database.get_user_by_id(user_id)
-                username_prefix = user_info["username"] if user_info else f"user{user_id}"
-                login_account = account.remark if account.remark else account.username
-                screenshot_filename = f"{username_prefix}_{login_account}_{browse_type}_{timestamp}.jpg"
-                screenshot_path = os.path.join(SCREENSHOTS_DIR, screenshot_filename)
-
-                cookies_for_shot = cookie_path if is_cookie_jar_fresh(cookie_path) else None
-                if take_screenshot_wkhtmltoimage(
-                    index_url,
-                    screenshot_path,
-                    cookies_path=cookies_for_shot,
+                screenshot_filename, screenshot_path = _build_screenshot_output_path(username_prefix, account, browse_type)
+                shot_state = _take_screenshot_once(
+                    index_url=index_url,
+                    target_url=target_url,
+                    screenshot_path=screenshot_path,
+                    cookie_path=cookie_path,
                    proxy_server=proxy_server,
                    run_script=run_script,
-                    window_status="ready",
                    log_callback=custom_log,
-                ) or take_screenshot_wkhtmltoimage(
-                    target_url,
-                    screenshot_path,
-                    cookies_path=cookies_for_shot,
-                    proxy_server=proxy_server,
-                    log_callback=custom_log,
-                ):
-                    if os.path.exists(screenshot_path) and os.path.getsize(screenshot_path) > 1000:
-                        log_to_client(f"[OK] 截图成功: {screenshot_filename}", user_id, account_id)
-                        return {"success": True, "filename": screenshot_filename}
+                )
+                if shot_state == "success":
+                    log_to_client(f"[OK] 截图成功: {screenshot_filename}", user_id, account_id)
+                    return {"success": True, "filename": screenshot_filename}
+
+                if shot_state == "invalid":
                    log_to_client("截图文件异常,将重试", user_id, account_id)
-                    if os.path.exists(screenshot_path):
-                        os.remove(screenshot_path)
                else:
                    log_to_client("截图保存失败", user_id, account_id)

@@ -361,12 +589,7 @@ def take_screenshot_for_account(
    def screenshot_callback(result, error):
        """截图完成回调"""
        try:
-            account.is_running = False
-            account.status = "未开始"
-
-            safe_remove_task_status(account_id)
-
-            _emit("account_update", account.to_dict(), room=f"user_{user_id}")
+            _finalize_screenshot_callback_state(user_id, account_id, account)

            if error:
                log_to_client(f"❌ 截图失败: {error}", user_id, account_id)
@@ -375,84 +598,27 @@ def take_screenshot_for_account(
                log_to_client(f"❌ 截图失败: {error_msg}", user_id, account_id)

            if task_start_time and browse_result:
-                import time as time_module
-
-                total_elapsed = int(time_module.time() - task_start_time)
-                database.create_task_log(
+                _persist_browse_log_after_screenshot(
                    user_id=user_id,
                    account_id=account_id,
-                    username=account.username,
+                    account=account,
                    browse_type=browse_type,
-                    status="success",
-                    total_items=browse_result.get("total_items", 0),
-                    total_attachments=browse_result.get("total_attachments", 0),
-                    duration=total_elapsed,
                    source=source,
+                    task_start_time=task_start_time,
+                    browse_result=browse_result,
                )

                try:
-                    batch_id = _get_batch_id_from_source(source)
-
-                    screenshot_path = None
-                    if result and result.get("success") and result.get("filename"):
-                        screenshot_path = os.path.join(SCREENSHOTS_DIR, result["filename"])
-
-                    account_name = account.remark if account.remark else account.username
-
-                    try:
-                        if screenshot_path and result and result.get("success"):
-                            cfg = database.get_system_config() or {}
-                            if int(cfg.get("kdocs_enabled", 0) or 0) == 1:
-                                doc_url = (cfg.get("kdocs_doc_url") or "").strip()
-                                if doc_url:
-                                    user_cfg = database.get_user_kdocs_settings(user_id) or {}
-                                    if int(user_cfg.get("kdocs_auto_upload", 0) or 0) == 1:
-                                        unit = (
-                                            user_cfg.get("kdocs_unit") or cfg.get("kdocs_default_unit") or ""
-                                        ).strip()
-                                        name = (account.remark or "").strip()
-                                        if unit and name:
-                                            from services.kdocs_uploader import get_kdocs_uploader
-
-                                            ok = get_kdocs_uploader().enqueue_upload(
-                                                user_id=user_id,
-                                                account_id=account_id,
-                                                unit=unit,
-                                                name=name,
-                                                image_path=screenshot_path,
-                                            )
-                                            if not ok:
-                                                log_to_client("表格上传排队失败: 队列已满", user_id, account_id)
-                                        else:
-                                            if not unit:
-                                                log_to_client("表格上传跳过: 未配置县区", user_id, account_id)
-                                            if not name:
-                                                log_to_client("表格上传跳过: 账号备注为空", user_id, account_id)
-                    except Exception as kdocs_error:
-                        logger.warning(f"表格上传任务提交失败: {kdocs_error}")
-
-                    if batch_id:
-                        _batch_task_record_result(
-                            batch_id=batch_id,
-                            account_name=account_name,
-                            screenshot_path=screenshot_path,
-                            total_items=browse_result.get("total_items", 0),
-                            total_attachments=browse_result.get("total_attachments", 0),
-                        )
-                    elif source and source.startswith("user_scheduled"):
-                        user_info = database.get_user_by_id(user_id)
-                        if user_info and user_info.get("email") and database.get_user_email_notify(user_id):
-                            email_service.send_task_complete_email_async(
-                                user_id=user_id,
-                                email=user_info["email"],
-                                username=user_info["username"],
-                                account_name=account_name,
-                                browse_type=browse_type,
-                                total_items=browse_result.get("total_items", 0),
-                                total_attachments=browse_result.get("total_attachments", 0),
-                                screenshot_path=screenshot_path,
-                                log_callback=lambda msg: log_to_client(msg, user_id, account_id),
-                            )
+                    _dispatch_screenshot_result(
+                        user_id=user_id,
+                        account_id=account_id,
+                        source=source,
+                        browse_type=browse_type,
+                        browse_result=browse_result,
+                        result=result,
+                        account=account,
+                        user_info=user_info,
+                    )
                except Exception as email_error:
                    logger.warning(f"发送任务完成邮件失败: {email_error}")
        except Exception as e: