refactor: optimize structure, stability and runtime performance

This commit is contained in:
2026-02-07 00:35:11 +08:00
parent fae21329d7
commit bf29ac1924
44 changed files with 6894 additions and 4792 deletions

View File

@@ -6,12 +6,14 @@ import os
import shutil
import subprocess
import time
from urllib.parse import urlsplit
import database
import email_service
from api_browser import APIBrowser, get_cookie_jar_path, is_cookie_jar_fresh
from app_config import get_config
from app_logger import get_logger
from app_security import sanitize_filename
from browser_pool_worker import get_browser_worker_pool
from services.client_log import log_to_client
from services.runtime import get_socketio
@@ -194,6 +196,293 @@ def _emit(event: str, data: object, *, room: str | None = None) -> None:
pass
def _set_screenshot_running_status(user_id: int, account_id: str) -> None:
    """Mark the account as "taking a screenshot" and broadcast the change.

    The account is looked up with ``safe_get_account``; when no account is
    found the function returns without doing anything. Otherwise the account
    status and the task status are updated and an ``account_update`` event is
    emitted to the ``user_<user_id>`` room.
    """
    account = safe_get_account(user_id, account_id)
    if account:
        account.status = "截图中"
        task_status = {"status": "运行中", "detail_status": "正在截图"}
        safe_update_task_status(account_id, task_status)
        _emit("account_update", account.to_dict(), room=f"user_{user_id}")
def _get_worker_display_info(browser_instance) -> tuple[str, int]:
"""获取截图 worker 的展示信息。"""
if isinstance(browser_instance, dict):
return str(browser_instance.get("worker_id", "?")), int(browser_instance.get("use_count", 0) or 0)
return "?", 0
def _get_proxy_context(account) -> tuple[dict | None, str | None]:
"""提取截图阶段代理配置。"""
proxy_config = account.proxy_config if hasattr(account, "proxy_config") else None
proxy_server = proxy_config.get("server") if proxy_config else None
return proxy_config, proxy_server
def _build_screenshot_targets(browse_type: str) -> tuple[str, str, str]:
    """Build the URLs and the in-page script used for a screenshot.

    Args:
        browse_type: Browse category of the task. It currently has no effect
            on the result (see the note on ``bz`` below) and is kept so that
            callers do not need to change.

    Returns:
        ``(index_url, target_url, run_script)``:

        * ``index_url`` is ``config.ZSGL_INDEX_URL`` or, when that is falsy,
          ``<base>/admin/index.aspx``.
        * ``target_url`` is ``<base>/admin/center.aspx?bz=0``.
        * ``run_script`` is JavaScript that points the ``mainframe`` iframe
          at ``target_url`` and sets ``window.status`` to ``'ready'`` when
          the page is considered loaded.

        ``<base>`` is the scheme and host of ``config.ZSGL_LOGIN_URL``.
    """
    parsed = urlsplit(config.ZSGL_LOGIN_URL)
    base = f"{parsed.scheme}://{parsed.netloc}"

    # The previous if/else on browse_type assigned 0 in both branches, so the
    # conditional was dead code and has been collapsed.
    # NOTE(review): if a browse type should map to a different ``bz`` value,
    # reintroduce the branch here with the correct value.
    bz = 0

    target_url = f"{base}/admin/center.aspx?bz={bz}"
    index_url = config.ZSGL_INDEX_URL or f"{base}/admin/index.aspx"

    # The script:
    #   * done(): sets window.status='ready'.
    #   * ensureNav()/expandMenu(): best-effort calls that load the menu tree,
    #     drop the 'lay-mini' body class and force the nav elements visible;
    #     every step swallows its own errors.
    #   * check(): polls every 300 ms until the sidebar has links and the
    #     iframe document is complete, then calls done().
    #   * If there is no 'mainframe' element it finishes immediately;
    #     otherwise it loads target_url into the iframe, re-checks on load,
    #     and also schedules a check after 5000 ms.
    run_script = (
        "(function(){"
        "function done(){window.status='ready';}"
        "function ensureNav(){try{if(typeof loadMenuTree==='function'){loadMenuTree(true);}}catch(e){}}"
        "function expandMenu(){"
        "try{var body=document.body;if(body&&body.classList.contains('lay-mini')){body.classList.remove('lay-mini');}}catch(e){}"
        "try{if(typeof mainPageResize==='function'){mainPageResize();}}catch(e){}"
        "try{if(typeof toggleMainMenu==='function' && document.body && document.body.classList.contains('lay-mini')){toggleMainMenu();}}catch(e){}"
        "try{var navRight=document.querySelector('.nav-right');if(navRight){navRight.style.display='block';}}catch(e){}"
        "try{var mainNav=document.getElementById('main-nav');if(mainNav){mainNav.style.display='block';}}catch(e){}"
        "}"
        "function navReady(){"
        "try{var nav=document.getElementById('sidebar-nav');return nav && nav.querySelectorAll('a').length>0;}catch(e){return false;}"
        "}"
        "function frameReady(){"
        "try{var f=document.getElementById('mainframe');return f && f.contentDocument && f.contentDocument.readyState==='complete';}catch(e){return false;}"
        "}"
        "function check(){"
        "if(navReady() && frameReady()){done();return;}"
        "setTimeout(check,300);"
        "}"
        "var f=document.getElementById('mainframe');"
        "ensureNav();"
        "expandMenu();"
        "if(!f){done();return;}"
        f"f.src='{target_url}';"
        "f.onload=function(){ensureNav();expandMenu();setTimeout(check,300);};"
        "setTimeout(check,5000);"
        "})();"
    )
    return index_url, target_url, run_script
def _build_screenshot_output_path(username_prefix: str, account, browse_type: str) -> tuple[str, str]:
    """Compose the screenshot file name and its path under ``SCREENSHOTS_DIR``.

    The raw name is
    ``<username_prefix>_<remark or username>_<browse_type>_<YYYYmmdd_HHMMSS>.jpg``
    (timestamp taken from ``get_beijing_now()``) and is passed through
    ``sanitize_filename`` before being joined to the screenshots directory.

    Returns:
        ``(screenshot_filename, screenshot_path)``.
    """
    stamp = get_beijing_now().strftime("%Y%m%d_%H%M%S")
    # Prefer the account remark as the display name; fall back to the username.
    display_name = account.remark or account.username
    safe_name = sanitize_filename(f"{username_prefix}_{display_name}_{browse_type}_{stamp}.jpg")
    full_path = os.path.join(SCREENSHOTS_DIR, safe_name)
    return safe_name, full_path
def _ensure_screenshot_login_state(
    *,
    account,
    proxy_config,
    cookie_path: str,
    attempt: int,
    max_retries: int,
    user_id: int,
    account_id: str,
    custom_log,
) -> str:
    """Make sure a usable login session exists before taking a screenshot.

    When the cookie jar at ``cookie_path`` is not fresh, the login cookies are
    refreshed through ``_ensure_login_cookies``.

    Returns:
        ``"ok"`` when the cookie jar is fresh or the refresh succeeded,
        ``"retry"`` when the refresh failed and ``attempt < max_retries``
        (after sleeping 2 seconds), and ``"fail"`` when the refresh failed on
        the last allowed attempt.
    """
    if is_cookie_jar_fresh(cookie_path):
        return "ok"

    def notify(message: str) -> None:
        log_to_client(message, user_id, account_id)

    notify("正在刷新登录态...")
    if _ensure_login_cookies(account, proxy_config, custom_log):
        return "ok"

    # The failure line is only reported from the second attempt onwards.
    if attempt > 1:
        notify("截图登录失败")

    if attempt >= max_retries:
        notify("❌ 截图失败: 登录失败")
        return "fail"

    notify("将重试...")
    time.sleep(2)
    return "retry"
def _take_screenshot_once(
    *,
    index_url: str,
    target_url: str,
    screenshot_path: str,
    cookie_path: str,
    proxy_server: str | None,
    run_script: str,
    log_callback,
) -> str:
    """Run one screenshot attempt and validate the produced file.

    The index page is captured first (with ``run_script`` and waiting for the
    ``"ready"`` window status); if that capture reports failure, the target
    page is captured directly without a script.

    Returns:
        ``"failed"`` when both captures report failure, ``"success"`` when
        the output file exists and is larger than 1000 bytes, and
        ``"invalid"`` otherwise (an existing undersized file is deleted).
    """
    # Only hand the cookie jar to the capture tool while it is still fresh.
    cookies_for_shot = cookie_path if is_cookie_jar_fresh(cookie_path) else None

    # (url, run_script, window_status), tried in order until one succeeds.
    plans = (
        (index_url, run_script, "ready"),
        (target_url, None, None),
    )
    captured = any(
        take_screenshot_wkhtmltoimage(
            url,
            screenshot_path,
            cookies_path=cookies_for_shot,
            proxy_server=proxy_server,
            run_script=script,
            window_status=status,
            log_callback=log_callback,
        )
        for url, script, status in plans
    )
    if not captured:
        return "failed"

    if not os.path.exists(screenshot_path):
        return "invalid"
    if os.path.getsize(screenshot_path) > 1000:
        return "success"

    # An output this small is treated as a broken image and discarded.
    os.remove(screenshot_path)
    return "invalid"
def _get_result_screenshot_path(result) -> str | None:
"""从截图结果中提取截图文件绝对路径。"""
if result and result.get("success") and result.get("filename"):
return os.path.join(SCREENSHOTS_DIR, result["filename"])
return None
def _enqueue_kdocs_upload_if_needed(user_id: int, account_id: str, account, screenshot_path: str | None) -> None:
"""按配置提交金山文档上传任务。"""
if not screenshot_path:
return
cfg = database.get_system_config() or {}
if int(cfg.get("kdocs_enabled", 0) or 0) != 1:
return
doc_url = (cfg.get("kdocs_doc_url") or "").strip()
if not doc_url:
return
user_cfg = database.get_user_kdocs_settings(user_id) or {}
if int(user_cfg.get("kdocs_auto_upload", 0) or 0) != 1:
return
unit = (user_cfg.get("kdocs_unit") or cfg.get("kdocs_default_unit") or "").strip()
name = (account.remark or "").strip()
if not unit:
log_to_client("表格上传跳过: 未配置县区", user_id, account_id)
return
if not name:
log_to_client("表格上传跳过: 账号备注为空", user_id, account_id)
return
from services.kdocs_uploader import get_kdocs_uploader
ok = get_kdocs_uploader().enqueue_upload(
user_id=user_id,
account_id=account_id,
unit=unit,
name=name,
image_path=screenshot_path,
)
if not ok:
log_to_client("表格上传排队失败: 队列已满", user_id, account_id)
def _dispatch_screenshot_result(
    *,
    user_id: int,
    account_id: str,
    source: str,
    browse_type: str,
    browse_result: dict,
    result,
    account,
    user_info,
) -> None:
    """Forward a screenshot result to the upload, batch and email paths.

    Steps, in order:

    1. For a successful result, try to queue the KDocs upload; any exception
       from that step is logged as a warning and not propagated.
    2. When ``source`` maps to a batch id, record the result in the batch
       statistics and stop.
    3. Otherwise, for sources starting with ``"user_scheduled"``, send the
       task-complete email if the user has an email address and has email
       notifications enabled.
    """
    batch_id = _get_batch_id_from_source(source)
    screenshot_path = _get_result_screenshot_path(result)
    account_name = account.remark or account.username

    try:
        if result and result.get("success") and screenshot_path:
            _enqueue_kdocs_upload_if_needed(user_id, account_id, account, screenshot_path)
    except Exception as kdocs_error:
        logger.warning(f"表格上传任务提交失败: {kdocs_error}")

    if batch_id:
        _batch_task_record_result(
            batch_id=batch_id,
            account_name=account_name,
            screenshot_path=screenshot_path,
            total_items=browse_result.get("total_items", 0),
            total_attachments=browse_result.get("total_attachments", 0),
        )
        return

    # Email notifications apply only to user-scheduled runs.
    if not (source and source.startswith("user_scheduled")):
        return
    if not (user_info and user_info.get("email")):
        return
    if not database.get_user_email_notify(user_id):
        return

    email_service.send_task_complete_email_async(
        user_id=user_id,
        email=user_info["email"],
        username=user_info["username"],
        account_name=account_name,
        browse_type=browse_type,
        total_items=browse_result.get("total_items", 0),
        total_attachments=browse_result.get("total_attachments", 0),
        screenshot_path=screenshot_path,
        log_callback=lambda msg: log_to_client(msg, user_id, account_id),
    )
def _finalize_screenshot_callback_state(user_id: int, account_id: str, account) -> None:
    """Reset the account after a screenshot task and notify the client.

    Clears the running flag, sets the status back to "not started", removes
    the task status entry for ``account_id`` and emits an ``account_update``
    event to the ``user_<user_id>`` room.
    """
    account.is_running = False
    account.status = "未开始"
    safe_remove_task_status(account_id)

    # Emit last so the payload reflects the reset state.
    payload = account.to_dict()
    _emit("account_update", payload, room=f"user_{user_id}")
def _persist_browse_log_after_screenshot(
    *,
    user_id: int,
    account_id: str,
    account,
    browse_type: str,
    source: str,
    task_start_time,
    browse_result,
) -> None:
    """Write the task log entry for a browse run once its screenshot is done.

    Args:
        user_id: Owner of the account.
        account_id: Identifier of the account the task ran for.
        account: Account object; its ``username`` is stored in the log.
        browse_type: Browse category recorded in the log.
        source: Task source recorded in the log.
        task_start_time: Start time as a ``time.time()`` style timestamp; the
            logged duration is the whole number of seconds elapsed since then.
        browse_result: Mapping providing ``total_items`` and
            ``total_attachments`` (each defaults to 0 when absent).

    The entry is always written with status ``"success"``.
    """
    # Use the module-level ``time`` import rather than a function-scope alias.
    total_elapsed = int(time.time() - task_start_time)
    database.create_task_log(
        user_id=user_id,
        account_id=account_id,
        username=account.username,
        browse_type=browse_type,
        status="success",
        total_items=browse_result.get("total_items", 0),
        total_attachments=browse_result.get("total_attachments", 0),
        duration=total_elapsed,
        source=source,
    )
def take_screenshot_for_account(
user_id,
account_id,
@@ -213,21 +502,21 @@ def take_screenshot_for_account(
# 标记账号正在截图(防止重复提交截图任务)
account.is_running = True
user_info = database.get_user_by_id(user_id)
username_prefix = user_info["username"] if user_info else f"user{user_id}"
def screenshot_task(
browser_instance, user_id, account_id, account, browse_type, source, task_start_time, browse_result
):
"""在worker线程中执行的截图任务"""
# ✅ 获得worker后立即更新状态为"截图中"
acc = safe_get_account(user_id, account_id)
if acc:
acc.status = "截图中"
safe_update_task_status(account_id, {"status": "运行中", "detail_status": "正在截图"})
_emit("account_update", acc.to_dict(), room=f"user_{user_id}")
_set_screenshot_running_status(user_id, account_id)
max_retries = 3
proxy_config = account.proxy_config if hasattr(account, "proxy_config") else None
proxy_server = proxy_config.get("server") if proxy_config else None
proxy_config, proxy_server = _get_proxy_context(account)
cookie_path = get_cookie_jar_path(account.username)
index_url, target_url, run_script = _build_screenshot_targets(browse_type)
for attempt in range(1, max_retries + 1):
try:
@@ -239,8 +528,7 @@ def take_screenshot_for_account(
if attempt > 1:
log_to_client(f"🔄 第 {attempt} 次截图尝试...", user_id, account_id)
worker_id = browser_instance.get("worker_id", "?") if isinstance(browser_instance, dict) else "?"
use_count = browser_instance.get("use_count", 0) if isinstance(browser_instance, dict) else 0
worker_id, use_count = _get_worker_display_info(browser_instance)
log_to_client(
f"使用Worker-{worker_id}执行截图(已执行{use_count}次)",
user_id,
@@ -250,99 +538,39 @@ def take_screenshot_for_account(
def custom_log(message: str):
log_to_client(message, user_id, account_id)
# 智能登录状态检查:只在必要时才刷新登录
should_refresh_login = not is_cookie_jar_fresh(cookie_path)
if should_refresh_login and attempt > 1:
# 重试时刷新登录attempt > 1 表示第2次及以后的尝试
log_to_client("正在刷新登录态...", user_id, account_id)
if not _ensure_login_cookies(account, proxy_config, custom_log):
log_to_client("截图登录失败", user_id, account_id)
if attempt < max_retries:
log_to_client("将重试...", user_id, account_id)
time.sleep(2)
continue
log_to_client("❌ 截图失败: 登录失败", user_id, account_id)
return {"success": False, "error": "登录失败"}
elif should_refresh_login:
# 首次尝试时快速检查登录状态
log_to_client("正在刷新登录态...", user_id, account_id)
if not _ensure_login_cookies(account, proxy_config, custom_log):
log_to_client("❌ 截图失败: 登录失败", user_id, account_id)
return {"success": False, "error": "登录失败"}
login_state = _ensure_screenshot_login_state(
account=account,
proxy_config=proxy_config,
cookie_path=cookie_path,
attempt=attempt,
max_retries=max_retries,
user_id=user_id,
account_id=account_id,
custom_log=custom_log,
)
if login_state == "retry":
continue
if login_state == "fail":
return {"success": False, "error": "登录失败"}
log_to_client(f"导航到 '{browse_type}' 页面...", user_id, account_id)
from urllib.parse import urlsplit
parsed = urlsplit(config.ZSGL_LOGIN_URL)
base = f"{parsed.scheme}://{parsed.netloc}"
if "注册前" in str(browse_type):
bz = 0
else:
bz = 0 # 应读(网站更新后 bz=0 为应读)
target_url = f"{base}/admin/center.aspx?bz={bz}"
index_url = config.ZSGL_INDEX_URL or f"{base}/admin/index.aspx"
run_script = (
"(function(){"
"function done(){window.status='ready';}"
"function ensureNav(){try{if(typeof loadMenuTree==='function'){loadMenuTree(true);}}catch(e){}}"
"function expandMenu(){"
"try{var body=document.body;if(body&&body.classList.contains('lay-mini')){body.classList.remove('lay-mini');}}catch(e){}"
"try{if(typeof mainPageResize==='function'){mainPageResize();}}catch(e){}"
"try{if(typeof toggleMainMenu==='function' && document.body && document.body.classList.contains('lay-mini')){toggleMainMenu();}}catch(e){}"
"try{var navRight=document.querySelector('.nav-right');if(navRight){navRight.style.display='block';}}catch(e){}"
"try{var mainNav=document.getElementById('main-nav');if(mainNav){mainNav.style.display='block';}}catch(e){}"
"}"
"function navReady(){"
"try{var nav=document.getElementById('sidebar-nav');return nav && nav.querySelectorAll('a').length>0;}catch(e){return false;}"
"}"
"function frameReady(){"
"try{var f=document.getElementById('mainframe');return f && f.contentDocument && f.contentDocument.readyState==='complete';}catch(e){return false;}"
"}"
"function check(){"
"if(navReady() && frameReady()){done();return;}"
"setTimeout(check,300);"
"}"
"var f=document.getElementById('mainframe');"
"ensureNav();"
"expandMenu();"
"if(!f){done();return;}"
f"f.src='{target_url}';"
"f.onload=function(){ensureNav();expandMenu();setTimeout(check,300);};"
"setTimeout(check,5000);"
"})();"
)
timestamp = get_beijing_now().strftime("%Y%m%d_%H%M%S")
user_info = database.get_user_by_id(user_id)
username_prefix = user_info["username"] if user_info else f"user{user_id}"
login_account = account.remark if account.remark else account.username
screenshot_filename = f"{username_prefix}_{login_account}_{browse_type}_{timestamp}.jpg"
screenshot_path = os.path.join(SCREENSHOTS_DIR, screenshot_filename)
cookies_for_shot = cookie_path if is_cookie_jar_fresh(cookie_path) else None
if take_screenshot_wkhtmltoimage(
index_url,
screenshot_path,
cookies_path=cookies_for_shot,
screenshot_filename, screenshot_path = _build_screenshot_output_path(username_prefix, account, browse_type)
shot_state = _take_screenshot_once(
index_url=index_url,
target_url=target_url,
screenshot_path=screenshot_path,
cookie_path=cookie_path,
proxy_server=proxy_server,
run_script=run_script,
window_status="ready",
log_callback=custom_log,
) or take_screenshot_wkhtmltoimage(
target_url,
screenshot_path,
cookies_path=cookies_for_shot,
proxy_server=proxy_server,
log_callback=custom_log,
):
if os.path.exists(screenshot_path) and os.path.getsize(screenshot_path) > 1000:
log_to_client(f"[OK] 截图成功: {screenshot_filename}", user_id, account_id)
return {"success": True, "filename": screenshot_filename}
)
if shot_state == "success":
log_to_client(f"[OK] 截图成功: {screenshot_filename}", user_id, account_id)
return {"success": True, "filename": screenshot_filename}
if shot_state == "invalid":
log_to_client("截图文件异常,将重试", user_id, account_id)
if os.path.exists(screenshot_path):
os.remove(screenshot_path)
else:
log_to_client("截图保存失败", user_id, account_id)
@@ -361,12 +589,7 @@ def take_screenshot_for_account(
def screenshot_callback(result, error):
"""截图完成回调"""
try:
account.is_running = False
account.status = "未开始"
safe_remove_task_status(account_id)
_emit("account_update", account.to_dict(), room=f"user_{user_id}")
_finalize_screenshot_callback_state(user_id, account_id, account)
if error:
log_to_client(f"❌ 截图失败: {error}", user_id, account_id)
@@ -375,84 +598,27 @@ def take_screenshot_for_account(
log_to_client(f"❌ 截图失败: {error_msg}", user_id, account_id)
if task_start_time and browse_result:
import time as time_module
total_elapsed = int(time_module.time() - task_start_time)
database.create_task_log(
_persist_browse_log_after_screenshot(
user_id=user_id,
account_id=account_id,
username=account.username,
account=account,
browse_type=browse_type,
status="success",
total_items=browse_result.get("total_items", 0),
total_attachments=browse_result.get("total_attachments", 0),
duration=total_elapsed,
source=source,
task_start_time=task_start_time,
browse_result=browse_result,
)
try:
batch_id = _get_batch_id_from_source(source)
screenshot_path = None
if result and result.get("success") and result.get("filename"):
screenshot_path = os.path.join(SCREENSHOTS_DIR, result["filename"])
account_name = account.remark if account.remark else account.username
try:
if screenshot_path and result and result.get("success"):
cfg = database.get_system_config() or {}
if int(cfg.get("kdocs_enabled", 0) or 0) == 1:
doc_url = (cfg.get("kdocs_doc_url") or "").strip()
if doc_url:
user_cfg = database.get_user_kdocs_settings(user_id) or {}
if int(user_cfg.get("kdocs_auto_upload", 0) or 0) == 1:
unit = (
user_cfg.get("kdocs_unit") or cfg.get("kdocs_default_unit") or ""
).strip()
name = (account.remark or "").strip()
if unit and name:
from services.kdocs_uploader import get_kdocs_uploader
ok = get_kdocs_uploader().enqueue_upload(
user_id=user_id,
account_id=account_id,
unit=unit,
name=name,
image_path=screenshot_path,
)
if not ok:
log_to_client("表格上传排队失败: 队列已满", user_id, account_id)
else:
if not unit:
log_to_client("表格上传跳过: 未配置县区", user_id, account_id)
if not name:
log_to_client("表格上传跳过: 账号备注为空", user_id, account_id)
except Exception as kdocs_error:
logger.warning(f"表格上传任务提交失败: {kdocs_error}")
if batch_id:
_batch_task_record_result(
batch_id=batch_id,
account_name=account_name,
screenshot_path=screenshot_path,
total_items=browse_result.get("total_items", 0),
total_attachments=browse_result.get("total_attachments", 0),
)
elif source and source.startswith("user_scheduled"):
user_info = database.get_user_by_id(user_id)
if user_info and user_info.get("email") and database.get_user_email_notify(user_id):
email_service.send_task_complete_email_async(
user_id=user_id,
email=user_info["email"],
username=user_info["username"],
account_name=account_name,
browse_type=browse_type,
total_items=browse_result.get("total_items", 0),
total_attachments=browse_result.get("total_attachments", 0),
screenshot_path=screenshot_path,
log_callback=lambda msg: log_to_client(msg, user_id, account_id),
)
_dispatch_screenshot_result(
user_id=user_id,
account_id=account_id,
source=source,
browse_type=browse_type,
browse_result=browse_result,
result=result,
account=account,
user_info=user_info,
)
except Exception as email_error:
logger.warning(f"发送任务完成邮件失败: {email_error}")
except Exception as e: