Files
zsglpt/services/screenshots.py

641 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import annotations
import os
import shutil
import subprocess
import time
from urllib.parse import urlsplit
import database
import email_service
from api_browser import APIBrowser, get_cookie_jar_path, is_cookie_jar_fresh
from app_config import get_config
from app_logger import get_logger
from app_security import sanitize_filename
from browser_pool_worker import get_browser_worker_pool
from services.client_log import log_to_client
from services.runtime import get_socketio
from services.state import safe_get_account, safe_remove_task_status, safe_update_task_status
from services.task_batches import _batch_task_record_result, _get_batch_id_from_source
from services.time_utils import get_beijing_now
logger = get_logger("app")
config = get_config()
SCREENSHOTS_DIR = config.SCREENSHOTS_DIR
os.makedirs(SCREENSHOTS_DIR, exist_ok=True)
_WKHTMLTOIMAGE_TIMEOUT_SECONDS = int(os.environ.get("WKHTMLTOIMAGE_TIMEOUT_SECONDS", "60"))
_WKHTMLTOIMAGE_JS_DELAY_MS = int(os.environ.get("WKHTMLTOIMAGE_JS_DELAY_MS", "3000"))
_WKHTMLTOIMAGE_WIDTH = int(os.environ.get("WKHTMLTOIMAGE_WIDTH", "1920"))
_WKHTMLTOIMAGE_HEIGHT = int(os.environ.get("WKHTMLTOIMAGE_HEIGHT", "1080"))
_WKHTMLTOIMAGE_QUALITY = int(os.environ.get("WKHTMLTOIMAGE_QUALITY", "95"))
_WKHTMLTOIMAGE_ZOOM = float(os.environ.get("WKHTMLTOIMAGE_ZOOM", "1.0"))
_WKHTMLTOIMAGE_FULL_PAGE = str(os.environ.get("WKHTMLTOIMAGE_FULL_PAGE", "")).strip().lower() in (
"1",
"true",
"yes",
"on",
)
_env_crop_w = os.environ.get("WKHTMLTOIMAGE_CROP_WIDTH")
_env_crop_h = os.environ.get("WKHTMLTOIMAGE_CROP_HEIGHT")
_WKHTMLTOIMAGE_CROP_WIDTH = int(_env_crop_w) if _env_crop_w is not None else _WKHTMLTOIMAGE_WIDTH
_WKHTMLTOIMAGE_CROP_HEIGHT = (
int(_env_crop_h) if _env_crop_h is not None else (_WKHTMLTOIMAGE_HEIGHT if _WKHTMLTOIMAGE_HEIGHT > 0 else 0)
)
_WKHTMLTOIMAGE_CROP_X = int(os.environ.get("WKHTMLTOIMAGE_CROP_X", "0"))
_WKHTMLTOIMAGE_CROP_Y = int(os.environ.get("WKHTMLTOIMAGE_CROP_Y", "0"))
_WKHTMLTOIMAGE_UA = os.environ.get(
"WKHTMLTOIMAGE_USER_AGENT",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
)
def _resolve_wkhtmltoimage_path() -> str | None:
return os.environ.get("WKHTMLTOIMAGE_PATH") or shutil.which("wkhtmltoimage")
def _read_cookie_pairs(cookies_path: str) -> list[tuple[str, str]]:
if not cookies_path or not os.path.exists(cookies_path):
return []
pairs = []
try:
with open(cookies_path, "r", encoding="utf-8", errors="ignore") as f:
for line in f:
line = line.strip()
if not line or line.startswith("#"):
continue
parts = line.split("\t")
if len(parts) < 7:
continue
name = parts[5].strip()
value = parts[6].strip()
if name:
pairs.append((name, value))
except Exception:
return []
return pairs
def _select_cookie_pairs(pairs: list[tuple[str, str]]) -> list[tuple[str, str]]:
preferred_names = {"ASP.NET_SessionId", ".ASPXAUTH"}
preferred = [(name, value) for name, value in pairs if name in preferred_names and value]
if preferred:
return preferred
return [(name, value) for name, value in pairs if name and value and name.isascii() and value.isascii()]
def _ensure_login_cookies(account, proxy_config, log_callback) -> bool:
"""确保有可用的登录 cookies通过 API 登录刷新)"""
try:
with APIBrowser(log_callback=log_callback, proxy_config=proxy_config) as api_browser:
if not api_browser.login(account.username, account.password):
return False
return api_browser.save_cookies_for_screenshot(account.username)
except Exception:
return False
def take_screenshot_wkhtmltoimage(
url: str,
output_path: str,
cookies_path: str | None = None,
proxy_server: str | None = None,
run_script: str | None = None,
window_status: str | None = None,
log_callback=None,
) -> bool:
wkhtmltoimage_path = _resolve_wkhtmltoimage_path()
if not wkhtmltoimage_path:
if log_callback:
log_callback("wkhtmltoimage 未安装或不在 PATH 中")
return False
ext = os.path.splitext(output_path)[1].lower()
image_format = "jpg" if ext in (".jpg", ".jpeg") else "png"
cmd = [
wkhtmltoimage_path,
"--format",
image_format,
"--width",
str(_WKHTMLTOIMAGE_WIDTH),
"--disable-smart-width",
"--javascript-delay",
str(_WKHTMLTOIMAGE_JS_DELAY_MS),
"--load-error-handling",
"ignore",
"--enable-local-file-access",
"--encoding",
"utf-8",
]
if _WKHTMLTOIMAGE_UA:
cmd.extend(["--custom-header", "User-Agent", _WKHTMLTOIMAGE_UA, "--custom-header-propagation"])
if image_format in ("jpg", "jpeg"):
cmd.extend(["--quality", str(_WKHTMLTOIMAGE_QUALITY)])
if _WKHTMLTOIMAGE_HEIGHT > 0 and not _WKHTMLTOIMAGE_FULL_PAGE:
cmd.extend(["--height", str(_WKHTMLTOIMAGE_HEIGHT)])
if abs(_WKHTMLTOIMAGE_ZOOM - 1.0) > 1e-6:
cmd.extend(["--zoom", str(_WKHTMLTOIMAGE_ZOOM)])
if not _WKHTMLTOIMAGE_FULL_PAGE and (_WKHTMLTOIMAGE_CROP_WIDTH > 0 or _WKHTMLTOIMAGE_CROP_HEIGHT > 0):
cmd.extend(["--crop-x", str(_WKHTMLTOIMAGE_CROP_X), "--crop-y", str(_WKHTMLTOIMAGE_CROP_Y)])
if _WKHTMLTOIMAGE_CROP_WIDTH > 0:
cmd.extend(["--crop-w", str(_WKHTMLTOIMAGE_CROP_WIDTH)])
if _WKHTMLTOIMAGE_CROP_HEIGHT > 0:
cmd.extend(["--crop-h", str(_WKHTMLTOIMAGE_CROP_HEIGHT)])
if run_script:
cmd.extend(["--run-script", run_script])
if window_status:
cmd.extend(["--window-status", window_status])
if cookies_path:
cookie_pairs = _select_cookie_pairs(_read_cookie_pairs(cookies_path))
if cookie_pairs:
for name, value in cookie_pairs:
cmd.extend(["--cookie", name, value])
else:
cmd.extend(["--cookie-jar", cookies_path])
if proxy_server:
cmd.extend(["--proxy", proxy_server])
cmd.extend([url, output_path])
try:
result = subprocess.run(cmd, capture_output=True, text=True, timeout=_WKHTMLTOIMAGE_TIMEOUT_SECONDS)
if result.returncode != 0:
if log_callback:
err_msg = (result.stderr or result.stdout or "").strip()
log_callback(f"wkhtmltoimage 截图失败: {err_msg[:200]}")
return False
return True
except subprocess.TimeoutExpired:
if log_callback:
log_callback("wkhtmltoimage 截图超时")
return False
except Exception as e:
if log_callback:
log_callback(f"wkhtmltoimage 截图异常: {e}")
return False
def _emit(event: str, data: object, *, room: str | None = None) -> None:
try:
socketio = get_socketio()
socketio.emit(event, data, room=room)
except Exception:
# runtime 未初始化时(如测试/离线脚本),忽略推送
pass
def _set_screenshot_running_status(user_id: int, account_id: str) -> None:
"""更新账号状态为截图中。"""
acc = safe_get_account(user_id, account_id)
if not acc:
return
acc.status = "截图中"
safe_update_task_status(account_id, {"status": "运行中", "detail_status": "正在截图"})
_emit("account_update", acc.to_dict(), room=f"user_{user_id}")
def _get_worker_display_info(browser_instance) -> tuple[str, int]:
"""获取截图 worker 的展示信息。"""
if isinstance(browser_instance, dict):
return str(browser_instance.get("worker_id", "?")), int(browser_instance.get("use_count", 0) or 0)
return "?", 0
def _get_proxy_context(account) -> tuple[dict | None, str | None]:
"""提取截图阶段代理配置。"""
proxy_config = account.proxy_config if hasattr(account, "proxy_config") else None
proxy_server = proxy_config.get("server") if proxy_config else None
return proxy_config, proxy_server
def _build_screenshot_targets(browse_type: str) -> tuple[str, str, str]:
"""构建截图目标 URL 与页面脚本。"""
parsed = urlsplit(config.ZSGL_LOGIN_URL)
base = f"{parsed.scheme}://{parsed.netloc}"
if "注册前" in str(browse_type):
bz = 0
else:
bz = 0
target_url = f"{base}/admin/center.aspx?bz={bz}"
index_url = config.ZSGL_INDEX_URL or f"{base}/admin/index.aspx"
run_script = (
"(function(){"
"function done(){window.status='ready';}"
"function ensureNav(){try{if(typeof loadMenuTree==='function'){loadMenuTree(true);}}catch(e){}}"
"function expandMenu(){"
"try{var body=document.body;if(body&&body.classList.contains('lay-mini')){body.classList.remove('lay-mini');}}catch(e){}"
"try{if(typeof mainPageResize==='function'){mainPageResize();}}catch(e){}"
"try{if(typeof toggleMainMenu==='function' && document.body && document.body.classList.contains('lay-mini')){toggleMainMenu();}}catch(e){}"
"try{var navRight=document.querySelector('.nav-right');if(navRight){navRight.style.display='block';}}catch(e){}"
"try{var mainNav=document.getElementById('main-nav');if(mainNav){mainNav.style.display='block';}}catch(e){}"
"}"
"function navReady(){"
"try{var nav=document.getElementById('sidebar-nav');return nav && nav.querySelectorAll('a').length>0;}catch(e){return false;}"
"}"
"function frameReady(){"
"try{var f=document.getElementById('mainframe');return f && f.contentDocument && f.contentDocument.readyState==='complete';}catch(e){return false;}"
"}"
"function check(){"
"if(navReady() && frameReady()){done();return;}"
"setTimeout(check,300);"
"}"
"var f=document.getElementById('mainframe');"
"ensureNav();"
"expandMenu();"
"if(!f){done();return;}"
f"f.src='{target_url}';"
"f.onload=function(){ensureNav();expandMenu();setTimeout(check,300);};"
"setTimeout(check,5000);"
"})();"
)
return index_url, target_url, run_script
def _build_screenshot_output_path(username_prefix: str, account, browse_type: str) -> tuple[str, str]:
"""构建截图输出文件名与路径。"""
timestamp = get_beijing_now().strftime("%Y%m%d_%H%M%S")
login_account = account.remark if account.remark else account.username
raw_filename = f"{username_prefix}_{login_account}_{browse_type}_{timestamp}.jpg"
screenshot_filename = sanitize_filename(raw_filename)
return screenshot_filename, os.path.join(SCREENSHOTS_DIR, screenshot_filename)
def _ensure_screenshot_login_state(
*,
account,
proxy_config,
cookie_path: str,
attempt: int,
max_retries: int,
user_id: int,
account_id: str,
custom_log,
) -> str:
"""确保截图前登录态有效。返回: ok/retry/fail。"""
should_refresh_login = not is_cookie_jar_fresh(cookie_path)
if not should_refresh_login:
return "ok"
log_to_client("正在刷新登录态...", user_id, account_id)
if _ensure_login_cookies(account, proxy_config, custom_log):
return "ok"
if attempt > 1:
log_to_client("截图登录失败", user_id, account_id)
if attempt < max_retries:
log_to_client("将重试...", user_id, account_id)
time.sleep(2)
return "retry"
log_to_client("❌ 截图失败: 登录失败", user_id, account_id)
return "fail"
def _take_screenshot_once(
*,
index_url: str,
target_url: str,
screenshot_path: str,
cookie_path: str,
proxy_server: str | None,
run_script: str,
log_callback,
) -> str:
"""执行一次截图尝试并验证输出文件。返回: success/invalid/failed。"""
cookies_for_shot = cookie_path if is_cookie_jar_fresh(cookie_path) else None
attempts = [
{
"url": index_url,
"run_script": run_script,
"window_status": "ready",
},
{
"url": target_url,
"run_script": None,
"window_status": None,
},
]
ok = False
for shot in attempts:
ok = take_screenshot_wkhtmltoimage(
shot["url"],
screenshot_path,
cookies_path=cookies_for_shot,
proxy_server=proxy_server,
run_script=shot["run_script"],
window_status=shot["window_status"],
log_callback=log_callback,
)
if ok:
break
if not ok:
return "failed"
if os.path.exists(screenshot_path) and os.path.getsize(screenshot_path) > 1000:
return "success"
if os.path.exists(screenshot_path):
os.remove(screenshot_path)
return "invalid"
def _get_result_screenshot_path(result) -> str | None:
"""从截图结果中提取截图文件绝对路径。"""
if result and result.get("success") and result.get("filename"):
return os.path.join(SCREENSHOTS_DIR, result["filename"])
return None
def _enqueue_kdocs_upload_if_needed(user_id: int, account_id: str, account, screenshot_path: str | None) -> None:
"""按配置提交金山文档上传任务。"""
if not screenshot_path:
return
cfg = database.get_system_config() or {}
if int(cfg.get("kdocs_enabled", 0) or 0) != 1:
return
doc_url = (cfg.get("kdocs_doc_url") or "").strip()
if not doc_url:
return
user_cfg = database.get_user_kdocs_settings(user_id) or {}
if int(user_cfg.get("kdocs_auto_upload", 0) or 0) != 1:
return
unit = (user_cfg.get("kdocs_unit") or cfg.get("kdocs_default_unit") or "").strip()
name = (account.remark or "").strip()
if not unit:
log_to_client("表格上传跳过: 未配置县区", user_id, account_id)
return
if not name:
log_to_client("表格上传跳过: 账号备注为空", user_id, account_id)
return
from services.kdocs_uploader import get_kdocs_uploader
ok = get_kdocs_uploader().enqueue_upload(
user_id=user_id,
account_id=account_id,
unit=unit,
name=name,
image_path=screenshot_path,
)
if not ok:
log_to_client("表格上传排队失败: 队列已满", user_id, account_id)
def _dispatch_screenshot_result(
*,
user_id: int,
account_id: str,
source: str,
browse_type: str,
browse_result: dict,
result,
account,
user_info,
) -> None:
"""将截图结果发送到批次统计/邮件通知链路。"""
batch_id = _get_batch_id_from_source(source)
screenshot_path = _get_result_screenshot_path(result)
account_name = account.remark if account.remark else account.username
try:
if result and result.get("success") and screenshot_path:
_enqueue_kdocs_upload_if_needed(user_id, account_id, account, screenshot_path)
except Exception as kdocs_error:
logger.warning(f"表格上传任务提交失败: {kdocs_error}")
if batch_id:
_batch_task_record_result(
batch_id=batch_id,
account_name=account_name,
screenshot_path=screenshot_path,
total_items=browse_result.get("total_items", 0),
total_attachments=browse_result.get("total_attachments", 0),
)
return
if source and source.startswith("user_scheduled"):
if user_info and user_info.get("email") and database.get_user_email_notify(user_id):
email_service.send_task_complete_email_async(
user_id=user_id,
email=user_info["email"],
username=user_info["username"],
account_name=account_name,
browse_type=browse_type,
total_items=browse_result.get("total_items", 0),
total_attachments=browse_result.get("total_attachments", 0),
screenshot_path=screenshot_path,
log_callback=lambda msg: log_to_client(msg, user_id, account_id),
)
def _finalize_screenshot_callback_state(user_id: int, account_id: str, account) -> None:
"""截图回调的通用收尾状态变更。"""
account.is_running = False
account.status = "未开始"
safe_remove_task_status(account_id)
_emit("account_update", account.to_dict(), room=f"user_{user_id}")
def _persist_browse_log_after_screenshot(
*,
user_id: int,
account_id: str,
account,
browse_type: str,
source: str,
task_start_time,
browse_result,
) -> None:
"""截图完成后写入任务日志(浏览完成日志)。"""
import time as time_module
total_elapsed = int(time_module.time() - task_start_time)
database.create_task_log(
user_id=user_id,
account_id=account_id,
username=account.username,
browse_type=browse_type,
status="success",
total_items=browse_result.get("total_items", 0),
total_attachments=browse_result.get("total_attachments", 0),
duration=total_elapsed,
source=source,
)
def take_screenshot_for_account(
user_id,
account_id,
browse_type="应读",
source="manual",
task_start_time=None,
browse_result=None,
):
"""为账号任务完成后截图(使用截图线程池并发执行)"""
account = safe_get_account(user_id, account_id)
if not account:
return
# 以本次调用的 browse_type 为准(避免 last_browse_type 被刷新/重载导致截图页面不一致)
if browse_type:
account.last_browse_type = browse_type
# 标记账号正在截图(防止重复提交截图任务)
account.is_running = True
user_info = database.get_user_by_id(user_id)
username_prefix = user_info["username"] if user_info else f"user{user_id}"
def screenshot_task(
browser_instance, user_id, account_id, account, browse_type, source, task_start_time, browse_result
):
"""在worker线程中执行的截图任务"""
# ✅ 获得worker后立即更新状态为"截图中"
_set_screenshot_running_status(user_id, account_id)
max_retries = 3
proxy_config, proxy_server = _get_proxy_context(account)
cookie_path = get_cookie_jar_path(account.username)
index_url, target_url, run_script = _build_screenshot_targets(browse_type)
for attempt in range(1, max_retries + 1):
try:
safe_update_task_status(
account_id,
{"detail_status": f"正在截图{f' (第{attempt}次)' if attempt > 1 else ''}"},
)
if attempt > 1:
log_to_client(f"🔄 第 {attempt} 次截图尝试...", user_id, account_id)
worker_id, use_count = _get_worker_display_info(browser_instance)
log_to_client(
f"使用Worker-{worker_id}执行截图(已执行{use_count}次)",
user_id,
account_id,
)
def custom_log(message: str):
log_to_client(message, user_id, account_id)
login_state = _ensure_screenshot_login_state(
account=account,
proxy_config=proxy_config,
cookie_path=cookie_path,
attempt=attempt,
max_retries=max_retries,
user_id=user_id,
account_id=account_id,
custom_log=custom_log,
)
if login_state == "retry":
continue
if login_state == "fail":
return {"success": False, "error": "登录失败"}
log_to_client(f"导航到 '{browse_type}' 页面...", user_id, account_id)
screenshot_filename, screenshot_path = _build_screenshot_output_path(username_prefix, account, browse_type)
shot_state = _take_screenshot_once(
index_url=index_url,
target_url=target_url,
screenshot_path=screenshot_path,
cookie_path=cookie_path,
proxy_server=proxy_server,
run_script=run_script,
log_callback=custom_log,
)
if shot_state == "success":
log_to_client(f"[OK] 截图成功: {screenshot_filename}", user_id, account_id)
return {"success": True, "filename": screenshot_filename}
if shot_state == "invalid":
log_to_client("截图文件异常,将重试", user_id, account_id)
else:
log_to_client("截图保存失败", user_id, account_id)
if attempt < max_retries:
log_to_client("将重试...", user_id, account_id)
time.sleep(2)
except Exception as e:
log_to_client(f"截图出错: {str(e)}", user_id, account_id)
if attempt < max_retries:
log_to_client("将重试...", user_id, account_id)
time.sleep(2)
return {"success": False, "error": "截图失败已重试3次"}
def screenshot_callback(result, error):
"""截图完成回调"""
try:
_finalize_screenshot_callback_state(user_id, account_id, account)
if error:
log_to_client(f"❌ 截图失败: {error}", user_id, account_id)
elif not result or not result.get("success"):
error_msg = result.get("error", "未知错误") if result else "未知错误"
log_to_client(f"❌ 截图失败: {error_msg}", user_id, account_id)
if task_start_time and browse_result:
_persist_browse_log_after_screenshot(
user_id=user_id,
account_id=account_id,
account=account,
browse_type=browse_type,
source=source,
task_start_time=task_start_time,
browse_result=browse_result,
)
try:
_dispatch_screenshot_result(
user_id=user_id,
account_id=account_id,
source=source,
browse_type=browse_type,
browse_result=browse_result,
result=result,
account=account,
user_info=user_info,
)
except Exception as email_error:
logger.warning(f"发送任务完成邮件失败: {email_error}")
except Exception as e:
logger.error(f"截图回调出错: {e}")
pool = get_browser_worker_pool()
submitted = pool.submit_task(
screenshot_task,
screenshot_callback,
user_id,
account_id,
account,
browse_type,
source,
task_start_time,
browse_result,
)
if not submitted:
screenshot_callback(None, "截图队列已满,请稍后重试")