Files
zsglpt/services/tasks.py

896 lines
30 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import annotations
import os
import threading
import time
import database
import email_service
from api_browser import APIBrowser
from app_config import get_config
from app_logger import LoggerAdapter, get_logger
from services.checkpoints import get_checkpoint_mgr
from services.client_log import log_to_client
from services.proxy import get_proxy_from_api
from services.runtime import get_socketio
from services.screenshots import take_screenshot_for_account
from services.state import (
safe_get_account,
safe_remove_task,
safe_remove_task_status,
safe_set_task_status,
safe_update_task_status,
)
from services.task_batches import _batch_task_record_result, _get_batch_id_from_source
from services.task_scheduler import TaskScheduler
from task_checkpoint import TaskStage
# Module-wide logger and application configuration object.
logger = get_logger("app")
config = get_config()
# Default concurrency limits. Per the original note, these are overridden from
# the system configuration after startup, via a call to update_limits.
max_concurrent_per_account = config.MAX_CONCURRENT_PER_ACCOUNT
max_concurrent_global = config.MAX_CONCURRENT_GLOBAL
# Unique sentinel used as a default argument so that "argument omitted" can be
# told apart from any real value (including None).
_SOURCE_UNSET = object()
def _emit(event: str, data: object, *, room: str | None = None) -> None:
    """Emit a Socket.IO event on a best-effort basis.

    The socketio object is looked up through ``get_socketio()`` on every call.

    Args:
        event: Event name to emit.
        data: Payload handed to ``socketio.emit``.
        room: Optional room name forwarded as the ``room`` keyword.

    Any ``Exception`` raised while fetching the socketio object or emitting is
    swallowed, so a failed notification never interrupts the caller.
    """
    try:
        get_socketio().emit(event, data, room=room)
    except Exception:
        # Deliberate: client notifications are optional.
        return None
# Lazily created TaskScheduler shared by the whole module; None until first use.
_task_scheduler = None
# Lock taken while checking for / creating the shared scheduler instance.
_task_scheduler_lock = threading.Lock()
def get_task_scheduler() -> TaskScheduler:
    """Return the global task scheduler, creating it on first use (singleton).

    The queue capacity is read from the ``TASK_QUEUE_MAXSIZE`` environment
    variable and falls back to 1000 when the value cannot be converted to int.
    Creation happens while holding ``_task_scheduler_lock``.
    """
    global _task_scheduler
    with _task_scheduler_lock:
        if _task_scheduler is not None:
            return _task_scheduler

        try:
            queue_limit = int(os.environ.get("TASK_QUEUE_MAXSIZE", "1000"))
        except Exception:
            queue_limit = 1000

        _task_scheduler = TaskScheduler(
            max_global=max_concurrent_global,
            max_per_user=max_concurrent_per_account,
            max_queue_size=queue_limit,
            run_task_fn=run_task,
        )
        return _task_scheduler
def submit_account_task(
    user_id: int,
    account_id: str,
    browse_type: str,
    enable_screenshot: bool = True,
    source: str = "manual",
    retry_count: int = 0,
    done_callback=None,
):
    """Unified entry point: queue a browse task for one account.

    Marks the account as queued, publishes the initial task status, and hands
    the task to the global scheduler.

    Args:
        user_id: Owner of the account.
        account_id: Account the task runs for.
        browse_type: Browse mode forwarded to the scheduler.
        enable_screenshot: Forwarded to the scheduler unchanged.
        source: Origin label stored in the task status and forwarded.
        retry_count: Forwarded to the scheduler unchanged.
        done_callback: Forwarded to the scheduler unchanged.

    Returns:
        ``(True, message)`` when the scheduler accepted the task, otherwise
        ``(False, reason)``.

    Raises:
        Exception: Whatever status registration or the scheduler raised. The
            queued state is undone first so the account is not left flagged
            as running.
    """
    account = safe_get_account(user_id, account_id)
    if not account:
        return False, "账号不存在"
    if getattr(account, "is_running", False):
        return False, "任务已在运行中"

    # A failed VIP lookup must not block submission; treat the user as non-VIP.
    try:
        is_vip_user = bool(database.is_user_vip(user_id))
    except Exception:
        is_vip_user = False

    room = f"user_{user_id}"

    def _rollback_queued_state() -> None:
        # Clear the running flag first so it is reset even if a later step fails.
        account.is_running = False
        account.status = "未开始"
        safe_remove_task_status(account_id)
        _emit("account_update", account.to_dict(), room=room)

    account.is_running = True
    account.should_stop = False
    account.status = "排队中" + (" (VIP)" if is_vip_user else "")

    try:
        safe_set_task_status(
            account_id,
            {
                "user_id": user_id,
                "username": account.username,
                "status": "排队中",
                "detail_status": "等待资源" + (" [VIP优先]" if is_vip_user else ""),
                "browse_type": browse_type,
                "start_time": time.time(),
                "source": source,
                "progress": {"items": 0, "attachments": 0},
                "is_vip": is_vip_user,
            },
        )
        _emit("account_update", account.to_dict(), room=room)
        scheduler = get_task_scheduler()
        ok, message = scheduler.submit_task(
            user_id=user_id,
            account_id=account_id,
            browse_type=browse_type,
            enable_screenshot=enable_screenshot,
            source=source,
            retry_count=retry_count,
            is_vip=is_vip_user,
            done_callback=done_callback,
        )
    except Exception:
        # Without this the account would stay "running" forever and every
        # later submission would be rejected.
        _rollback_queued_state()
        raise

    if not ok:
        _rollback_queued_state()
        return False, message

    log_to_client(message + (" [VIP优先]" if is_vip_user else ""), user_id, account_id)
    return True, message
def _account_display_name(account) -> str:
    """Return the account's remark when it is truthy, otherwise its username."""
    return account.remark or account.username
def _record_batch_result(batch_id, account, total_items: int, total_attachments: int) -> None:
    """Report one account's totals to its batch; do nothing without a batch id.

    The result is recorded under the account's display name and always with
    ``screenshot_path=None``.
    """
    if batch_id:
        _batch_task_record_result(
            batch_id=batch_id,
            account_name=_account_display_name(account),
            screenshot_path=None,
            total_items=total_items,
            total_attachments=total_attachments,
        )
def _close_account_automation(account, *, on_error=None) -> bool:
    """Close and detach the automation object attached to *account*, if any.

    Args:
        account: Object that may carry an ``automation`` attribute.
        on_error: Optional callable invoked with the exception when ``close()``
            fails. Exceptions raised by the callback itself are ignored.

    Returns:
        True only when an automation object existed and ``close()`` finished
        without raising; False otherwise.
    """
    browser = getattr(account, "automation", None)
    if not browser:
        return False

    try:
        browser.close()
    except Exception as close_error:
        if on_error:
            try:
                on_error(close_error)
            except Exception:
                # The error reporter is best-effort as well.
                pass
        return False
    else:
        return True
    finally:
        # Drop the reference whether or not closing succeeded.
        account.automation = None
def _create_task_log(
    *,
    user_id: int,
    account_id: str,
    account,
    browse_type: str,
    status: str,
    total_items: int,
    total_attachments: int,
    error_message: str,
    duration: int,
    source=_SOURCE_UNSET,
) -> None:
    """Write one task log row through ``database.create_task_log``.

    The ``source`` keyword is forwarded only when the caller supplied it, so
    omitting it leaves the choice of default to the database layer.
    """
    # Empty when the sentinel is still in place, i.e. no source was passed.
    optional_fields = {} if source is _SOURCE_UNSET else {"source": source}
    database.create_task_log(
        user_id=user_id,
        account_id=account_id,
        username=account.username,
        browse_type=browse_type,
        status=status,
        total_items=total_items,
        total_attachments=total_attachments,
        error_message=error_message,
        duration=duration,
        **optional_fields,
    )
def _handle_stop_requested(
    *,
    account,
    user_id: int,
    account_id: str,
    batch_id,
    remove_task_status: bool = False,
    record_batch: bool = False,
) -> bool:
    """Cancel the task if the account's ``should_stop`` flag is set.

    When a stop was requested, the account is marked stopped and not running,
    the client is notified, the task status is optionally removed, and an
    all-zero batch result is optionally recorded.

    Returns:
        True when the task was cancelled here, False when no stop was requested.
    """
    if account.should_stop:
        log_to_client("任务已取消", user_id, account_id)
        account.status = "已停止"
        account.is_running = False
        if remove_task_status:
            safe_remove_task_status(account_id)
        _emit("account_update", account.to_dict(), room=f"user_{user_id}")
        if record_batch and batch_id:
            _record_batch_result(batch_id=batch_id, account=account, total_items=0, total_attachments=0)
        return True
    return False
def _resolve_proxy_config(user_id: int, account_id: str, account):
    """Fetch a proxy for this run when proxying is enabled in the system config.

    Args:
        user_id: Used only for client log messages.
        account_id: Used only for client log messages.
        account: On success its ``proxy_config`` attribute is set to the new
            configuration. It is left untouched otherwise.

    Returns:
        ``{"server": <proxy>}`` when a proxy was obtained. ``None`` when
        proxying is disabled, no API URL is configured, or the proxy API
        returned nothing; in the last case the task continues without a proxy.
    """
    # A missing config is treated the same as "proxy disabled".
    system_config = database.get_system_config() or {}
    if system_config.get("proxy_enabled") != 1:
        return None

    # The stored URL may be None as well as absent; both mean "not configured".
    proxy_api_url = (system_config.get("proxy_api_url") or "").strip()
    if not proxy_api_url:
        log_to_client("⚠ 代理已启用但未配置API地址", user_id, account_id)
        return None

    log_to_client("正在获取代理IP...", user_id, account_id)
    proxy_server = get_proxy_from_api(proxy_api_url, max_retries=3)
    if not proxy_server:
        log_to_client("✗ 代理获取失败,将不使用代理继续", user_id, account_id)
        return None

    proxy_config = {"server": proxy_server}
    log_to_client(f"[OK] 将使用代理: {proxy_server}", user_id, account_id)
    account.proxy_config = proxy_config
    return proxy_config
def _refresh_account_remark(api_browser, account, user_id: int, account_id: str) -> None:
    """Fill in an empty account remark with the name reported by the browser.

    Does nothing when the account already has a remark or when no name is
    returned. On success the remark is stored on the account, persisted via
    ``database.update_account_remark``, and pushed to the client. Any failure
    is logged as a warning and otherwise ignored.
    """
    if account.remark:
        return

    try:
        fetched_name = api_browser.get_real_name()
        if fetched_name:
            account.remark = fetched_name
            database.update_account_remark(account_id, fetched_name)
            _emit("account_update", account.to_dict(), room=f"user_{user_id}")
            logger.info(f"[自动备注] 账号 {account.username} 自动设置备注为: {fetched_name}")
    except Exception as lookup_error:
        logger.warning(f"[自动备注] 获取姓名失败: {lookup_error}")
def _handle_login_failed(
    *,
    account,
    user_id: int,
    account_id: str,
    browse_type: str,
    source: str,
    task_start_time: float,
    task_id: str,
    checkpoint_mgr,
    time_module,
) -> None:
    """Record a failed login for the account and notify the client.

    Steps, in order: log the failure to the client, bump the account's
    login-failure counter (warning the user when that call reports the account
    as suspended), record the error on the checkpoint, mark the account as
    failed and not running, write a "failed" task log, and push the account
    update.
    """
    reason = "登录失败"
    log_to_client(reason, user_id, account_id)

    if database.increment_account_login_fail(account_id, reason):
        log_to_client("⚠ 该账号连续3次密码错误已自动暂停", user_id, account_id)
        log_to_client("请在前台修改密码后才能继续使用", user_id, account_id)

    if checkpoint_mgr.record_error(task_id, reason) == "paused":
        logger.warning(f"[断点] 任务 {task_id} 已暂停(登录失败)")

    account.status = "登录失败"
    account.is_running = False

    _create_task_log(
        user_id=user_id,
        account_id=account_id,
        account=account,
        browse_type=browse_type,
        status="failed",
        total_items=0,
        total_attachments=0,
        error_message=reason,
        duration=int(time_module.time() - task_start_time),
        source=source,
    )
    _emit("account_update", account.to_dict(), room=f"user_{user_id}")
def _execute_single_attempt(
    *,
    account,
    user_id: int,
    account_id: str,
    browse_type: str,
    source: str,
    checkpoint_mgr,
    task_id: str,
    task_start_time: float,
    time_module,
):
    """Run one login-and-browse attempt for the account.

    Resolves the proxy, logs in (twice: the second login refreshes the login
    time and its failure is tolerated), saves cookies for the later
    screenshot, resets the account's login-failure status, fills in the remark
    if missing, and browses the requested content while reporting progress.

    Returns:
        ``("login_failed", None)`` when the first login fails (the failure has
        already been recorded), otherwise ``("ok", result)`` where *result* is
        whatever ``api_browser.browse_content`` returned.
    """

    def notify(message: str):
        log_to_client(message, user_id, account_id)

    def stop_requested():
        return account.should_stop

    def report_progress(progress: dict):
        # Progress reporting is best-effort; a bad payload must not abort browsing.
        try:
            total = int(progress.get("total_items") or 0)
            browsed = int(progress.get("browsed_items") or 0)
            if total > 0:
                account.total_items = total
            safe_update_task_status(account_id, {"progress": {"items": browsed, "attachments": 0}})
        except Exception:
            pass

    proxy_config = _resolve_proxy_config(user_id=user_id, account_id=account_id, account=account)
    checkpoint_mgr.update_stage(task_id, TaskStage.STARTING, progress_percent=10)

    notify("开始登录...")
    safe_update_task_status(account_id, {"detail_status": "正在登录"})
    checkpoint_mgr.update_stage(task_id, TaskStage.LOGGING_IN, progress_percent=25)

    with APIBrowser(log_callback=notify, proxy_config=proxy_config) as api_browser:
        first_login_ok = api_browser.login(account.username, account.password)
        if not first_login_ok:
            _handle_login_failed(
                account=account,
                user_id=user_id,
                account_id=account_id,
                browse_type=browse_type,
                source=source,
                task_start_time=task_start_time,
                task_id=task_id,
                checkpoint_mgr=checkpoint_mgr,
                time_module=time_module,
            )
            return "login_failed", None

        notify("[OK] 首次登录成功,刷新登录时间...")
        second_login_ok = api_browser.login(account.username, account.password)
        if second_login_ok:
            notify("[OK] 二次登录成功!")
        else:
            notify("⚠ 二次登录失败,继续使用首次登录状态")

        api_browser.save_cookies_for_screenshot(account.username)
        database.reset_account_login_status(account_id)
        _refresh_account_remark(api_browser, account, user_id, account_id)

        safe_update_task_status(account_id, {"detail_status": "正在浏览"})
        notify(f"开始浏览 '{browse_type}' 内容...")
        account.total_items = 0
        safe_update_task_status(account_id, {"progress": {"items": 0, "attachments": 0}})

        checkpoint_mgr.update_stage(task_id, TaskStage.BROWSING, progress_percent=50)
        browse_result = api_browser.browse_content(
            browse_type=browse_type,
            should_stop_callback=stop_requested,
            progress_callback=report_progress,
        )
        return "ok", browse_result
def _record_success_without_screenshot(
    *,
    account,
    user_id: int,
    account_id: str,
    browse_type: str,
    source: str,
    task_start_time: float,
    result,
    batch_id,
    time_module,
) -> bool:
    """Record a successful run for which no screenshot will be taken.

    Always writes a "success" task log. For a batch task the result is then
    recorded on the batch and nothing else happens. For a non-batch task whose
    source starts with ``"user_scheduled"``, a completion e-mail is queued when
    the user has an address and e-mail notification enabled; e-mail failures
    are only logged.

    Returns:
        True when the batch result was recorded here, False otherwise.
    """
    items = result.total_items
    attachments = result.total_attachments

    _create_task_log(
        user_id=user_id,
        account_id=account_id,
        account=account,
        browse_type=browse_type,
        status="success",
        total_items=items,
        total_attachments=attachments,
        error_message="",
        duration=int(time_module.time() - task_start_time),
        source=source,
    )

    if batch_id:
        _record_batch_result(
            batch_id=batch_id,
            account=account,
            total_items=items,
            total_attachments=attachments,
        )
        return True

    if not (source and source.startswith("user_scheduled")):
        return False

    try:
        user_info = database.get_user_by_id(user_id)
        wants_email = user_info and user_info.get("email") and database.get_user_email_notify(user_id)
        if wants_email:
            email_service.send_task_complete_email_async(
                user_id=user_id,
                email=user_info["email"],
                username=user_info["username"],
                account_name=_account_display_name(account),
                browse_type=browse_type,
                total_items=result.total_items,
                total_attachments=result.total_attachments,
                screenshot_path=None,
                log_callback=lambda msg: log_to_client(msg, user_id, account_id),
            )
    except Exception as mail_error:
        logger.warning(f"发送任务完成邮件失败: {mail_error}")
    return False
def _is_timeout_error(error_msg: str) -> bool:
    """Return True when *error_msg* mentions a timeout.

    The match is a case-insensitive search for ``"timeout"``. ``None``, the
    empty string and other falsy values yield False instead of raising, so a
    failure that carries no message is treated as a non-timeout error.
    """
    if not error_msg:
        return False
    return "timeout" in str(error_msg).lower()
def _record_failed_task_log(
    *,
    account,
    user_id: int,
    account_id: str,
    browse_type: str,
    source: str,
    total_items: int,
    total_attachments: int,
    error_message: str,
    task_start_time: float,
    time_module,
) -> None:
    """Mark the account as errored and write a "failed" task log entry.

    The logged duration is the whole number of seconds elapsed since
    *task_start_time* according to ``time_module.time()``.
    """
    account.status = "出错"
    elapsed_seconds = int(time_module.time() - task_start_time)
    _create_task_log(
        user_id=user_id,
        account_id=account_id,
        account=account,
        browse_type=browse_type,
        status="failed",
        total_items=total_items,
        total_attachments=total_attachments,
        error_message=error_message,
        duration=elapsed_seconds,
        source=source,
    )
def _handle_failed_browse_result(
    *,
    account,
    result,
    error_msg: str,
    user_id: int,
    account_id: str,
    browse_type: str,
    source: str,
    attempt: int,
    max_attempts: int,
    task_start_time: float,
    time_module,
) -> str:
    """Decide what to do after ``browse_content`` reported a failure.

    Timeout errors close the account's automation object and, while attempts
    remain, wait two seconds and ask the caller to retry. Every other outcome
    is recorded as a failed task.

    Returns:
        ``"continue"`` to retry with the next attempt, ``"break"`` to stop.
    """

    def notify(message: str) -> None:
        log_to_client(message, user_id, account_id)

    if not _is_timeout_error(error_msg):
        notify(f"浏览出错: {error_msg}")
        failure_text = error_msg
    else:
        notify(f"⚠ 检测到超时错误: {error_msg}")
        browser_closed = _close_account_automation(
            account,
            on_error=lambda e: logger.debug(f"关闭超时浏览器实例失败: {e}"),
        )
        if browser_closed:
            notify("已关闭超时的浏览器实例")

        if attempt < max_attempts:
            notify(f"⚠ 代理可能速度过慢将换新IP重试 ({attempt}/{max_attempts})")
            time_module.sleep(2)
            return "continue"

        notify(f"❌ 已达到最大重试次数({max_attempts}),任务失败")
        failure_text = f"重试{max_attempts}次后仍失败: {error_msg}"

    _record_failed_task_log(
        account=account,
        user_id=user_id,
        account_id=account_id,
        browse_type=browse_type,
        source=source,
        total_items=result.total_items,
        total_attachments=result.total_attachments,
        error_message=failure_text,
        task_start_time=task_start_time,
        time_module=time_module,
    )
    return "break"
def _handle_attempt_exception(
    *,
    account,
    error_msg: str,
    user_id: int,
    account_id: str,
    browse_type: str,
    source: str,
    attempt: int,
    max_attempts: int,
    task_start_time: float,
    time_module,
) -> str:
    """Decide what to do after an attempt raised an exception.

    The account's automation object is always closed first. Timeout errors
    wait two seconds and ask the caller to retry while attempts remain; every
    other outcome is recorded as a failed task.

    Returns:
        ``"continue"`` to retry with the next attempt, ``"break"`` to stop.
    """
    _close_account_automation(
        account,
        on_error=lambda e: logger.debug(f"关闭浏览器实例失败: {e}"),
    )

    if _is_timeout_error(error_msg):
        log_to_client(f"⚠ 执行超时: {error_msg}", user_id, account_id)
        if attempt < max_attempts:
            log_to_client(f"⚠ 将换新IP重试 ({attempt}/{max_attempts})", user_id, account_id)
            time_module.sleep(2)
            return "continue"
        log_to_client(f"❌ 已达到最大重试次数({max_attempts}),任务失败", user_id, account_id)
        failure_text = f"重试{max_attempts}次后仍失败: {error_msg}"
    else:
        log_to_client(f"任务执行异常: {error_msg}", user_id, account_id)
        failure_text = error_msg

    # The attempt may have failed before the counters were ever set on the
    # account. Read them defensively so this error path cannot raise
    # AttributeError and hide the original failure.
    _record_failed_task_log(
        account=account,
        user_id=user_id,
        account_id=account_id,
        browse_type=browse_type,
        source=source,
        total_items=getattr(account, "total_items", 0) or 0,
        total_attachments=getattr(account, "total_attachments", 0) or 0,
        error_message=failure_text,
        task_start_time=task_start_time,
        time_module=time_module,
    )
    return "break"
def _schedule_auto_retry(
    *,
    user_id: int,
    account_id: str,
    browse_type: str,
    enable_screenshot: bool,
    source: str,
    retry_count: int,
) -> None:
    """Re-submit the task after a five-second delay.

    The re-submission runs on a ``threading.Timer``. If the timer cannot be
    started, it runs immediately in the calling thread instead. The retry is
    skipped when the account no longer exists or has been asked to stop.
    """
    next_retry = retry_count + 1

    def delayed_retry_submit():
        current = safe_get_account(user_id, account_id)
        if not current:
            log_to_client("自动重试取消: 账户不存在", user_id, account_id)
            return
        if current.should_stop:
            log_to_client("自动重试取消: 任务已被停止", user_id, account_id)
            return

        log_to_client(f"🔄 开始第 {next_retry} 次自动重试...", user_id, account_id)
        accepted, reason = submit_account_task(
            user_id=user_id,
            account_id=account_id,
            browse_type=browse_type,
            enable_screenshot=enable_screenshot,
            source=source,
            retry_count=next_retry,
        )
        if not accepted:
            log_to_client(f"自动重试提交失败: {reason}", user_id, account_id)

    try:
        retry_timer = threading.Timer(5, delayed_retry_submit)
        retry_timer.start()
    except Exception:
        # Could not start the timer thread: retry right away instead.
        delayed_retry_submit()
def _finalize_task_run(
    *,
    account,
    user_id: int,
    account_id: str,
    browse_type: str,
    source: str,
    enable_screenshot: bool,
    retry_count: int,
    max_auto_retry: int,
    task_start_time: float,
    result,
    batch_id,
    batch_recorded: bool,
) -> None:
    """Clean up after a task run and decide the follow-up action.

    Always clears the running flag, closes the automation object, and removes
    the task and its status. Then, based on the status the run left on the
    account:

    * completed and not stopped: start the screenshot thread (when enabled) or
      reset the account to idle;
    * errored with retries left: schedule an automatic retry;
    * login failure or error: keep that status;
    * anything else: reset the account to idle.

    Finally, the batch result is recorded unless a screenshot was submitted,
    the result was already recorded, or a retry is pending.
    """
    final_status = str(account.status or "")
    account.is_running = False
    screenshot_submitted = False
    room = f"user_{user_id}"

    def push_account_update() -> None:
        _emit("account_update", account.to_dict(), room=room)

    _close_account_automation(
        account,
        on_error=lambda e: log_to_client(f"关闭主任务浏览器时出错: {str(e)}", user_id, account_id),
    )
    safe_remove_task(account_id)
    safe_remove_task_status(account_id)

    if final_status == "已完成" and not account.should_stop:
        account.status = "已完成"
        push_account_update()
        if not enable_screenshot:
            account.status = "未开始"
            push_account_update()
            log_to_client("截图功能已禁用,跳过截图", user_id, account_id)
        else:
            log_to_client("等待2秒后开始截图...", user_id, account_id)
            account.status = "等待截图"
            push_account_update()

            browsed_items = result.total_items if result else 0
            browsed_attachments = result.total_attachments if result else 0
            safe_set_task_status(
                account_id,
                {
                    "user_id": user_id,
                    "username": account.username,
                    "status": "排队中",
                    "detail_status": "等待截图资源",
                    "browse_type": browse_type,
                    "start_time": time.time(),
                    "source": source,
                    "progress": {
                        "items": browsed_items,
                        "attachments": browsed_attachments,
                    },
                },
            )
            browse_result_dict = {
                "total_items": browsed_items,
                "total_attachments": browsed_attachments,
            }
            screenshot_submitted = True
            screenshot_thread = threading.Thread(
                target=take_screenshot_for_account,
                args=(user_id, account_id, browse_type, source, task_start_time, browse_result_dict),
                daemon=True,
            )
            screenshot_thread.start()
    elif final_status == "出错" and retry_count < max_auto_retry:
        log_to_client(
            f"⚠ 任务执行失败5秒后自动重试 ({retry_count + 1}/{max_auto_retry})...", user_id, account_id
        )
        account.status = "等待重试"
        push_account_update()
        _schedule_auto_retry(
            user_id=user_id,
            account_id=account_id,
            browse_type=browse_type,
            enable_screenshot=enable_screenshot,
            source=source,
            retry_count=retry_count,
        )
    elif final_status in ("登录失败", "出错"):
        account.status = final_status
        push_account_update()
    else:
        account.status = "未开始"
        push_account_update()

    retry_pending = account.status == "等待重试"
    if batch_id and not (screenshot_submitted or batch_recorded or retry_pending):
        _record_batch_result(
            batch_id=batch_id,
            account=account,
            total_items=getattr(account, "total_items", 0) or 0,
            total_attachments=getattr(account, "total_attachments", 0) or 0,
        )
def run_task(user_id, account_id, browse_type, enable_screenshot=True, source="manual", retry_count=0):
    """Run the automated browse task for one account.

    Flow: bail out if the account is gone or a stop was requested while
    queued; create a checkpoint; run up to three login-and-browse attempts
    (timeouts trigger a new attempt); record the outcome; always finalize via
    ``_finalize_task_run``, which handles screenshots, automatic retry and
    batch bookkeeping.

    Args:
        user_id: Owner of the account.
        account_id: Account to run the task for.
        browse_type: Browse mode passed to the browser.
        enable_screenshot: Whether a screenshot follows a successful run.
        source: Origin label of the task; also used to derive the batch id.
        retry_count: Number of automatic retries already performed. The
            auto-retry mechanism retries at most ``MAX_AUTO_RETRY`` (2) times.
    """
    MAX_AUTO_RETRY = 2  # maximum number of automatic retries

    LoggerAdapter("app", {"user_id": user_id, "account_id": account_id, "source": source}).debug(
        f"run_task enable_screenshot={enable_screenshot} ({type(enable_screenshot).__name__}), retry={retry_count}"
    )

    account = safe_get_account(user_id, account_id)
    if not account:
        return

    batch_id = _get_batch_id_from_source(source)
    batch_recorded = False
    checkpoint_mgr = get_checkpoint_mgr()
    # The helpers take the clock as a parameter; hand them the module-level ``time``.
    time_module = time
    task_start_time = time_module.time()
    result = None

    # Stop requested while queued: cancel without running the finalizer.
    if _handle_stop_requested(
        account=account,
        user_id=user_id,
        account_id=account_id,
        batch_id=batch_id,
        remove_task_status=True,
        record_batch=True,
    ):
        return

    try:
        # From here on every exit path goes through _finalize_task_run.
        if _handle_stop_requested(
            account=account,
            user_id=user_id,
            account_id=account_id,
            batch_id=batch_id,
        ):
            return

        task_id = checkpoint_mgr.create_checkpoint(
            user_id=user_id, account_id=account_id, username=account.username, browse_type=browse_type
        )
        logger.info(f"[断点] 任务 {task_id} 已创建")

        task_start_time = time_module.time()
        account.status = "运行中"
        _emit("account_update", account.to_dict(), room=f"user_{user_id}")
        account.last_browse_type = browse_type
        safe_update_task_status(
            account_id, {"status": "运行中", "detail_status": "初始化", "start_time": task_start_time}
        )

        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            try:
                if attempt > 1:
                    log_to_client(f"🔄 第 {attempt} 次尝试(共{max_attempts}次)...", user_id, account_id)

                attempt_status, result = _execute_single_attempt(
                    account=account,
                    user_id=user_id,
                    account_id=account_id,
                    browse_type=browse_type,
                    source=source,
                    checkpoint_mgr=checkpoint_mgr,
                    task_id=task_id,
                    task_start_time=task_start_time,
                    time_module=time_module,
                )
                if attempt_status == "login_failed":
                    # Already logged and recorded by the attempt itself.
                    return

                account.total_items = result.total_items
                account.total_attachments = result.total_attachments

                if result.success:
                    log_to_client(
                        f"浏览完成! 共 {result.total_items} 条内容,{result.total_attachments} 个附件",
                        user_id,
                        account_id,
                    )
                    safe_update_task_status(
                        account_id,
                        {
                            "detail_status": "浏览完成",
                            "progress": {"items": result.total_items, "attachments": result.total_attachments},
                        },
                    )
                    account.status = "已完成"
                    checkpoint_mgr.update_stage(task_id, TaskStage.COMPLETING, progress_percent=95)
                    checkpoint_mgr.complete_task(task_id, success=True)
                    logger.info(f"[断点] 任务 {task_id} 已完成")

                    if not enable_screenshot:
                        batch_recorded = _record_success_without_screenshot(
                            account=account,
                            user_id=user_id,
                            account_id=account_id,
                            browse_type=browse_type,
                            source=source,
                            task_start_time=task_start_time,
                            result=result,
                            batch_id=batch_id,
                            time_module=time_module,
                        )
                    break

                # A failed result may carry no message; substitute a
                # placeholder so the timeout check and the log entry always
                # receive a string.
                error_msg = str(result.error_message or "未知错误")
                action = _handle_failed_browse_result(
                    account=account,
                    result=result,
                    error_msg=error_msg,
                    user_id=user_id,
                    account_id=account_id,
                    browse_type=browse_type,
                    source=source,
                    attempt=attempt,
                    max_attempts=max_attempts,
                    task_start_time=task_start_time,
                    time_module=time_module,
                )
                if action != "continue":
                    break
            except Exception as retry_error:
                error_msg = str(retry_error)
                action = _handle_attempt_exception(
                    account=account,
                    error_msg=error_msg,
                    user_id=user_id,
                    account_id=account_id,
                    browse_type=browse_type,
                    source=source,
                    attempt=attempt,
                    max_attempts=max_attempts,
                    task_start_time=task_start_time,
                    time_module=time_module,
                )
                if action != "continue":
                    break
    except Exception as e:
        error_msg = str(e)
        log_to_client(f"任务执行出错: {error_msg}", user_id, account_id)
        account.status = "出错"
        # The counters may never have been set if the failure happened early;
        # read them defensively so logging cannot raise a second exception.
        _create_task_log(
            user_id=user_id,
            account_id=account_id,
            account=account,
            browse_type=browse_type,
            status="failed",
            total_items=getattr(account, "total_items", 0) or 0,
            total_attachments=getattr(account, "total_attachments", 0) or 0,
            error_message=error_msg,
            duration=int(time_module.time() - task_start_time),
            source=source,
        )
    finally:
        _finalize_task_run(
            account=account,
            user_id=user_id,
            account_id=account_id,
            browse_type=browse_type,
            source=source,
            enable_screenshot=enable_screenshot,
            retry_count=retry_count,
            max_auto_retry=MAX_AUTO_RETRY,
            task_start_time=task_start_time,
            result=result,
            batch_id=batch_id,
            batch_recorded=batch_recorded,
        )