replace screenshot pipeline and update admin

This commit is contained in:
2025-12-31 16:50:35 +08:00
parent 2d98ab66a3
commit 41ead4bead
25 changed files with 443 additions and 2250 deletions

View File

@@ -1,42 +1,22 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""浏览器池管理 - 工作线程池模式(真正的浏览器复用"""
"""截图线程池管理 - 工作线程池模式(并发执行截图任务"""
import os
import threading
import queue
import time
from typing import Callable, Optional, Dict, Any
import nest_asyncio
_NEST_ASYNCIO_APPLIED = False
_NEST_ASYNCIO_LOCK = threading.Lock()
def _apply_nest_asyncio_once() -> None:
"""按需应用 nest_asyncio避免 import 时产生全局副作用。"""
global _NEST_ASYNCIO_APPLIED
if _NEST_ASYNCIO_APPLIED:
return
with _NEST_ASYNCIO_LOCK:
if _NEST_ASYNCIO_APPLIED:
return
try:
nest_asyncio.apply()
except Exception:
pass
_NEST_ASYNCIO_APPLIED = True
# 安全修复: 将魔法数字提取为可配置常量
BROWSER_IDLE_TIMEOUT = int(os.environ.get('BROWSER_IDLE_TIMEOUT', '300')) # 空闲超时(秒)默认5分钟
TASK_QUEUE_TIMEOUT = int(os.environ.get('TASK_QUEUE_TIMEOUT', '10')) # 队列获取超时(秒)
TASK_QUEUE_MAXSIZE = int(os.environ.get('BROWSER_TASK_QUEUE_MAXSIZE', '200')) # 队列最大长度(0表示无限制)
BROWSER_MAX_USE_COUNT = int(os.environ.get('BROWSER_MAX_USE_COUNT', '0')) # 每个浏览器最大复用次数(0表示不限制)
BROWSER_MAX_USE_COUNT = int(os.environ.get('BROWSER_MAX_USE_COUNT', '0')) # 每个执行环境最大复用次数(0表示不限制)
class BrowserWorker(threading.Thread):
"""浏览器工作线程 - 每个worker维护自己的浏览器"""
"""截图工作线程 - 每个worker维护自己的执行环境"""
def __init__(
self,
@@ -62,82 +42,44 @@ class BrowserWorker(threading.Thread):
if self.log_callback:
self.log_callback(f"[Worker-{self.worker_id}] {message}")
else:
print(f"[浏览器池][Worker-{self.worker_id}] {message}")
print(f"[截图池][Worker-{self.worker_id}] {message}")
def _create_browser(self):
"""创建浏览器实例"""
try:
from playwright.sync_api import sync_playwright
"""创建截图执行环境(逻辑占位,无需真实浏览器"""
created_at = time.time()
self.browser_instance = {
'created_at': created_at,
'use_count': 0,
'worker_id': self.worker_id,
}
self.last_activity_ts = created_at
self.log("截图执行环境就绪")
return True
self.log("正在创建浏览器...")
playwright = sync_playwright().start()
browser = playwright.chromium.launch(
headless=True,
args=[
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-gpu',
]
)
def _close_browser(self):
"""关闭截图执行环境"""
if self.browser_instance:
self.log(f"执行环境已释放(共处理{self.browser_instance.get('use_count', 0)}个任务)")
self.browser_instance = None
created_at = time.time()
self.browser_instance = {
'playwright': playwright,
'browser': browser,
'created_at': created_at,
'use_count': 0,
'worker_id': self.worker_id
}
self.last_activity_ts = created_at
self.log(f"浏览器创建成功")
def _check_browser_health(self) -> bool:
"""检查执行环境是否就绪"""
return bool(self.browser_instance)
def _ensure_browser(self) -> bool:
"""确保执行环境可用"""
if self._check_browser_health():
return True
except Exception as e:
self.log(f"创建浏览器失败: {e}")
return False
def _close_browser(self):
"""关闭浏览器"""
if self.browser_instance:
try:
self.log("正在关闭浏览器...")
if self.browser_instance['browser']:
self.browser_instance['browser'].close()
if self.browser_instance['playwright']:
self.browser_instance['playwright'].stop()
self.log(f"浏览器已关闭(共处理{self.browser_instance['use_count']}个任务)")
except Exception as e:
self.log(f"关闭浏览器时出错: {e}")
finally:
self.browser_instance = None
def _check_browser_health(self) -> bool:
"""检查浏览器是否健康"""
if not self.browser_instance:
return False
try:
return self.browser_instance['browser'].is_connected()
except:
return False
def _ensure_browser(self) -> bool:
"""确保浏览器可用(如果不可用则重新创建)"""
if self._check_browser_health():
return True
# 浏览器不可用,尝试重新创建
self.log("浏览器不可用,尝试重新创建...")
self._close_browser()
return self._create_browser()
self.log("执行环境不可用,尝试重新创建...")
self._close_browser()
return self._create_browser()
def run(self):
"""工作线程主循环 - 按需启动浏览器模式"""
"""工作线程主循环 - 按需启动执行环境模式"""
if self.pre_warm:
self.log("Worker启动预热模式启动即创建浏览器")
self.log("Worker启动预热模式启动即准备执行环境")
else:
self.log("Worker启动按需模式等待任务时不占用浏览器资源)")
self.log("Worker启动按需模式等待任务时不占用资源")
if self.pre_warm and not self.browser_instance:
self._create_browser()
@@ -155,11 +97,11 @@ class BrowserWorker(threading.Thread):
try:
task = self.task_queue.get(timeout=TASK_QUEUE_TIMEOUT)
except queue.Empty:
# 检查是否需要关闭空闲的浏览器
# 检查是否需要释放空闲的执行环境
if self.browser_instance and self.last_activity_ts > 0:
idle_time = time.time() - self.last_activity_ts
if idle_time > BROWSER_IDLE_TIMEOUT:
self.log(f"空闲{int(idle_time)}秒,关闭浏览器释放资源")
self.log(f"空闲{int(idle_time)}秒,释放执行环境")
self._close_browser()
continue
@@ -169,14 +111,14 @@ class BrowserWorker(threading.Thread):
self.log("收到停止信号")
break
# 按需创建或确保浏览器可用
# 按需创建或确保执行环境可用
browser_ready = False
for attempt in range(2):
if self._ensure_browser():
browser_ready = True
break
if attempt < 1:
self.log("浏览器创建失败,重试...")
self.log("执行环境创建失败,重试...")
time.sleep(0.5)
if not browser_ready:
@@ -185,20 +127,20 @@ class BrowserWorker(threading.Thread):
task["retry_count"] = retry_count + 1
try:
self.task_queue.put(task, timeout=1)
self.log("浏览器不可用,任务重新入队")
self.log("执行环境不可用,任务重新入队")
except queue.Full:
self.log("任务队列已满,无法重新入队,任务失败")
callback = task.get("callback")
if callable(callback):
callback(None, "浏览器不可用")
callback(None, "执行环境不可用")
self.total_tasks += 1
self.failed_tasks += 1
continue
self.log("浏览器不可用,任务失败")
self.log("执行环境不可用,任务失败")
callback = task.get("callback") if isinstance(task, dict) else None
if callable(callback):
callback(None, "浏览器不可用")
callback(None, "执行环境不可用")
self.total_tasks += 1
self.failed_tasks += 1
continue
@@ -212,10 +154,10 @@ class BrowserWorker(threading.Thread):
self.total_tasks += 1
self.browser_instance['use_count'] += 1
self.log(f"开始执行任务(第{self.browser_instance['use_count']}使用浏览器")
self.log(f"开始执行任务(第{self.browser_instance['use_count']}执行")
try:
# 将浏览器实例传递给任务函数
# 将执行环境实例传递给任务函数
result = task_func(self.browser_instance, *task_args, **task_kwargs)
callback(result, None)
self.log(f"任务执行成功")
@@ -227,15 +169,15 @@ class BrowserWorker(threading.Thread):
self.failed_tasks += 1
self.last_activity_ts = time.time()
# 任务失败后,检查浏览器健康
# 任务失败后,检查执行环境健康
if not self._check_browser_health():
self.log("任务失败导致浏览器异常,将在下次任务前重建")
self.log("任务失败导致执行环境异常,将在下次任务前重建")
self._close_browser()
# 定期重启浏览器释放Chromium可能累积的内存
# 定期重启执行环境,释放可能累积的资源
if self.browser_instance and BROWSER_MAX_USE_COUNT > 0:
if self.browser_instance.get('use_count', 0) >= BROWSER_MAX_USE_COUNT:
self.log(f"浏览器已复用{self.browser_instance['use_count']}次,重启释放资源")
self.log(f"执行环境已复用{self.browser_instance['use_count']}次,重启释放资源")
self._close_browser()
except Exception as e:
@@ -252,7 +194,7 @@ class BrowserWorker(threading.Thread):
class BrowserWorkerPool:
"""浏览器工作线程池"""
"""截图工作线程池"""
def __init__(self, pool_size: int = 3, log_callback: Optional[Callable] = None):
self.pool_size = pool_size
@@ -265,20 +207,18 @@ class BrowserWorkerPool:
def log(self, message: str):
"""日志输出"""
if self.log_callback:
self.log_callback(message)
else:
print(f"[浏览器池] {message}")
if self.log_callback:
self.log_callback(message)
else:
print(f"[截图池] {message}")
def initialize(self):
"""初始化工作线程池按需模式默认预热1个浏览器"""
"""初始化工作线程池按需模式默认预热1个执行环境"""
with self.lock:
if self.initialized:
return
_apply_nest_asyncio_once()
self.log(f"正在初始化工作线程池({self.pool_size}个worker按需启动浏览器...")
self.log(f"正在初始化截图线程池({self.pool_size}个worker按需启动执行环境...")
for i in range(self.pool_size):
worker = BrowserWorker(
@@ -291,13 +231,13 @@ class BrowserWorkerPool:
self.workers.append(worker)
self.initialized = True
self.log(f"工作线程池初始化完成({self.pool_size}个worker就绪浏览器将在有任务时按需启动)")
self.log(f"截图线程池初始化完成({self.pool_size}个worker就绪执行环境将在有任务时按需启动)")
# 初始化完成后默认预热1个浏览器,降低容器重启后前几批任务的冷启动开销
# 初始化完成后默认预热1个执行环境,降低容器重启后前几批任务的冷启动开销
self.warmup(1)
def warmup(self, count: int = 1) -> int:
"""预热浏览器池 - 预创建指定数量的浏览器"""
"""预热截图线程池 - 预创建指定数量的执行环境"""
if count <= 0:
return 0
@@ -308,7 +248,7 @@ class BrowserWorkerPool:
with self.lock:
target_workers = list(self.workers[: min(count, len(self.workers))])
self.log(f"预热浏览器池(预创建{len(target_workers)}浏览器...")
self.log(f"预热截图线程池(预创建{len(target_workers)}执行环境...")
for worker in target_workers:
if not worker.browser_instance:
@@ -323,7 +263,7 @@ class BrowserWorkerPool:
time.sleep(0.1)
warmed = sum(1 for w in target_workers if w.browser_instance)
self.log(f"浏览器池预热完成({warmed}浏览器就绪)")
self.log(f"截图线程池预热完成({warmed}执行环境就绪)")
return warmed
def submit_task(self, task_func: Callable, callback: Callable, *args, **kwargs) -> bool:
@@ -434,8 +374,8 @@ _global_pool: Optional[BrowserWorkerPool] = None
_pool_lock = threading.Lock()
def get_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable] = None) -> BrowserWorkerPool:
"""获取全局浏览器工作线程池(单例)"""
def get_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable] = None) -> BrowserWorkerPool:
"""获取全局截图工作线程池(单例)"""
global _global_pool
with _pool_lock:
@@ -446,14 +386,48 @@ def get_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable]
return _global_pool
def init_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable] = None):
"""初始化全局浏览器工作线程池"""
get_browser_worker_pool(pool_size=pool_size, log_callback=log_callback)
def shutdown_browser_worker_pool():
"""关闭全局浏览器工作线程池"""
global _global_pool
def init_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable] = None):
"""初始化全局截图工作线程池"""
get_browser_worker_pool(pool_size=pool_size, log_callback=log_callback)
def _shutdown_pool_when_idle(pool: BrowserWorkerPool) -> None:
try:
pool.wait_for_completion(timeout=60)
except Exception:
pass
try:
pool.shutdown()
except Exception:
pass
def resize_browser_worker_pool(pool_size: int, log_callback: Optional[Callable] = None) -> bool:
"""调整截图线程池并发(新任务走新池,旧池空闲后自动关闭)"""
global _global_pool
try:
target_size = max(1, int(pool_size))
except Exception:
target_size = 1
with _pool_lock:
old_pool = _global_pool
if old_pool and int(getattr(old_pool, "pool_size", 0) or 0) == target_size:
return False
effective_log_callback = log_callback or (getattr(old_pool, "log_callback", None) if old_pool else None)
_global_pool = BrowserWorkerPool(pool_size=target_size, log_callback=effective_log_callback)
_global_pool.initialize()
if old_pool:
threading.Thread(target=_shutdown_pool_when_idle, args=(old_pool,), daemon=True).start()
return True
def shutdown_browser_worker_pool():
"""关闭全局截图工作线程池"""
global _global_pool
with _pool_lock:
if _global_pool:
@@ -461,9 +435,9 @@ def shutdown_browser_worker_pool():
_global_pool = None
if __name__ == '__main__':
# 测试代码
print("测试浏览器工作线程池...")
if __name__ == '__main__':
# 测试代码
print("测试截图工作线程池...")
def test_task(browser_instance, url: str, task_id: int):
"""测试任务访问URL"""
@@ -478,8 +452,8 @@ if __name__ == '__main__':
else:
print(f"任务成功: {result}")
# 创建线程池2个worker
pool = BrowserWorkerPool(pool_size=2)
# 创建线程池2个worker
pool = BrowserWorkerPool(pool_size=2)
pool.initialize()
# 提交4个任务