replace screenshot pipeline and update admin
This commit is contained in:
@@ -1,42 +1,22 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""浏览器池管理 - 工作线程池模式(真正的浏览器复用)"""
|
||||
"""截图线程池管理 - 工作线程池模式(并发执行截图任务)"""
|
||||
|
||||
import os
|
||||
import threading
|
||||
import queue
|
||||
import time
|
||||
from typing import Callable, Optional, Dict, Any
|
||||
import nest_asyncio
|
||||
|
||||
_NEST_ASYNCIO_APPLIED = False
|
||||
_NEST_ASYNCIO_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _apply_nest_asyncio_once() -> None:
|
||||
"""按需应用 nest_asyncio,避免 import 时产生全局副作用。"""
|
||||
global _NEST_ASYNCIO_APPLIED
|
||||
|
||||
if _NEST_ASYNCIO_APPLIED:
|
||||
return
|
||||
with _NEST_ASYNCIO_LOCK:
|
||||
if _NEST_ASYNCIO_APPLIED:
|
||||
return
|
||||
try:
|
||||
nest_asyncio.apply()
|
||||
except Exception:
|
||||
pass
|
||||
_NEST_ASYNCIO_APPLIED = True
|
||||
|
||||
# 安全修复: 将魔法数字提取为可配置常量
|
||||
BROWSER_IDLE_TIMEOUT = int(os.environ.get('BROWSER_IDLE_TIMEOUT', '300')) # 空闲超时(秒),默认5分钟
|
||||
TASK_QUEUE_TIMEOUT = int(os.environ.get('TASK_QUEUE_TIMEOUT', '10')) # 队列获取超时(秒)
|
||||
TASK_QUEUE_MAXSIZE = int(os.environ.get('BROWSER_TASK_QUEUE_MAXSIZE', '200')) # 队列最大长度(0表示无限制)
|
||||
BROWSER_MAX_USE_COUNT = int(os.environ.get('BROWSER_MAX_USE_COUNT', '0')) # 每个浏览器最大复用次数(0表示不限制)
|
||||
BROWSER_MAX_USE_COUNT = int(os.environ.get('BROWSER_MAX_USE_COUNT', '0')) # 每个执行环境最大复用次数(0表示不限制)
|
||||
|
||||
|
||||
class BrowserWorker(threading.Thread):
|
||||
"""浏览器工作线程 - 每个worker维护自己的浏览器"""
|
||||
"""截图工作线程 - 每个worker维护自己的执行环境"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -62,82 +42,44 @@ class BrowserWorker(threading.Thread):
|
||||
if self.log_callback:
|
||||
self.log_callback(f"[Worker-{self.worker_id}] {message}")
|
||||
else:
|
||||
print(f"[浏览器池][Worker-{self.worker_id}] {message}")
|
||||
print(f"[截图池][Worker-{self.worker_id}] {message}")
|
||||
|
||||
def _create_browser(self):
|
||||
"""创建浏览器实例"""
|
||||
try:
|
||||
from playwright.sync_api import sync_playwright
|
||||
"""创建截图执行环境(逻辑占位,无需真实浏览器)"""
|
||||
created_at = time.time()
|
||||
self.browser_instance = {
|
||||
'created_at': created_at,
|
||||
'use_count': 0,
|
||||
'worker_id': self.worker_id,
|
||||
}
|
||||
self.last_activity_ts = created_at
|
||||
self.log("截图执行环境就绪")
|
||||
return True
|
||||
|
||||
self.log("正在创建浏览器...")
|
||||
playwright = sync_playwright().start()
|
||||
browser = playwright.chromium.launch(
|
||||
headless=True,
|
||||
args=[
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-gpu',
|
||||
]
|
||||
)
|
||||
def _close_browser(self):
|
||||
"""关闭截图执行环境"""
|
||||
if self.browser_instance:
|
||||
self.log(f"执行环境已释放(共处理{self.browser_instance.get('use_count', 0)}个任务)")
|
||||
self.browser_instance = None
|
||||
|
||||
created_at = time.time()
|
||||
self.browser_instance = {
|
||||
'playwright': playwright,
|
||||
'browser': browser,
|
||||
'created_at': created_at,
|
||||
'use_count': 0,
|
||||
'worker_id': self.worker_id
|
||||
}
|
||||
self.last_activity_ts = created_at
|
||||
self.log(f"浏览器创建成功")
|
||||
def _check_browser_health(self) -> bool:
|
||||
"""检查执行环境是否就绪"""
|
||||
return bool(self.browser_instance)
|
||||
|
||||
def _ensure_browser(self) -> bool:
|
||||
"""确保执行环境可用"""
|
||||
if self._check_browser_health():
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"创建浏览器失败: {e}")
|
||||
return False
|
||||
|
||||
def _close_browser(self):
|
||||
"""关闭浏览器"""
|
||||
if self.browser_instance:
|
||||
try:
|
||||
self.log("正在关闭浏览器...")
|
||||
if self.browser_instance['browser']:
|
||||
self.browser_instance['browser'].close()
|
||||
if self.browser_instance['playwright']:
|
||||
self.browser_instance['playwright'].stop()
|
||||
self.log(f"浏览器已关闭(共处理{self.browser_instance['use_count']}个任务)")
|
||||
except Exception as e:
|
||||
self.log(f"关闭浏览器时出错: {e}")
|
||||
finally:
|
||||
self.browser_instance = None
|
||||
|
||||
def _check_browser_health(self) -> bool:
|
||||
"""检查浏览器是否健康"""
|
||||
if not self.browser_instance:
|
||||
return False
|
||||
|
||||
try:
|
||||
return self.browser_instance['browser'].is_connected()
|
||||
except:
|
||||
return False
|
||||
|
||||
def _ensure_browser(self) -> bool:
|
||||
"""确保浏览器可用(如果不可用则重新创建)"""
|
||||
if self._check_browser_health():
|
||||
return True
|
||||
|
||||
# 浏览器不可用,尝试重新创建
|
||||
self.log("浏览器不可用,尝试重新创建...")
|
||||
self._close_browser()
|
||||
return self._create_browser()
|
||||
self.log("执行环境不可用,尝试重新创建...")
|
||||
self._close_browser()
|
||||
return self._create_browser()
|
||||
|
||||
def run(self):
|
||||
"""工作线程主循环 - 按需启动浏览器模式"""
|
||||
"""工作线程主循环 - 按需启动执行环境模式"""
|
||||
if self.pre_warm:
|
||||
self.log("Worker启动(预热模式,启动即创建浏览器)")
|
||||
self.log("Worker启动(预热模式,启动即准备执行环境)")
|
||||
else:
|
||||
self.log("Worker启动(按需模式,等待任务时不占用浏览器资源)")
|
||||
self.log("Worker启动(按需模式,等待任务时不占用资源)")
|
||||
|
||||
if self.pre_warm and not self.browser_instance:
|
||||
self._create_browser()
|
||||
@@ -155,11 +97,11 @@ class BrowserWorker(threading.Thread):
|
||||
try:
|
||||
task = self.task_queue.get(timeout=TASK_QUEUE_TIMEOUT)
|
||||
except queue.Empty:
|
||||
# 检查是否需要关闭空闲的浏览器
|
||||
# 检查是否需要释放空闲的执行环境
|
||||
if self.browser_instance and self.last_activity_ts > 0:
|
||||
idle_time = time.time() - self.last_activity_ts
|
||||
if idle_time > BROWSER_IDLE_TIMEOUT:
|
||||
self.log(f"空闲{int(idle_time)}秒,关闭浏览器释放资源")
|
||||
self.log(f"空闲{int(idle_time)}秒,释放执行环境")
|
||||
self._close_browser()
|
||||
continue
|
||||
|
||||
@@ -169,14 +111,14 @@ class BrowserWorker(threading.Thread):
|
||||
self.log("收到停止信号")
|
||||
break
|
||||
|
||||
# 按需创建或确保浏览器可用
|
||||
# 按需创建或确保执行环境可用
|
||||
browser_ready = False
|
||||
for attempt in range(2):
|
||||
if self._ensure_browser():
|
||||
browser_ready = True
|
||||
break
|
||||
if attempt < 1:
|
||||
self.log("浏览器创建失败,重试...")
|
||||
self.log("执行环境创建失败,重试...")
|
||||
time.sleep(0.5)
|
||||
|
||||
if not browser_ready:
|
||||
@@ -185,20 +127,20 @@ class BrowserWorker(threading.Thread):
|
||||
task["retry_count"] = retry_count + 1
|
||||
try:
|
||||
self.task_queue.put(task, timeout=1)
|
||||
self.log("浏览器不可用,任务重新入队")
|
||||
self.log("执行环境不可用,任务重新入队")
|
||||
except queue.Full:
|
||||
self.log("任务队列已满,无法重新入队,任务失败")
|
||||
callback = task.get("callback")
|
||||
if callable(callback):
|
||||
callback(None, "浏览器不可用")
|
||||
callback(None, "执行环境不可用")
|
||||
self.total_tasks += 1
|
||||
self.failed_tasks += 1
|
||||
continue
|
||||
|
||||
self.log("浏览器不可用,任务失败")
|
||||
self.log("执行环境不可用,任务失败")
|
||||
callback = task.get("callback") if isinstance(task, dict) else None
|
||||
if callable(callback):
|
||||
callback(None, "浏览器不可用")
|
||||
callback(None, "执行环境不可用")
|
||||
self.total_tasks += 1
|
||||
self.failed_tasks += 1
|
||||
continue
|
||||
@@ -212,10 +154,10 @@ class BrowserWorker(threading.Thread):
|
||||
self.total_tasks += 1
|
||||
self.browser_instance['use_count'] += 1
|
||||
|
||||
self.log(f"开始执行任务(第{self.browser_instance['use_count']}次使用浏览器)")
|
||||
self.log(f"开始执行任务(第{self.browser_instance['use_count']}次执行)")
|
||||
|
||||
try:
|
||||
# 将浏览器实例传递给任务函数
|
||||
# 将执行环境实例传递给任务函数
|
||||
result = task_func(self.browser_instance, *task_args, **task_kwargs)
|
||||
callback(result, None)
|
||||
self.log(f"任务执行成功")
|
||||
@@ -227,15 +169,15 @@ class BrowserWorker(threading.Thread):
|
||||
self.failed_tasks += 1
|
||||
self.last_activity_ts = time.time()
|
||||
|
||||
# 任务失败后,检查浏览器健康
|
||||
# 任务失败后,检查执行环境健康
|
||||
if not self._check_browser_health():
|
||||
self.log("任务失败导致浏览器异常,将在下次任务前重建")
|
||||
self.log("任务失败导致执行环境异常,将在下次任务前重建")
|
||||
self._close_browser()
|
||||
|
||||
# 定期重启浏览器,释放Chromium可能累积的内存
|
||||
# 定期重启执行环境,释放可能累积的资源
|
||||
if self.browser_instance and BROWSER_MAX_USE_COUNT > 0:
|
||||
if self.browser_instance.get('use_count', 0) >= BROWSER_MAX_USE_COUNT:
|
||||
self.log(f"浏览器已复用{self.browser_instance['use_count']}次,重启释放资源")
|
||||
self.log(f"执行环境已复用{self.browser_instance['use_count']}次,重启释放资源")
|
||||
self._close_browser()
|
||||
|
||||
except Exception as e:
|
||||
@@ -252,7 +194,7 @@ class BrowserWorker(threading.Thread):
|
||||
|
||||
|
||||
class BrowserWorkerPool:
|
||||
"""浏览器工作线程池"""
|
||||
"""截图工作线程池"""
|
||||
|
||||
def __init__(self, pool_size: int = 3, log_callback: Optional[Callable] = None):
|
||||
self.pool_size = pool_size
|
||||
@@ -265,20 +207,18 @@ class BrowserWorkerPool:
|
||||
|
||||
def log(self, message: str):
|
||||
"""日志输出"""
|
||||
if self.log_callback:
|
||||
self.log_callback(message)
|
||||
else:
|
||||
print(f"[浏览器池] {message}")
|
||||
if self.log_callback:
|
||||
self.log_callback(message)
|
||||
else:
|
||||
print(f"[截图池] {message}")
|
||||
|
||||
def initialize(self):
|
||||
"""初始化工作线程池(按需模式,默认预热1个浏览器)"""
|
||||
"""初始化工作线程池(按需模式,默认预热1个执行环境)"""
|
||||
with self.lock:
|
||||
if self.initialized:
|
||||
return
|
||||
|
||||
_apply_nest_asyncio_once()
|
||||
|
||||
self.log(f"正在初始化工作线程池({self.pool_size}个worker,按需启动浏览器)...")
|
||||
self.log(f"正在初始化截图线程池({self.pool_size}个worker,按需启动执行环境)...")
|
||||
|
||||
for i in range(self.pool_size):
|
||||
worker = BrowserWorker(
|
||||
@@ -291,13 +231,13 @@ class BrowserWorkerPool:
|
||||
self.workers.append(worker)
|
||||
|
||||
self.initialized = True
|
||||
self.log(f"✓ 工作线程池初始化完成({self.pool_size}个worker就绪,浏览器将在有任务时按需启动)")
|
||||
self.log(f"✓ 截图线程池初始化完成({self.pool_size}个worker就绪,执行环境将在有任务时按需启动)")
|
||||
|
||||
# 初始化完成后,默认预热1个浏览器,降低容器重启后前几批任务的冷启动开销
|
||||
# 初始化完成后,默认预热1个执行环境,降低容器重启后前几批任务的冷启动开销
|
||||
self.warmup(1)
|
||||
|
||||
def warmup(self, count: int = 1) -> int:
|
||||
"""预热浏览器池 - 预创建指定数量的浏览器"""
|
||||
"""预热截图线程池 - 预创建指定数量的执行环境"""
|
||||
if count <= 0:
|
||||
return 0
|
||||
|
||||
@@ -308,7 +248,7 @@ class BrowserWorkerPool:
|
||||
with self.lock:
|
||||
target_workers = list(self.workers[: min(count, len(self.workers))])
|
||||
|
||||
self.log(f"预热浏览器池(预创建{len(target_workers)}个浏览器)...")
|
||||
self.log(f"预热截图线程池(预创建{len(target_workers)}个执行环境)...")
|
||||
|
||||
for worker in target_workers:
|
||||
if not worker.browser_instance:
|
||||
@@ -323,7 +263,7 @@ class BrowserWorkerPool:
|
||||
time.sleep(0.1)
|
||||
|
||||
warmed = sum(1 for w in target_workers if w.browser_instance)
|
||||
self.log(f"✓ 浏览器池预热完成({warmed}个浏览器就绪)")
|
||||
self.log(f"✓ 截图线程池预热完成({warmed}个执行环境就绪)")
|
||||
return warmed
|
||||
|
||||
def submit_task(self, task_func: Callable, callback: Callable, *args, **kwargs) -> bool:
|
||||
@@ -434,8 +374,8 @@ _global_pool: Optional[BrowserWorkerPool] = None
|
||||
_pool_lock = threading.Lock()
|
||||
|
||||
|
||||
def get_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable] = None) -> BrowserWorkerPool:
|
||||
"""获取全局浏览器工作线程池(单例)"""
|
||||
def get_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable] = None) -> BrowserWorkerPool:
|
||||
"""获取全局截图工作线程池(单例)"""
|
||||
global _global_pool
|
||||
|
||||
with _pool_lock:
|
||||
@@ -446,14 +386,48 @@ def get_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable]
|
||||
return _global_pool
|
||||
|
||||
|
||||
def init_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable] = None):
|
||||
"""初始化全局浏览器工作线程池"""
|
||||
get_browser_worker_pool(pool_size=pool_size, log_callback=log_callback)
|
||||
|
||||
|
||||
def shutdown_browser_worker_pool():
|
||||
"""关闭全局浏览器工作线程池"""
|
||||
global _global_pool
|
||||
def init_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable] = None):
|
||||
"""初始化全局截图工作线程池"""
|
||||
get_browser_worker_pool(pool_size=pool_size, log_callback=log_callback)
|
||||
|
||||
|
||||
def _shutdown_pool_when_idle(pool: BrowserWorkerPool) -> None:
|
||||
try:
|
||||
pool.wait_for_completion(timeout=60)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
pool.shutdown()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def resize_browser_worker_pool(pool_size: int, log_callback: Optional[Callable] = None) -> bool:
|
||||
"""调整截图线程池并发(新任务走新池,旧池空闲后自动关闭)"""
|
||||
global _global_pool
|
||||
|
||||
try:
|
||||
target_size = max(1, int(pool_size))
|
||||
except Exception:
|
||||
target_size = 1
|
||||
|
||||
with _pool_lock:
|
||||
old_pool = _global_pool
|
||||
if old_pool and int(getattr(old_pool, "pool_size", 0) or 0) == target_size:
|
||||
return False
|
||||
effective_log_callback = log_callback or (getattr(old_pool, "log_callback", None) if old_pool else None)
|
||||
_global_pool = BrowserWorkerPool(pool_size=target_size, log_callback=effective_log_callback)
|
||||
_global_pool.initialize()
|
||||
|
||||
if old_pool:
|
||||
threading.Thread(target=_shutdown_pool_when_idle, args=(old_pool,), daemon=True).start()
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def shutdown_browser_worker_pool():
|
||||
"""关闭全局截图工作线程池"""
|
||||
global _global_pool
|
||||
|
||||
with _pool_lock:
|
||||
if _global_pool:
|
||||
@@ -461,9 +435,9 @@ def shutdown_browser_worker_pool():
|
||||
_global_pool = None
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# 测试代码
|
||||
print("测试浏览器工作线程池...")
|
||||
if __name__ == '__main__':
|
||||
# 测试代码
|
||||
print("测试截图工作线程池...")
|
||||
|
||||
def test_task(browser_instance, url: str, task_id: int):
|
||||
"""测试任务:访问URL"""
|
||||
@@ -478,8 +452,8 @@ if __name__ == '__main__':
|
||||
else:
|
||||
print(f"任务成功: {result}")
|
||||
|
||||
# 创建线程池(2个worker)
|
||||
pool = BrowserWorkerPool(pool_size=2)
|
||||
# 创建线程池(2个worker)
|
||||
pool = BrowserWorkerPool(pool_size=2)
|
||||
pool.initialize()
|
||||
|
||||
# 提交4个任务
|
||||
|
||||
Reference in New Issue
Block a user