perf: 启动预热优化 - 解决容器重启后首批任务慢/失败
问题:容器重启后前两批任务明显变慢或失败
- 第一批:代理/目标服务器连接冷启动导致超时
- 第二批:浏览器池冷启动需要创建浏览器

解决方案:
- browser_pool_worker.py: 添加 pre_warm 参数,启动时预创建1个浏览器
- api_browser.py: 添加 warmup_api_connection() 预热 TCP/TLS 连接
- api_browser.py: 首次请求使用更长超时(10s),后续恢复正常
- app.py: 启动时后台调用 API 预热

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -35,19 +35,26 @@ TASK_QUEUE_MAXSIZE = int(os.environ.get('BROWSER_TASK_QUEUE_MAXSIZE', '200')) #
|
||||
BROWSER_MAX_USE_COUNT = int(os.environ.get('BROWSER_MAX_USE_COUNT', '0')) # 每个浏览器最大复用次数(0表示不限制)
|
||||
|
||||
|
||||
class BrowserWorker(threading.Thread):
|
||||
"""浏览器工作线程 - 每个worker维护自己的浏览器"""
|
||||
|
||||
def __init__(self, worker_id: int, task_queue: queue.Queue, log_callback: Optional[Callable] = None):
|
||||
super().__init__(daemon=True)
|
||||
self.worker_id = worker_id
|
||||
self.task_queue = task_queue
|
||||
self.log_callback = log_callback
|
||||
self.browser_instance = None
|
||||
self.running = True
|
||||
self.idle = True
|
||||
self.total_tasks = 0
|
||||
self.failed_tasks = 0
|
||||
class BrowserWorker(threading.Thread):
|
||||
"""浏览器工作线程 - 每个worker维护自己的浏览器"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
worker_id: int,
|
||||
task_queue: queue.Queue,
|
||||
log_callback: Optional[Callable] = None,
|
||||
pre_warm: bool = False,
|
||||
):
|
||||
super().__init__(daemon=True)
|
||||
self.worker_id = worker_id
|
||||
self.task_queue = task_queue
|
||||
self.log_callback = log_callback
|
||||
self.browser_instance = None
|
||||
self.running = True
|
||||
self.idle = True
|
||||
self.total_tasks = 0
|
||||
self.failed_tasks = 0
|
||||
self.pre_warm = pre_warm
|
||||
|
||||
def log(self, message: str):
|
||||
"""日志输出"""
|
||||
@@ -122,25 +129,39 @@ class BrowserWorker(threading.Thread):
|
||||
self._close_browser()
|
||||
return self._create_browser()
|
||||
|
||||
def run(self):
|
||||
"""工作线程主循环 - 按需启动浏览器模式"""
|
||||
self.log("Worker启动(按需模式,等待任务时不占用浏览器资源)")
|
||||
last_task_time = 0
|
||||
|
||||
while self.running:
|
||||
try:
|
||||
# 从队列获取任务(带超时,以便能响应停止信号和空闲检查)
|
||||
self.idle = True
|
||||
try:
|
||||
task = self.task_queue.get(timeout=TASK_QUEUE_TIMEOUT)
|
||||
except queue.Empty:
|
||||
# 检查是否需要关闭空闲的浏览器
|
||||
if self.browser_instance and last_task_time > 0:
|
||||
idle_time = time.time() - last_task_time
|
||||
if idle_time > BROWSER_IDLE_TIMEOUT:
|
||||
self.log(f"空闲{int(idle_time)}秒,关闭浏览器释放资源")
|
||||
self._close_browser()
|
||||
continue
|
||||
def run(self):
|
||||
"""工作线程主循环 - 按需启动浏览器模式"""
|
||||
if self.pre_warm:
|
||||
self.log("Worker启动(预热模式,启动即创建浏览器)")
|
||||
else:
|
||||
self.log("Worker启动(按需模式,等待任务时不占用浏览器资源)")
|
||||
|
||||
last_activity_time = 0
|
||||
if self.pre_warm and not self.browser_instance:
|
||||
if self._create_browser():
|
||||
last_activity_time = time.time()
|
||||
self.pre_warm = False
|
||||
|
||||
while self.running:
|
||||
try:
|
||||
# 允许运行中触发预热(例如池在初始化后调用 warmup)
|
||||
if self.pre_warm and not self.browser_instance:
|
||||
if self._create_browser():
|
||||
last_activity_time = time.time()
|
||||
self.pre_warm = False
|
||||
|
||||
# 从队列获取任务(带超时,以便能响应停止信号和空闲检查)
|
||||
self.idle = True
|
||||
try:
|
||||
task = self.task_queue.get(timeout=TASK_QUEUE_TIMEOUT)
|
||||
except queue.Empty:
|
||||
# 检查是否需要关闭空闲的浏览器
|
||||
if self.browser_instance and last_activity_time > 0:
|
||||
idle_time = time.time() - last_activity_time
|
||||
if idle_time > BROWSER_IDLE_TIMEOUT:
|
||||
self.log(f"空闲{int(idle_time)}秒,关闭浏览器释放资源")
|
||||
self._close_browser()
|
||||
continue
|
||||
|
||||
self.idle = False
|
||||
|
||||
@@ -171,13 +192,13 @@ class BrowserWorker(threading.Thread):
|
||||
result = task_func(self.browser_instance, *task_args, **task_kwargs)
|
||||
callback(result, None)
|
||||
self.log(f"任务执行成功")
|
||||
last_task_time = time.time()
|
||||
last_activity_time = time.time()
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"任务执行失败: {e}")
|
||||
callback(None, str(e))
|
||||
self.failed_tasks += 1
|
||||
last_task_time = time.time()
|
||||
last_activity_time = time.time()
|
||||
|
||||
# 任务失败后,检查浏览器健康
|
||||
if not self._check_browser_health():
|
||||
@@ -223,7 +244,7 @@ class BrowserWorkerPool:
|
||||
print(f"[浏览器池] {message}")
|
||||
|
||||
def initialize(self):
|
||||
"""初始化工作线程池(按需模式,启动时不创建浏览器)"""
|
||||
"""初始化工作线程池(按需模式,默认预热1个浏览器)"""
|
||||
with self.lock:
|
||||
if self.initialized:
|
||||
return
|
||||
@@ -231,18 +252,52 @@ class BrowserWorkerPool:
|
||||
_apply_nest_asyncio_once()
|
||||
|
||||
self.log(f"正在初始化工作线程池({self.pool_size}个worker,按需启动浏览器)...")
|
||||
|
||||
for i in range(self.pool_size):
|
||||
worker = BrowserWorker(
|
||||
worker_id=i + 1,
|
||||
task_queue=self.task_queue,
|
||||
log_callback=self.log_callback
|
||||
)
|
||||
worker.start()
|
||||
self.workers.append(worker)
|
||||
|
||||
self.initialized = True
|
||||
self.log(f"✓ 工作线程池初始化完成({self.pool_size}个worker就绪,浏览器将在有任务时按需启动)")
|
||||
|
||||
for i in range(self.pool_size):
|
||||
worker = BrowserWorker(
|
||||
worker_id=i + 1,
|
||||
task_queue=self.task_queue,
|
||||
log_callback=self.log_callback,
|
||||
pre_warm=(i < 1),
|
||||
)
|
||||
worker.start()
|
||||
self.workers.append(worker)
|
||||
|
||||
self.initialized = True
|
||||
self.log(f"✓ 工作线程池初始化完成({self.pool_size}个worker就绪,浏览器将在有任务时按需启动)")
|
||||
|
||||
# 初始化完成后,默认预热1个浏览器,降低容器重启后前几批任务的冷启动开销
|
||||
self.warmup(1)
|
||||
|
||||
def warmup(self, count: int = 1) -> int:
    """Pre-create browsers on the first ``count`` workers and wait for them.

    Flags each selected worker that has no browser yet with ``pre_warm``
    and then polls until one of the following holds:

    * every selected worker has a browser,
    * no selected worker can still make progress (its ``pre_warm`` flag
      was cleared without a browser appearing, or it is no longer
      running), or
    * 20 seconds have elapsed.

    Args:
        count: Number of workers, taken from the front of ``self.workers``,
            to warm up. Values <= 0 are a no-op.

    Returns:
        The number of selected workers that hold a browser when the wait
        ends (0 if the pool is not initialized).
    """
    if count <= 0:
        return 0

    if not self.initialized:
        self.log("警告:线程池未初始化,无法预热")
        return 0

    # NOTE(review): this acquires self.lock; confirm callers (e.g.
    # initialize) do not already hold it unless the lock is re-entrant.
    with self.lock:
        # count > 0 here, so a plain slice already clamps to the pool size.
        target_workers = list(self.workers[:count])

    self.log(f"预热浏览器池(预创建{len(target_workers)}个浏览器)...")

    for worker in target_workers:
        if not worker.browser_instance:
            worker.pre_warm = True

    def _still_pending(worker) -> bool:
        # The worker loop appears to clear pre_warm once it has attempted
        # creation, so "flag cleared and still no browser" means the attempt
        # failed and waiting longer cannot help. A stopped worker will never
        # act on the flag either.
        return bool(
            worker.pre_warm
            and not worker.browser_instance
            and getattr(worker, "running", True)
        )

    # Monotonic clock: the 20 s cap must not be affected by wall-clock
    # adjustments (e.g. time sync shortly after a container starts).
    deadline = time.monotonic() + 20
    while time.monotonic() < deadline:
        if not any(_still_pending(w) for w in target_workers):
            break
        time.sleep(0.1)

    warmed = sum(1 for w in target_workers if w.browser_instance)
    self.log(f"✓ 浏览器池预热完成({warmed}个浏览器就绪)")
    return warmed
|
||||
|
||||
def submit_task(self, task_func: Callable, callback: Callable, *args, **kwargs) -> bool:
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user