perf: 启动预热优化 - 解决容器重启后首批任务慢/失败

问题:容器重启后前两批任务明显变慢或失败
- 第一批:代理/目标服务器连接冷启动导致超时
- 第二批:浏览器池冷启动需要创建浏览器

解决方案:
- browser_pool_worker.py: 添加 pre_warm 参数,启动时预创建1个浏览器
- api_browser.py: 添加 warmup_api_connection() 预热 TCP/TLS 连接
- api_browser.py: 首次请求使用更长超时(10s),后续恢复正常
- app.py: 启动时后台调用 API 预热

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-24 01:18:18 +08:00
parent 1d44859857
commit 151fc3e09f
3 changed files with 149 additions and 48 deletions

View File

@@ -35,19 +35,26 @@ TASK_QUEUE_MAXSIZE = int(os.environ.get('BROWSER_TASK_QUEUE_MAXSIZE', '200')) #
BROWSER_MAX_USE_COUNT = int(os.environ.get('BROWSER_MAX_USE_COUNT', '0')) # 每个浏览器最大复用次数(0表示不限制)
class BrowserWorker(threading.Thread):
"""浏览器工作线程 - 每个worker维护自己的浏览器"""
def __init__(self, worker_id: int, task_queue: queue.Queue, log_callback: Optional[Callable] = None):
super().__init__(daemon=True)
self.worker_id = worker_id
self.task_queue = task_queue
self.log_callback = log_callback
self.browser_instance = None
self.running = True
self.idle = True
self.total_tasks = 0
self.failed_tasks = 0
class BrowserWorker(threading.Thread):
    """Browser worker thread; each worker maintains a browser of its own."""

    def __init__(
        self,
        worker_id: int,
        task_queue: queue.Queue,
        log_callback: Optional[Callable] = None,
        pre_warm: bool = False,
    ):
        """Set up the worker's bookkeeping without starting the thread.

        Args:
            worker_id: Identifier stored on the worker.
            task_queue: Queue object stored as ``self.task_queue``.
            log_callback: Optional callable stored as ``self.log_callback``.
            pre_warm: Initial value of the ``pre_warm`` flag stored on the
                worker.
        """
        super().__init__(daemon=True)

        # Wiring handed in by the creator of the worker.
        self.worker_id = worker_id
        self.task_queue = task_queue
        self.log_callback = log_callback
        self.pre_warm = pre_warm

        # Runtime state: no browser yet, running and idle.
        self.browser_instance = None
        self.running = True
        self.idle = True

        # Task counters start at zero.
        self.total_tasks = 0
        self.failed_tasks = 0
def log(self, message: str):
"""日志输出"""
@@ -122,25 +129,39 @@ class BrowserWorker(threading.Thread):
self._close_browser()
return self._create_browser()
def run(self):
"""工作线程主循环 - 按需启动浏览器模式"""
self.log("Worker启动按需模式等待任务时不占用浏览器资源")
last_task_time = 0
while self.running:
try:
# 从队列获取任务(带超时,以便能响应停止信号和空闲检查)
self.idle = True
try:
task = self.task_queue.get(timeout=TASK_QUEUE_TIMEOUT)
except queue.Empty:
# 检查是否需要关闭空闲的浏览器
if self.browser_instance and last_task_time > 0:
idle_time = time.time() - last_task_time
if idle_time > BROWSER_IDLE_TIMEOUT:
self.log(f"空闲{int(idle_time)}秒,关闭浏览器释放资源")
self._close_browser()
continue
def run(self):
"""工作线程主循环 - 按需启动浏览器模式"""
if self.pre_warm:
self.log("Worker启动预热模式启动即创建浏览器")
else:
self.log("Worker启动按需模式等待任务时不占用浏览器资源")
last_activity_time = 0
if self.pre_warm and not self.browser_instance:
if self._create_browser():
last_activity_time = time.time()
self.pre_warm = False
while self.running:
try:
# 允许运行中触发预热(例如池在初始化后调用 warmup
if self.pre_warm and not self.browser_instance:
if self._create_browser():
last_activity_time = time.time()
self.pre_warm = False
# 从队列获取任务(带超时,以便能响应停止信号和空闲检查)
self.idle = True
try:
task = self.task_queue.get(timeout=TASK_QUEUE_TIMEOUT)
except queue.Empty:
# 检查是否需要关闭空闲的浏览器
if self.browser_instance and last_activity_time > 0:
idle_time = time.time() - last_activity_time
if idle_time > BROWSER_IDLE_TIMEOUT:
self.log(f"空闲{int(idle_time)}秒,关闭浏览器释放资源")
self._close_browser()
continue
self.idle = False
@@ -171,13 +192,13 @@ class BrowserWorker(threading.Thread):
result = task_func(self.browser_instance, *task_args, **task_kwargs)
callback(result, None)
self.log(f"任务执行成功")
last_task_time = time.time()
last_activity_time = time.time()
except Exception as e:
self.log(f"任务执行失败: {e}")
callback(None, str(e))
self.failed_tasks += 1
last_task_time = time.time()
last_activity_time = time.time()
# 任务失败后,检查浏览器健康
if not self._check_browser_health():
@@ -223,7 +244,7 @@ class BrowserWorkerPool:
print(f"[浏览器池] {message}")
def initialize(self):
"""初始化工作线程池(按需模式,启动时不创建浏览器)"""
"""初始化工作线程池(按需模式,默认预热1个浏览器)"""
with self.lock:
if self.initialized:
return
@@ -231,18 +252,52 @@ class BrowserWorkerPool:
_apply_nest_asyncio_once()
self.log(f"正在初始化工作线程池({self.pool_size}个worker按需启动浏览器...")
for i in range(self.pool_size):
worker = BrowserWorker(
worker_id=i + 1,
task_queue=self.task_queue,
log_callback=self.log_callback
)
worker.start()
self.workers.append(worker)
self.initialized = True
self.log(f"✓ 工作线程池初始化完成({self.pool_size}个worker就绪浏览器将在有任务时按需启动")
for i in range(self.pool_size):
worker = BrowserWorker(
worker_id=i + 1,
task_queue=self.task_queue,
log_callback=self.log_callback,
pre_warm=(i < 1),
)
worker.start()
self.workers.append(worker)
self.initialized = True
self.log(f"✓ 工作线程池初始化完成({self.pool_size}个worker就绪浏览器将在有任务时按需启动")
# 初始化完成后默认预热1个浏览器降低容器重启后前几批任务的冷启动开销
self.warmup(1)
def warmup(self, count: int = 1, timeout: float = 20.0) -> int:
    """Pre-warm the pool by asking the first ``count`` workers for a browser.

    Every selected worker that has no ``browser_instance`` gets its
    ``pre_warm`` flag set. The method then polls until each selected worker
    exposes a ``browser_instance`` or ``timeout`` seconds have elapsed.

    Args:
        count: Number of workers to warm, taken from the front of
            ``self.workers`` and capped at the pool size. Values <= 0 are a
            no-op.
        timeout: Maximum number of seconds to wait. Defaults to 20, which
            was previously a hard-coded limit.

    Returns:
        The number of selected workers that hold a browser when the wait
        ends; 0 when ``count`` is not positive or the pool has not been
        initialized.
    """
    if count <= 0:
        return 0

    if not self.initialized:
        self.log("警告:线程池未初始化,无法预热")
        return 0

    # Snapshot the targets under the lock; slicing already caps at pool size.
    with self.lock:
        target_workers = list(self.workers[:count])

    self.log(f"预热浏览器池(预创建{len(target_workers)}个浏览器)...")

    # Only flag workers that still lack a browser.
    for worker in target_workers:
        if not worker.browser_instance:
            worker.pre_warm = True

    def ready_count() -> int:
        return sum(1 for w in target_workers if w.browser_instance)

    # Use the monotonic clock so wall-clock adjustments (NTP, manual changes)
    # can neither cut the wait short nor extend it past ``timeout``.
    deadline = time.monotonic() + timeout
    warmed = ready_count()
    while warmed < len(target_workers) and time.monotonic() < deadline:
        time.sleep(0.1)
        warmed = ready_count()

    self.log(f"✓ 浏览器池预热完成({warmed}个浏览器就绪)")
    return warmed
def submit_task(self, task_func: Callable, callback: Callable, *args, **kwargs) -> bool:
"""