perf: 启动预热优化 - 解决容器重启后首批任务慢/失败
问题:容器重启后前两批任务明显变慢或失败 - 第一批:代理/目标服务器连接冷启动导致超时 - 第二批:浏览器池冷启动需要创建浏览器 解决方案: - browser_pool_worker.py: 添加 pre_warm 参数,启动时预创建1个浏览器 - api_browser.py: 添加 warmup_api_connection() 预热 TCP/TLS 连接 - api_browser.py: 首次请求使用更长超时(10s),后续恢复正常 - app.py: 启动时后台调用 API 预热 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -71,6 +71,7 @@ class APIBrowser:
|
|||||||
self.log_callback = log_callback
|
self.log_callback = log_callback
|
||||||
self.stop_flag = False
|
self.stop_flag = False
|
||||||
self._closed = False # 防止重复关闭
|
self._closed = False # 防止重复关闭
|
||||||
|
self._first_request = True
|
||||||
self.last_total_records = 0
|
self.last_total_records = 0
|
||||||
|
|
||||||
# 设置代理
|
# 设置代理
|
||||||
@@ -132,7 +133,12 @@ class APIBrowser:
|
|||||||
|
|
||||||
def _request_with_retry(self, method, url, max_retries=3, retry_delay=1, **kwargs):
|
def _request_with_retry(self, method, url, max_retries=3, retry_delay=1, **kwargs):
|
||||||
"""带重试机制的请求方法"""
|
"""带重试机制的请求方法"""
|
||||||
kwargs.setdefault('timeout', _API_REQUEST_TIMEOUT_SECONDS)
|
# 首次请求使用更长超时(10秒),后续使用配置的超时
|
||||||
|
if self._first_request:
|
||||||
|
kwargs.setdefault('timeout', 10.0)
|
||||||
|
self._first_request = False
|
||||||
|
else:
|
||||||
|
kwargs.setdefault('timeout', _API_REQUEST_TIMEOUT_SECONDS)
|
||||||
last_error = None
|
last_error = None
|
||||||
|
|
||||||
for attempt in range(1, max_retries + 1):
|
for attempt in range(1, max_retries + 1):
|
||||||
@@ -518,3 +524,28 @@ class APIBrowser:
|
|||||||
"""Context manager支持 - 退出"""
|
"""Context manager支持 - 退出"""
|
||||||
self.close()
|
self.close()
|
||||||
return False # 不抑制异常
|
return False # 不抑制异常
|
||||||
|
|
||||||
|
|
||||||
|
def warmup_api_connection(proxy_config: Optional[dict] = None, log_callback: Optional[Callable] = None):
|
||||||
|
"""预热 API 连接 - 建立 TCP/TLS 连接池"""
|
||||||
|
|
||||||
|
def log(msg: str):
|
||||||
|
if log_callback:
|
||||||
|
log_callback(msg)
|
||||||
|
else:
|
||||||
|
print(f"[API预热] {msg}")
|
||||||
|
|
||||||
|
log("正在预热 API 连接...")
|
||||||
|
try:
|
||||||
|
session = requests.Session()
|
||||||
|
if proxy_config and proxy_config.get("server"):
|
||||||
|
session.proxies = {"http": proxy_config["server"], "https": proxy_config["server"]}
|
||||||
|
|
||||||
|
# 发送一个轻量级请求建立连接
|
||||||
|
resp = session.get(f"{BASE_URL}/admin/login.aspx", timeout=10, allow_redirects=False)
|
||||||
|
log(f"✓ API 连接预热完成 (status={resp.status_code})")
|
||||||
|
session.close()
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
log(f"API 连接预热失败: {e}")
|
||||||
|
return False
|
||||||
|
|||||||
15
app.py
15
app.py
@@ -277,6 +277,21 @@ if __name__ == "__main__":
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"警告: 截图线程池初始化失败: {e}")
|
logger.warning(f"警告: 截图线程池初始化失败: {e}")
|
||||||
|
|
||||||
|
# 预热 API 连接(后台进行,不阻塞启动)
|
||||||
|
logger.info("预热 API 连接...")
|
||||||
|
try:
|
||||||
|
from api_browser import warmup_api_connection
|
||||||
|
import threading
|
||||||
|
|
||||||
|
threading.Thread(
|
||||||
|
target=warmup_api_connection,
|
||||||
|
kwargs={"log_callback": lambda msg: logger.info(msg)},
|
||||||
|
daemon=True,
|
||||||
|
name="api-warmup",
|
||||||
|
).start()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"API 预热失败: {e}")
|
||||||
|
|
||||||
socketio.run(
|
socketio.run(
|
||||||
app,
|
app,
|
||||||
host=config.SERVER_HOST,
|
host=config.SERVER_HOST,
|
||||||
|
|||||||
@@ -38,7 +38,13 @@ BROWSER_MAX_USE_COUNT = int(os.environ.get('BROWSER_MAX_USE_COUNT', '0')) # 每
|
|||||||
class BrowserWorker(threading.Thread):
|
class BrowserWorker(threading.Thread):
|
||||||
"""浏览器工作线程 - 每个worker维护自己的浏览器"""
|
"""浏览器工作线程 - 每个worker维护自己的浏览器"""
|
||||||
|
|
||||||
def __init__(self, worker_id: int, task_queue: queue.Queue, log_callback: Optional[Callable] = None):
|
def __init__(
|
||||||
|
self,
|
||||||
|
worker_id: int,
|
||||||
|
task_queue: queue.Queue,
|
||||||
|
log_callback: Optional[Callable] = None,
|
||||||
|
pre_warm: bool = False,
|
||||||
|
):
|
||||||
super().__init__(daemon=True)
|
super().__init__(daemon=True)
|
||||||
self.worker_id = worker_id
|
self.worker_id = worker_id
|
||||||
self.task_queue = task_queue
|
self.task_queue = task_queue
|
||||||
@@ -48,6 +54,7 @@ class BrowserWorker(threading.Thread):
|
|||||||
self.idle = True
|
self.idle = True
|
||||||
self.total_tasks = 0
|
self.total_tasks = 0
|
||||||
self.failed_tasks = 0
|
self.failed_tasks = 0
|
||||||
|
self.pre_warm = pre_warm
|
||||||
|
|
||||||
def log(self, message: str):
|
def log(self, message: str):
|
||||||
"""日志输出"""
|
"""日志输出"""
|
||||||
@@ -124,19 +131,33 @@ class BrowserWorker(threading.Thread):
|
|||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""工作线程主循环 - 按需启动浏览器模式"""
|
"""工作线程主循环 - 按需启动浏览器模式"""
|
||||||
self.log("Worker启动(按需模式,等待任务时不占用浏览器资源)")
|
if self.pre_warm:
|
||||||
last_task_time = 0
|
self.log("Worker启动(预热模式,启动即创建浏览器)")
|
||||||
|
else:
|
||||||
|
self.log("Worker启动(按需模式,等待任务时不占用浏览器资源)")
|
||||||
|
|
||||||
|
last_activity_time = 0
|
||||||
|
if self.pre_warm and not self.browser_instance:
|
||||||
|
if self._create_browser():
|
||||||
|
last_activity_time = time.time()
|
||||||
|
self.pre_warm = False
|
||||||
|
|
||||||
while self.running:
|
while self.running:
|
||||||
try:
|
try:
|
||||||
|
# 允许运行中触发预热(例如池在初始化后调用 warmup)
|
||||||
|
if self.pre_warm and not self.browser_instance:
|
||||||
|
if self._create_browser():
|
||||||
|
last_activity_time = time.time()
|
||||||
|
self.pre_warm = False
|
||||||
|
|
||||||
# 从队列获取任务(带超时,以便能响应停止信号和空闲检查)
|
# 从队列获取任务(带超时,以便能响应停止信号和空闲检查)
|
||||||
self.idle = True
|
self.idle = True
|
||||||
try:
|
try:
|
||||||
task = self.task_queue.get(timeout=TASK_QUEUE_TIMEOUT)
|
task = self.task_queue.get(timeout=TASK_QUEUE_TIMEOUT)
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
# 检查是否需要关闭空闲的浏览器
|
# 检查是否需要关闭空闲的浏览器
|
||||||
if self.browser_instance and last_task_time > 0:
|
if self.browser_instance and last_activity_time > 0:
|
||||||
idle_time = time.time() - last_task_time
|
idle_time = time.time() - last_activity_time
|
||||||
if idle_time > BROWSER_IDLE_TIMEOUT:
|
if idle_time > BROWSER_IDLE_TIMEOUT:
|
||||||
self.log(f"空闲{int(idle_time)}秒,关闭浏览器释放资源")
|
self.log(f"空闲{int(idle_time)}秒,关闭浏览器释放资源")
|
||||||
self._close_browser()
|
self._close_browser()
|
||||||
@@ -171,13 +192,13 @@ class BrowserWorker(threading.Thread):
|
|||||||
result = task_func(self.browser_instance, *task_args, **task_kwargs)
|
result = task_func(self.browser_instance, *task_args, **task_kwargs)
|
||||||
callback(result, None)
|
callback(result, None)
|
||||||
self.log(f"任务执行成功")
|
self.log(f"任务执行成功")
|
||||||
last_task_time = time.time()
|
last_activity_time = time.time()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.log(f"任务执行失败: {e}")
|
self.log(f"任务执行失败: {e}")
|
||||||
callback(None, str(e))
|
callback(None, str(e))
|
||||||
self.failed_tasks += 1
|
self.failed_tasks += 1
|
||||||
last_task_time = time.time()
|
last_activity_time = time.time()
|
||||||
|
|
||||||
# 任务失败后,检查浏览器健康
|
# 任务失败后,检查浏览器健康
|
||||||
if not self._check_browser_health():
|
if not self._check_browser_health():
|
||||||
@@ -223,7 +244,7 @@ class BrowserWorkerPool:
|
|||||||
print(f"[浏览器池] {message}")
|
print(f"[浏览器池] {message}")
|
||||||
|
|
||||||
def initialize(self):
|
def initialize(self):
|
||||||
"""初始化工作线程池(按需模式,启动时不创建浏览器)"""
|
"""初始化工作线程池(按需模式,默认预热1个浏览器)"""
|
||||||
with self.lock:
|
with self.lock:
|
||||||
if self.initialized:
|
if self.initialized:
|
||||||
return
|
return
|
||||||
@@ -236,7 +257,8 @@ class BrowserWorkerPool:
|
|||||||
worker = BrowserWorker(
|
worker = BrowserWorker(
|
||||||
worker_id=i + 1,
|
worker_id=i + 1,
|
||||||
task_queue=self.task_queue,
|
task_queue=self.task_queue,
|
||||||
log_callback=self.log_callback
|
log_callback=self.log_callback,
|
||||||
|
pre_warm=(i < 1),
|
||||||
)
|
)
|
||||||
worker.start()
|
worker.start()
|
||||||
self.workers.append(worker)
|
self.workers.append(worker)
|
||||||
@@ -244,6 +266,39 @@ class BrowserWorkerPool:
|
|||||||
self.initialized = True
|
self.initialized = True
|
||||||
self.log(f"✓ 工作线程池初始化完成({self.pool_size}个worker就绪,浏览器将在有任务时按需启动)")
|
self.log(f"✓ 工作线程池初始化完成({self.pool_size}个worker就绪,浏览器将在有任务时按需启动)")
|
||||||
|
|
||||||
|
# 初始化完成后,默认预热1个浏览器,降低容器重启后前几批任务的冷启动开销
|
||||||
|
self.warmup(1)
|
||||||
|
|
||||||
|
def warmup(self, count: int = 1) -> int:
|
||||||
|
"""预热浏览器池 - 预创建指定数量的浏览器"""
|
||||||
|
if count <= 0:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if not self.initialized:
|
||||||
|
self.log("警告:线程池未初始化,无法预热")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
with self.lock:
|
||||||
|
target_workers = list(self.workers[: min(count, len(self.workers))])
|
||||||
|
|
||||||
|
self.log(f"预热浏览器池(预创建{len(target_workers)}个浏览器)...")
|
||||||
|
|
||||||
|
for worker in target_workers:
|
||||||
|
if not worker.browser_instance:
|
||||||
|
worker.pre_warm = True
|
||||||
|
|
||||||
|
# 等待预热完成(最多等待20秒,避免阻塞过久)
|
||||||
|
deadline = time.time() + 20
|
||||||
|
while time.time() < deadline:
|
||||||
|
warmed = sum(1 for w in target_workers if w.browser_instance)
|
||||||
|
if warmed >= len(target_workers):
|
||||||
|
break
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
warmed = sum(1 for w in target_workers if w.browser_instance)
|
||||||
|
self.log(f"✓ 浏览器池预热完成({warmed}个浏览器就绪)")
|
||||||
|
return warmed
|
||||||
|
|
||||||
def submit_task(self, task_func: Callable, callback: Callable, *args, **kwargs) -> bool:
|
def submit_task(self, task_func: Callable, callback: Callable, *args, **kwargs) -> bool:
|
||||||
"""
|
"""
|
||||||
提交任务到队列
|
提交任务到队列
|
||||||
|
|||||||
Reference in New Issue
Block a user