fix: 内存溢出与任务调度优化

This commit is contained in:
2025-12-13 17:36:02 +08:00
parent 9e761140c1
commit d7d878dc08
5 changed files with 1128 additions and 480 deletions

View File

@@ -10,9 +10,11 @@ from typing import Callable, Optional, Dict, Any
import nest_asyncio
nest_asyncio.apply()
# Security fix: magic numbers extracted into configurable constants.
# Each value is read from the environment once, at import time, and parsed with int().
BROWSER_IDLE_TIMEOUT = int(os.environ.get('BROWSER_IDLE_TIMEOUT', '300')) # idle timeout in seconds; default is 5 minutes
TASK_QUEUE_TIMEOUT = int(os.environ.get('TASK_QUEUE_TIMEOUT', '10')) # queue get timeout in seconds
# Security fix: magic numbers extracted into configurable constants.
# Every setting is read from the environment once, at import time, and
# parsed with int().

# Idle timeout in seconds (default: 5 minutes).
BROWSER_IDLE_TIMEOUT = int(os.getenv('BROWSER_IDLE_TIMEOUT', '300'))

# Timeout in seconds for fetching from the task queue.
TASK_QUEUE_TIMEOUT = int(os.getenv('TASK_QUEUE_TIMEOUT', '10'))

# Maximum task-queue length (0 means unlimited).
TASK_QUEUE_MAXSIZE = int(os.getenv('BROWSER_TASK_QUEUE_MAXSIZE', '200'))

# Maximum number of times one browser is reused (0 means no limit).
BROWSER_MAX_USE_COUNT = int(os.getenv('BROWSER_MAX_USE_COUNT', '0'))
class BrowserWorker(threading.Thread):
@@ -146,27 +148,33 @@ class BrowserWorker(threading.Thread):
self.log(f"开始执行任务(第{self.browser_instance['use_count']}次使用浏览器)")
try:
# 将浏览器实例传递给任务函数
result = task_func(self.browser_instance, *task_args, **task_kwargs)
callback(result, None)
self.log(f"任务执行成功")
last_task_time = time.time()
except Exception as e:
self.log(f"任务执行失败: {e}")
callback(None, str(e))
self.failed_tasks += 1
last_task_time = time.time()
# 任务失败后,检查浏览器健康
if not self._check_browser_health():
self.log("任务失败导致浏览器异常,将在下次任务前重建")
self._close_browser()
except Exception as e:
self.log(f"Worker出错: {e}")
time.sleep(1)
try:
# 将浏览器实例传递给任务函数
result = task_func(self.browser_instance, *task_args, **task_kwargs)
callback(result, None)
self.log(f"任务执行成功")
last_task_time = time.time()
except Exception as e:
self.log(f"任务执行失败: {e}")
callback(None, str(e))
self.failed_tasks += 1
last_task_time = time.time()
# 任务失败后,检查浏览器健康
if not self._check_browser_health():
self.log("任务失败导致浏览器异常,将在下次任务前重建")
self._close_browser()
# 定期重启浏览器释放Chromium可能累积的内存
if self.browser_instance and BROWSER_MAX_USE_COUNT > 0:
if self.browser_instance.get('use_count', 0) >= BROWSER_MAX_USE_COUNT:
self.log(f"浏览器已复用{self.browser_instance['use_count']}次,重启释放资源")
self._close_browser()
except Exception as e:
self.log(f"Worker出错: {e}")
time.sleep(1)
# 清理资源
self._close_browser()
@@ -177,16 +185,17 @@ class BrowserWorker(threading.Thread):
self.running = False
class BrowserWorkerPool:
"""浏览器工作线程池"""
def __init__(self, pool_size: int = 3, log_callback: Optional[Callable] = None):
# Set up the pool's bookkeeping state; no worker is created here.
#   pool_size:    stored as-is on the instance (default 3).
#   log_callback: optional callable stored on the instance (default None).
self.pool_size = pool_size
self.log_callback = log_callback
# queue.Queue() with no arguments has no size limit.
self.task_queue = queue.Queue()
# Starts empty.
self.workers = []
# Starts False until initialization has completed.
self.initialized = False
self.lock = threading.Lock()
class BrowserWorkerPool:
"""浏览器工作线程池"""
def __init__(self, pool_size: int = 3, log_callback: Optional[Callable] = None):
# Set up the pool's bookkeeping state; no worker is created here.
#   pool_size:    stored as-is on the instance (default 3).
#   log_callback: optional callable stored on the instance (default None).
self.pool_size = pool_size
self.log_callback = log_callback
# Bound the task queue by TASK_QUEUE_MAXSIZE. Non-positive values are
# normalized to 0, which queue.Queue treats as "no size limit".
maxsize = TASK_QUEUE_MAXSIZE if TASK_QUEUE_MAXSIZE > 0 else 0
self.task_queue = queue.Queue(maxsize=maxsize)
# Starts empty.
self.workers = []
# Starts False until initialization has completed.
self.initialized = False
self.lock = threading.Lock()
def log(self, message: str):
"""日志输出"""
@@ -215,7 +224,7 @@ class BrowserWorkerPool:
self.initialized = True
self.log(f"✓ 工作线程池初始化完成({self.pool_size}个worker就绪浏览器将在有任务时按需启动")
def submit_task(self, task_func: Callable, callback: Callable, *args, **kwargs) -> bool:
def submit_task(self, task_func: Callable, callback: Callable, *args, **kwargs) -> bool:
"""
提交任务到队列
@@ -238,8 +247,12 @@ class BrowserWorkerPool:
'callback': callback
}
self.task_queue.put(task)
return True
try:
self.task_queue.put(task, timeout=1)
return True
except queue.Full:
self.log(f"警告任务队列已满maxsize={self.task_queue.maxsize}),拒绝提交任务")
return False
def get_stats(self) -> Dict[str, Any]:
"""获取线程池统计信息"""