replace screenshot pipeline and update admin

This commit is contained in:
2025-12-31 16:50:35 +08:00
parent 2d98ab66a3
commit 41ead4bead
25 changed files with 443 additions and 2250 deletions

View File

@@ -1,14 +1,18 @@
# 使用国内镜像源加速
FROM mcr.microsoft.com/playwright/python:v1.40.0-jammy
FROM python:3.10-slim-bullseye
# 设置工作目录
WORKDIR /app
# 设置环境变量
ENV PYTHONUNBUFFERED=1
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
ENV TZ=Asia/Shanghai
# 安装 wkhtmltopdf(包含 wkhtmltoimage)与中文字体
RUN apt-get update && \
apt-get install -y --no-install-recommends wkhtmltopdf fonts-noto-cjk && \
rm -rf /var/lib/apt/lists/*
# 配置 pip 使用国内镜像源
RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && pip config set install.trusted-host mirrors.aliyun.com
@@ -22,10 +26,8 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY app.py .
COPY database.py .
COPY db_pool.py .
COPY playwright_automation.py .
COPY api_browser.py .
COPY browser_pool_worker.py .
COPY browser_installer.py .
COPY password_utils.py .
COPY crypto_utils.py .
COPY task_checkpoint.py .

View File

@@ -6,10 +6,10 @@
## 项目简介
本项目是一个 **Docker 容器化应用**,使用 Flask + Playwright + SQLite 构建,提供:
本项目是一个 **Docker 容器化应用**,使用 Flask + Requests + wkhtmltopdf + SQLite 构建,提供:
- 多用户注册登录系统
- 浏览器自动化任务
- 自动化任务(HTTP 模拟)
- 定时任务调度
- 截图管理
- VIP用户管理
@@ -22,7 +22,8 @@
- **后端**: Python 3.8+, Flask
- **数据库**: SQLite
- **自动化**: Playwright (Chromium)
- **自动化**: Requests + BeautifulSoup
- **截图**: wkhtmltopdf / wkhtmltoimage
- **容器化**: Docker + Docker Compose
- **前端**: HTML + JavaScript + Socket.IO
@@ -39,10 +40,8 @@ zsglpt/
├── database.py # 数据库稳定门面(对外 API)
├── db/ # DB 分域实现 + schema/migrations
├── db_pool.py # 数据库连接池
├── playwright_automation.py # Playwright 自动化
├── api_browser.py # Requests 自动化(主浏览流程)
├── browser_pool_worker.py # 截图 WorkerPool(浏览器复用)
├── browser_installer.py # 浏览器安装检查
├── browser_pool_worker.py # 截图 WorkerPool
├── app_config.py # 配置管理
├── app_logger.py # 日志系统
├── app_security.py # 安全模块
@@ -122,8 +121,8 @@ cd /www/wwwroot/zsgpt2
### 步骤4: 创建必要的目录
```bash
mkdir -p data logs 截图 playwright
chmod 777 data logs 截图 playwright
mkdir -p data logs 截图
chmod 777 data logs 截图
```
### 步骤5: 构建并启动Docker容器
@@ -447,19 +446,19 @@ docker-compose down
docker-compose up -d
```
### 5. 浏览器下载失败
### 5. 截图工具未安装
**问题**: Playwright浏览器下载失败
**问题**: wkhtmltoimage 命令不存在
**解决方案**:
```bash
# 进入容器手动安装
docker exec -it knowledge-automation-multiuser bash
playwright install chromium
apt-get update
apt-get install -y wkhtmltopdf
# 或使用国内镜像
export PLAYWRIGHT_DOWNLOAD_HOST=https://npmmirror.com/mirrors/playwright/
playwright install chromium
# 验证安装
wkhtmltoimage --version
```
---
@@ -631,7 +630,12 @@ docker logs knowledge-automation-multiuser | grep "数据库"
|--------|------|--------|
| TZ | 时区 | Asia/Shanghai |
| PYTHONUNBUFFERED | Python输出缓冲 | 1 |
| PLAYWRIGHT_BROWSERS_PATH | 浏览器路径 | /ms-playwright |
| WKHTMLTOIMAGE_PATH | wkhtmltoimage 可执行文件路径 | 自动探测 |
| WKHTMLTOIMAGE_JS_DELAY_MS | JS 等待时间(毫秒) | 3000 |
| WKHTMLTOIMAGE_WIDTH | 截图宽度 | 1920 |
| WKHTMLTOIMAGE_QUALITY | JPG截图质量 | 95 |
| WKHTMLTOIMAGE_TIMEOUT_SECONDS | 截图超时时间(秒) | 60 |
| WKHTMLTOIMAGE_USER_AGENT | 截图使用的 UA | Chrome 120 |
---
@@ -641,13 +645,13 @@ docker logs knowledge-automation-multiuser | grep "数据库"
- **项目名称**: 知识管理平台自动化工具
- **版本**: Docker 多用户版
- **技术栈**: Python + Flask + Playwright + SQLite + Docker
- **技术栈**: Python + Flask + Requests + wkhtmltopdf + SQLite + Docker
### 常用文档链接
- [Docker 官方文档](https://docs.docker.com/)
- [Flask 官方文档](https://flask.palletsprojects.com/)
- [Playwright 官方文档](https://playwright.dev/python/)
- [wkhtmltopdf 官方文档](https://wkhtmltopdf.org/)
### 故障排查
@@ -683,8 +687,8 @@ ssh root@your-ip
# 3. 进入目录并创建必要目录
cd /www/wwwroot/zsgpt2
mkdir -p data logs 截图 playwright
chmod 777 data logs 截图 playwright
mkdir -p data logs 截图
chmod 777 data logs 截图
# 4. 启动容器
docker-compose up -d

View File

@@ -46,6 +46,11 @@ export async function getIpRisk(ip) {
return data
}
/**
 * Ask the admin security API to clear the risk score of one IP.
 *
 * @param {string} ip - IP address whose risk score should be cleared.
 * @returns {Promise<any>} The response body returned by the endpoint.
 */
export async function clearIpRisk(ip) {
  const response = await api.post('/admin/security/ip-risk/clear', { ip })
  return response.data
}
export async function getUserRisk(userId) {
const safeUserId = encodeURIComponent(String(userId || '').trim())
const { data } = await api.get(`/admin/security/user-risk/${safeUserId}`)
@@ -56,4 +61,3 @@ export async function cleanup() {
const { data } = await api.post('/admin/security/cleanup', {})
return data
}

View File

@@ -25,6 +25,7 @@ const refreshStats = inject('refreshStats', null)
const adminStats = inject('adminStats', null)
const loading = ref(false)
const refreshing = ref(false)
const lastUpdatedAt = ref('')
const taskStats = ref(null)
@@ -181,9 +182,13 @@ const runningCountsLabel = computed(() => {
return `运行中 ${runningCount} / 排队 ${queuingCount} / 并发上限 ${maxGlobal || maxConcurrentGlobal.value || '-'}`
})
async function refreshAll() {
if (loading.value) return
loading.value = true
async function refreshAll(options = {}) {
const showLoading = options.showLoading ?? true
if (refreshing.value) return
refreshing.value = true
if (showLoading) {
loading.value = true
}
try {
const [
taskResult,
@@ -217,15 +222,22 @@ async function refreshAll() {
await refreshStats?.()
recordUpdatedAt()
} finally {
loading.value = false
refreshing.value = false
if (showLoading) {
loading.value = false
}
}
}
let refreshTimer = null
/**
 * Click handler for the refresh button: run a full refresh with the
 * loading indicator enabled. Kept separate from refreshAll so the click
 * event object is never passed in as the options argument.
 */
function manualRefresh() {
  const options = { showLoading: true }
  return refreshAll(options)
}
onMounted(() => {
refreshAll()
refreshTimer = setInterval(refreshAll, 1000)
refreshAll({ showLoading: false })
refreshTimer = setInterval(() => refreshAll({ showLoading: false }), 1000)
})
onUnmounted(() => {
@@ -252,7 +264,7 @@ onUnmounted(() => {
</div>
<div class="hero-actions">
<el-button type="primary" plain :loading="loading" @click="refreshAll">刷新</el-button>
<el-button type="primary" plain :loading="loading" @click="manualRefresh">刷新</el-button>
</div>
</div>
@@ -593,9 +605,9 @@ onUnmounted(() => {
<div class="panel-head">
<div class="head-left">
<div class="head-text">
<div class="panel-title">浏览器</div>
<div class="panel-title">截图线程</div>
<div class="panel-sub app-muted">
活跃浏览器{{ browserPoolActiveWorkers }} · 忙碌 {{ browserPoolBusyWorkers }} · 队列 {{ browserPoolQueueSize }}
活跃执行环境{{ browserPoolActiveWorkers }} · 忙碌 {{ browserPoolBusyWorkers }} · 队列 {{ browserPoolQueueSize }}
</div>
</div>
</div>
@@ -609,7 +621,7 @@ onUnmounted(() => {
</div>
<div class="tile">
<div class="tile-v ok">{{ browserPoolActiveWorkers }}</div>
<div class="tile-k app-muted">活跃浏览器</div>
<div class="tile-k app-muted">活跃执行环境</div>
</div>
<div class="tile">
<div class="tile-v">{{ browserPoolIdleWorkers }}</div>
@@ -645,7 +657,7 @@ onUnmounted(() => {
</el-table-column>
<el-table-column prop="browser_use_count" label="复用" width="90" />
<el-table-column prop="last_active_at" label="最近活跃" min-width="160" />
<el-table-column prop="browser_created_at" label="浏览器创建" min-width="160" />
<el-table-column prop="browser_created_at" label="环境创建" min-width="160" />
</el-table>
</div>
</el-card>

View File

@@ -6,6 +6,7 @@ import {
banIp,
banUser,
cleanup,
clearIpRisk,
getBannedIps,
getBannedUsers,
getDashboard,
@@ -381,6 +382,35 @@ async function unbanFromRisk() {
}
}
/**
 * Clear the risk score of the IP currently shown in the risk lookup panel.
 *
 * Does nothing unless the current result is an IP result with a non-empty
 * address. Asks for confirmation first, then calls the API and finally
 * re-queries the IP so the panel reflects the new score.
 */
async function clearIpRiskScore() {
  const isIpResult = riskResultKind.value === 'ip'
  if (!isIpResult) return

  const targetIp = String(riskResult.value?.ip || '').trim()
  if (targetIp === '') return

  // A rejected confirm promise means the dialog was cancelled or closed.
  let confirmed = true
  try {
    await ElMessageBox.confirm(
      `确定清除 IP ${targetIp} 的风险分吗?\n\n清除风险分不会删除威胁历史也不会解除封禁。`,
      '清除风险分',
      { confirmButtonText: '清除', cancelButtonText: '取消', type: 'warning' },
    )
  } catch {
    confirmed = false
  }
  if (!confirmed) return

  // Another risk request is already in flight; do not start a second one.
  if (riskLoading.value) return

  riskLoading.value = true
  try {
    await clearIpRisk(targetIp)
    ElMessage.success('IP风险分已清零')
  } catch {
    // handled by interceptor
  } finally {
    riskLoading.value = false
  }

  // Refresh the displayed result whether or not the clear call succeeded.
  await queryIpRisk()
}
const cleanupLoading = ref(false)
async function onCleanup() {
@@ -613,6 +643,15 @@ onMounted(async () => {
<div class="toolbar">
<el-button v-if="!riskResult.is_banned" type="primary" plain @click="openBanFromRisk">封禁</el-button>
<el-button v-else type="danger" plain @click="unbanFromRisk">解除封禁</el-button>
<el-button
v-if="riskResultKind === 'ip'"
type="warning"
plain
:loading="riskLoading"
@click="clearIpRiskScore"
>
清除风险分
</el-button>
</div>
</div>

View File

@@ -261,7 +261,7 @@ onMounted(loadAll)
<el-form-item label="截图最大并发数">
<el-input-number v-model="maxScreenshotConcurrent" :min="1" :max="50" />
<div class="help">同时进行截图的最大数量每个浏览器约占用 200MB 内存</div>
<div class="help">同时进行截图的最大数量wkhtmltoimage 资源占用较低可按需提高</div>
</el-form-item>
</el-form>

View File

@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
"""
API 浏览器 - 用纯 HTTP 请求实现浏览功能
Playwright 快 30-60 倍
传统浏览器自动化快 30-60 倍
"""
import requests
@@ -44,6 +44,27 @@ except Exception:
_API_DIAGNOSTIC_SLOW_MS = max(0, _API_DIAGNOSTIC_SLOW_MS)
_cookie_domain_fallback = urlsplit(BASE_URL).hostname or "postoa.aidunsoft.com"
_COOKIE_JAR_MAX_AGE_SECONDS = 24 * 60 * 60
def get_cookie_jar_path(username: str) -> str:
    """Return the path of the screenshot cookie file (Netscape cookie format).

    The file lives in ``COOKIES_DIR`` (created if missing) and is named after
    the first 32 hex characters of the SHA-256 digest of ``username``, with a
    ``.cookies.txt`` suffix.
    """
    import hashlib

    os.makedirs(COOKIES_DIR, exist_ok=True)
    digest = hashlib.sha256(username.encode()).hexdigest()
    return os.path.join(COOKIES_DIR, f"{digest[:32]}.cookies.txt")
def is_cookie_jar_fresh(cookie_path: str, max_age_seconds: int = _COOKIE_JAR_MAX_AGE_SECONDS) -> bool:
    """Return True when the cookie file exists and is not older than the limit.

    Args:
        cookie_path: Path of the cookie file; an empty value yields False.
        max_age_seconds: Maximum accepted age in seconds. Falsy or negative
            values are treated as 0.

    Returns:
        False when the path is empty or missing, or when reading the
        modification time or converting the limit fails.
    """
    if not (cookie_path and os.path.exists(cookie_path)):
        return False

    try:
        age_limit = max(0, int(max_age_seconds or 0))
        modified_at = os.path.getmtime(cookie_path)
        return time.time() - modified_at <= age_limit
    except Exception:
        return False
_api_browser_instances: "weakref.WeakSet[APIBrowser]" = weakref.WeakSet()
@@ -102,37 +123,36 @@ class APIBrowser:
"""记录日志"""
if self.log_callback:
self.log_callback(message)
def save_cookies_for_playwright(self, username: str):
"""保存cookies供Playwright使用"""
import os
import json
import hashlib
os.makedirs(COOKIES_DIR, exist_ok=True)
# 安全修复使用SHA256代替MD5作为文件名哈希
filename = hashlib.sha256(username.encode()).hexdigest()[:32] + '.json'
cookies_path = os.path.join(COOKIES_DIR, filename)
def save_cookies_for_screenshot(self, username: str):
"""保存 cookies 供 wkhtmltoimage 使用Netscape Cookie 格式)"""
cookies_path = get_cookie_jar_path(username)
try:
# 获取requests session的cookies
cookies_list = []
lines = [
"# Netscape HTTP Cookie File",
"# This file was generated by zsglpt",
]
for cookie in self.session.cookies:
cookies_list.append({
'name': cookie.name,
'value': cookie.value,
'domain': cookie.domain or _cookie_domain_fallback,
'path': cookie.path or '/',
})
domain = cookie.domain or _cookie_domain_fallback
include_subdomains = "TRUE" if domain.startswith(".") else "FALSE"
path = cookie.path or "/"
secure = "TRUE" if getattr(cookie, "secure", False) else "FALSE"
expires = int(getattr(cookie, "expires", 0) or 0)
lines.append(
"\t".join(
[
domain,
include_subdomains,
path,
secure,
str(expires),
cookie.name,
cookie.value,
]
)
)
# Playwright storage_state 格式
storage_state = {
'cookies': cookies_list,
'origins': []
}
with open(cookies_path, 'w', encoding='utf-8') as f:
json.dump(storage_state, f)
with open(cookies_path, "w", encoding="utf-8") as f:
f.write("\n".join(lines) + "\n")
self.log(f"[API] Cookies已保存供截图使用")
return True

14
app.py
View File

@@ -33,7 +33,6 @@ from realtime.socketio_handlers import register_socketio_handlers
from realtime.status_push import status_push_worker
from routes import register_blueprints
from security import init_security_middleware
from services.browser_manager import init_browser_manager
from services.checkpoints import init_checkpoint_manager
from services.maintenance import start_cleanup_scheduler
from services.models import User
@@ -199,7 +198,7 @@ def cleanup_on_exit():
except Exception:
pass
logger.info("- 关闭浏览器线程池...")
logger.info("- 关闭截图线程池...")
try:
shutdown_browser_worker_pool()
except Exception:
@@ -278,15 +277,6 @@ if __name__ == "__main__":
except Exception as e:
logger.warning(f"警告: 加载并发配置失败,使用默认值: {e}")
logger.info("正在初始化浏览器管理器...")
try:
from services.browser_manager import init_browser_manager_async
logger.info("启动浏览器环境初始化(后台进行,不阻塞服务启动)...")
init_browser_manager_async()
except Exception as e:
logger.warning(f"警告: 启动浏览器初始化失败: {e}")
logger.info("启动定时任务调度器...")
threading.Thread(target=scheduled_task_worker, daemon=True, name="scheduled-task-worker").start()
logger.info("✓ 定时任务调度器已启动")
@@ -305,7 +295,7 @@ if __name__ == "__main__":
except Exception:
pool_size = 3
try:
logger.info(f"初始化截图线程池({pool_size}个worker按需启动浏览器空闲5分钟后自动关闭...")
logger.info(f"初始化截图线程池({pool_size}个worker按需启动执行环境空闲5分钟后自动释放...")
init_browser_worker_pool(pool_size=pool_size)
logger.info("✓ 截图线程池初始化完成")
except Exception as e:

View File

@@ -1,214 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
浏览器自动下载安装模块
检测本地是否有Playwright浏览器如果没有则自动下载安装
"""
import os
import sys
import shutil
import subprocess
from pathlib import Path
# 设置浏览器安装路径支持Docker和本地环境
# Docker环境: PLAYWRIGHT_BROWSERS_PATH环境变量已设置为 /ms-playwright
# 本地环境: 使用Playwright默认路径
try:
    # An explicitly configured location (e.g. the Docker image) wins.
    BROWSERS_PATH = os.environ['PLAYWRIGHT_BROWSERS_PATH']
except KeyError:
    # Otherwise use the per-user cache directory:
    #   Windows: %USERPROFILE%\AppData\Local\ms-playwright
    #   Linux:   ~/.cache/ms-playwright
    BROWSERS_PATH = str(
        Path.home() / "AppData" / "Local" / "ms-playwright"
        if sys.platform == 'win32'
        else Path.home() / ".cache" / "ms-playwright"
    )
    # Export the chosen path through the environment variable.
    os.environ["PLAYWRIGHT_BROWSERS_PATH"] = BROWSERS_PATH
class BrowserInstaller:
    """Check for a usable Playwright Chromium and install it when missing.

    Every progress message goes through :meth:`log`, which forwards to the
    optional callback or prints to stdout.
    """

    def __init__(self, log_callback=None):
        """Create an installer.

        Args:
            log_callback: Optional callable that receives each log message.
                When omitted, messages are printed to stdout.
        """
        self.log_callback = log_callback

    def log(self, message):
        """Emit one log message through the callback, or print it.

        If stdout cannot encode the message, the check/cross markers are
        replaced with ASCII tags and any remaining non-encodable characters
        are substituted, so logging never raises ``UnicodeEncodeError``.
        """
        if self.log_callback:
            self.log_callback(message)
            return

        try:
            print(message)
        except UnicodeEncodeError:
            # Replace the status markers used by this class with ASCII tags.
            # (Replacing the empty string would insert the tag between every
            # character of the message.)
            safe_message = message.replace('✓', '[OK]').replace('✗', '[X]')
            # Other characters may still be unencodable on this console;
            # degrade them instead of raising a second time.
            encoding = getattr(sys.stdout, "encoding", None) or "ascii"
            print(safe_message.encode(encoding, errors="replace").decode(encoding))

    def check_playwright_installed(self):
        """Return True when the ``playwright`` package can be imported."""
        try:
            import playwright  # noqa: F401 - imported only to probe availability
            self.log("✓ Playwright已安装")
            return True
        except ImportError:
            self.log("✗ Playwright未安装")
            return False

    def check_chromium_installed(self):
        """Return True when headless Chromium can be launched and closed."""
        try:
            from playwright.sync_api import sync_playwright

            # Launching the browser is the availability check.
            with sync_playwright() as p:
                try:
                    # Short launch timeout keeps the probe quick.
                    browser = p.chromium.launch(headless=True, timeout=5000)
                    browser.close()
                    self.log("✓ Chromium浏览器已安装且可用")
                    return True
                except Exception as e:
                    error_msg = str(e)
                    self.log(f"✗ Chromium浏览器不可用: {error_msg}")
                    # This message indicates the browser binary is missing.
                    if "Executable doesn't exist" in error_msg:
                        self.log("检测到浏览器文件缺失,需要重新安装")
                    return False
        except Exception as e:
            self.log(f"✗ 检查浏览器时出错: {e}")
            return False

    def install_chromium(self):
        """Install Chromium through the Playwright CLI.

        Returns:
            True when the install command exits with code 0; False on a
            non-zero exit code, timeout (300 s), any other error, or when no
            CLI can be located in a compiled (frozen) build.
        """
        try:
            self.log("正在安装 Chromium 浏览器...")

            # Candidate locations of the playwright executable.
            interpreter_dir = os.path.dirname(sys.executable)
            possible_paths = [
                os.path.join(interpreter_dir, "Scripts", "playwright.exe"),
                os.path.join(interpreter_dir, "playwright.exe"),
                os.path.join(interpreter_dir, "Scripts", "playwright"),
                os.path.join(interpreter_dir, "playwright"),
                "playwright",  # resolved through PATH
            ]

            playwright_cli = None
            for path in possible_paths:
                if os.path.exists(path) or shutil.which(path):
                    playwright_cli = path
                    break

            if playwright_cli:
                # Call the CLI directly when it was found.
                self.log(f"使用 Playwright CLI: {playwright_cli}")
                result = subprocess.run(
                    [playwright_cli, "install", "chromium"],
                    capture_output=True,
                    text=True,
                    timeout=300
                )
            else:
                # In a compiled build "python -m playwright" is not attempted.
                is_nuitka = hasattr(sys, 'frozen') or '__compiled__' in globals()
                if is_nuitka:
                    self.log("检测到 Nuitka 编译环境")
                    self.log("✗ 无法找到 playwright CLI 工具")
                    self.log("请手动运行: playwright install chromium")
                    return False

                # Fall back to running the module with the current interpreter.
                result = subprocess.run(
                    [sys.executable, "-m", "playwright", "install", "chromium"],
                    capture_output=True,
                    text=True,
                    timeout=300
                )

            if result.returncode == 0:
                self.log("✓ Chromium浏览器安装成功")
                return True

            self.log(f"✗ 浏览器安装失败: {result.stderr}")
            return False

        except subprocess.TimeoutExpired:
            self.log("✗ 浏览器安装超时")
            return False
        except Exception as e:
            self.log(f"✗ 浏览器安装出错: {e}")
            return False

    def auto_install(self):
        """Check the environment and install Chromium when it is missing.

        Returns:
            True when Playwright is importable and Chromium is usable, either
            already or after a successful install; False otherwise.
        """
        self.log("=" * 60)
        self.log("检查浏览器环境...")
        self.log("=" * 60)

        # 1. The Playwright package itself must be importable.
        if not self.check_playwright_installed():
            self.log("✗ Playwright未安装无法继续")
            self.log("请确保程序包含 Playwright 库")
            return False

        # 2. Chromium must be launchable; install it if not.
        if not self.check_chromium_installed():
            self.log("\n未检测到Chromium浏览器开始自动安装...")

            if not self.install_chromium():
                self.log("✗ 浏览器安装失败")
                self.log("\n您可以尝试以下方法:")
                self.log("1. 手动执行: playwright install chromium")
                self.log("2. 检查网络连接后重试")
                self.log("3. 检查防火墙设置")
                return False

        self.log("\n" + "=" * 60)
        self.log("✓ 浏览器环境检查完成,一切就绪!")
        self.log("=" * 60 + "\n")
        return True
def check_and_install_browser(log_callback=None):
    """Convenience wrapper: run ``BrowserInstaller.auto_install`` once.

    Args:
        log_callback: Optional callable that receives each log message.

    Returns:
        Whatever ``auto_install`` returns (True on success, False otherwise).
    """
    return BrowserInstaller(log_callback).auto_install()
# 测试代码
if __name__ == "__main__":
    # Manual smoke run: print a banner, run the installer, report the result.
    divider = "=" * 60
    print("浏览器自动安装工具")
    print(divider)
    outcome = (
        "\n✓ 安装成功!您现在可以运行主程序了。"
        if BrowserInstaller().auto_install()
        else "\n✗ 安装失败,请查看上方错误信息。"
    )
    print(outcome)
    print(divider)

View File

@@ -1,42 +1,22 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""浏览器池管理 - 工作线程池模式(真正的浏览器复用"""
"""截图线程池管理 - 工作线程池模式(并发执行截图任务"""
import os
import threading
import queue
import time
from typing import Callable, Optional, Dict, Any
import nest_asyncio
_NEST_ASYNCIO_APPLIED = False
_NEST_ASYNCIO_LOCK = threading.Lock()
def _apply_nest_asyncio_once() -> None:
    """Apply nest_asyncio on demand, at most once per process.

    Deferring the call avoids a global side effect at import time. A failing
    ``nest_asyncio.apply()`` is ignored and still marks the step as done, so
    it is not retried.
    """
    global _NEST_ASYNCIO_APPLIED
    if not _NEST_ASYNCIO_APPLIED:
        with _NEST_ASYNCIO_LOCK:
            # Re-check under the lock: another thread may have finished first.
            if not _NEST_ASYNCIO_APPLIED:
                try:
                    nest_asyncio.apply()
                except Exception:
                    pass
                _NEST_ASYNCIO_APPLIED = True
# 安全修复: 将魔法数字提取为可配置常量
BROWSER_IDLE_TIMEOUT = int(os.environ.get('BROWSER_IDLE_TIMEOUT', '300')) # 空闲超时(秒)默认5分钟
TASK_QUEUE_TIMEOUT = int(os.environ.get('TASK_QUEUE_TIMEOUT', '10')) # 队列获取超时(秒)
TASK_QUEUE_MAXSIZE = int(os.environ.get('BROWSER_TASK_QUEUE_MAXSIZE', '200')) # 队列最大长度(0表示无限制)
BROWSER_MAX_USE_COUNT = int(os.environ.get('BROWSER_MAX_USE_COUNT', '0')) # 每个浏览器最大复用次数(0表示不限制)
BROWSER_MAX_USE_COUNT = int(os.environ.get('BROWSER_MAX_USE_COUNT', '0')) # 每个执行环境最大复用次数(0表示不限制)
class BrowserWorker(threading.Thread):
"""浏览器工作线程 - 每个worker维护自己的浏览器"""
"""截图工作线程 - 每个worker维护自己的执行环境"""
def __init__(
self,
@@ -62,82 +42,44 @@ class BrowserWorker(threading.Thread):
if self.log_callback:
self.log_callback(f"[Worker-{self.worker_id}] {message}")
else:
print(f"[浏览器池][Worker-{self.worker_id}] {message}")
print(f"[截图池][Worker-{self.worker_id}] {message}")
def _create_browser(self):
"""创建浏览器实例"""
try:
from playwright.sync_api import sync_playwright
self.log("正在创建浏览器...")
playwright = sync_playwright().start()
browser = playwright.chromium.launch(
headless=True,
args=[
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-gpu',
]
)
created_at = time.time()
self.browser_instance = {
'playwright': playwright,
'browser': browser,
'created_at': created_at,
'use_count': 0,
'worker_id': self.worker_id
}
self.last_activity_ts = created_at
self.log(f"浏览器创建成功")
return True
except Exception as e:
self.log(f"创建浏览器失败: {e}")
return False
"""创建截图执行环境(逻辑占位,无需真实浏览器"""
created_at = time.time()
self.browser_instance = {
'created_at': created_at,
'use_count': 0,
'worker_id': self.worker_id,
}
self.last_activity_ts = created_at
self.log("截图执行环境就绪")
return True
def _close_browser(self):
"""关闭浏览器"""
"""关闭截图执行环境"""
if self.browser_instance:
try:
self.log("正在关闭浏览器...")
if self.browser_instance['browser']:
self.browser_instance['browser'].close()
if self.browser_instance['playwright']:
self.browser_instance['playwright'].stop()
self.log(f"浏览器已关闭(共处理{self.browser_instance['use_count']}个任务)")
except Exception as e:
self.log(f"关闭浏览器时出错: {e}")
finally:
self.browser_instance = None
self.log(f"执行环境已释放(共处理{self.browser_instance.get('use_count', 0)}个任务)")
self.browser_instance = None
def _check_browser_health(self) -> bool:
"""检查浏览器是否健康"""
if not self.browser_instance:
return False
try:
return self.browser_instance['browser'].is_connected()
except:
return False
"""检查执行环境是否就绪"""
return bool(self.browser_instance)
def _ensure_browser(self) -> bool:
"""确保浏览器可用(如果不可用则重新创建)"""
"""确保执行环境可用"""
if self._check_browser_health():
return True
# 浏览器不可用,尝试重新创建
self.log("浏览器不可用,尝试重新创建...")
self.log("执行环境不可用,尝试重新创建...")
self._close_browser()
return self._create_browser()
def run(self):
"""工作线程主循环 - 按需启动浏览器模式"""
"""工作线程主循环 - 按需启动执行环境模式"""
if self.pre_warm:
self.log("Worker启动预热模式启动即创建浏览器")
self.log("Worker启动预热模式启动即准备执行环境")
else:
self.log("Worker启动按需模式等待任务时不占用浏览器资源)")
self.log("Worker启动按需模式等待任务时不占用资源")
if self.pre_warm and not self.browser_instance:
self._create_browser()
@@ -155,11 +97,11 @@ class BrowserWorker(threading.Thread):
try:
task = self.task_queue.get(timeout=TASK_QUEUE_TIMEOUT)
except queue.Empty:
# 检查是否需要关闭空闲的浏览器
# 检查是否需要释放空闲的执行环境
if self.browser_instance and self.last_activity_ts > 0:
idle_time = time.time() - self.last_activity_ts
if idle_time > BROWSER_IDLE_TIMEOUT:
self.log(f"空闲{int(idle_time)}秒,关闭浏览器释放资源")
self.log(f"空闲{int(idle_time)}秒,释放执行环境")
self._close_browser()
continue
@@ -169,14 +111,14 @@ class BrowserWorker(threading.Thread):
self.log("收到停止信号")
break
# 按需创建或确保浏览器可用
# 按需创建或确保执行环境可用
browser_ready = False
for attempt in range(2):
if self._ensure_browser():
browser_ready = True
break
if attempt < 1:
self.log("浏览器创建失败,重试...")
self.log("执行环境创建失败,重试...")
time.sleep(0.5)
if not browser_ready:
@@ -185,20 +127,20 @@ class BrowserWorker(threading.Thread):
task["retry_count"] = retry_count + 1
try:
self.task_queue.put(task, timeout=1)
self.log("浏览器不可用,任务重新入队")
self.log("执行环境不可用,任务重新入队")
except queue.Full:
self.log("任务队列已满,无法重新入队,任务失败")
callback = task.get("callback")
if callable(callback):
callback(None, "浏览器不可用")
callback(None, "执行环境不可用")
self.total_tasks += 1
self.failed_tasks += 1
continue
self.log("浏览器不可用,任务失败")
self.log("执行环境不可用,任务失败")
callback = task.get("callback") if isinstance(task, dict) else None
if callable(callback):
callback(None, "浏览器不可用")
callback(None, "执行环境不可用")
self.total_tasks += 1
self.failed_tasks += 1
continue
@@ -212,10 +154,10 @@ class BrowserWorker(threading.Thread):
self.total_tasks += 1
self.browser_instance['use_count'] += 1
self.log(f"开始执行任务(第{self.browser_instance['use_count']}使用浏览器")
self.log(f"开始执行任务(第{self.browser_instance['use_count']}执行")
try:
# 将浏览器实例传递给任务函数
# 将执行环境实例传递给任务函数
result = task_func(self.browser_instance, *task_args, **task_kwargs)
callback(result, None)
self.log(f"任务执行成功")
@@ -227,15 +169,15 @@ class BrowserWorker(threading.Thread):
self.failed_tasks += 1
self.last_activity_ts = time.time()
# 任务失败后,检查浏览器健康
# 任务失败后,检查执行环境健康
if not self._check_browser_health():
self.log("任务失败导致浏览器异常,将在下次任务前重建")
self.log("任务失败导致执行环境异常,将在下次任务前重建")
self._close_browser()
# 定期重启浏览器释放Chromium可能累积的内存
# 定期重启执行环境,释放可能累积的资源
if self.browser_instance and BROWSER_MAX_USE_COUNT > 0:
if self.browser_instance.get('use_count', 0) >= BROWSER_MAX_USE_COUNT:
self.log(f"浏览器已复用{self.browser_instance['use_count']}次,重启释放资源")
self.log(f"执行环境已复用{self.browser_instance['use_count']}次,重启释放资源")
self._close_browser()
except Exception as e:
@@ -252,7 +194,7 @@ class BrowserWorker(threading.Thread):
class BrowserWorkerPool:
"""浏览器工作线程池"""
"""截图工作线程池"""
def __init__(self, pool_size: int = 3, log_callback: Optional[Callable] = None):
self.pool_size = pool_size
@@ -268,17 +210,15 @@ class BrowserWorkerPool:
if self.log_callback:
self.log_callback(message)
else:
print(f"[浏览器池] {message}")
print(f"[截图池] {message}")
def initialize(self):
"""初始化工作线程池按需模式默认预热1个浏览器"""
"""初始化工作线程池按需模式默认预热1个执行环境"""
with self.lock:
if self.initialized:
return
_apply_nest_asyncio_once()
self.log(f"正在初始化工作线程池({self.pool_size}个worker按需启动浏览器...")
self.log(f"正在初始化截图线程池({self.pool_size}个worker按需启动执行环境...")
for i in range(self.pool_size):
worker = BrowserWorker(
@@ -291,13 +231,13 @@ class BrowserWorkerPool:
self.workers.append(worker)
self.initialized = True
self.log(f"工作线程池初始化完成({self.pool_size}个worker就绪浏览器将在有任务时按需启动)")
self.log(f"截图线程池初始化完成({self.pool_size}个worker就绪执行环境将在有任务时按需启动)")
# 初始化完成后默认预热1个浏览器,降低容器重启后前几批任务的冷启动开销
# 初始化完成后默认预热1个执行环境,降低容器重启后前几批任务的冷启动开销
self.warmup(1)
def warmup(self, count: int = 1) -> int:
"""预热浏览器池 - 预创建指定数量的浏览器"""
"""预热截图线程池 - 预创建指定数量的执行环境"""
if count <= 0:
return 0
@@ -308,7 +248,7 @@ class BrowserWorkerPool:
with self.lock:
target_workers = list(self.workers[: min(count, len(self.workers))])
self.log(f"预热浏览器池(预创建{len(target_workers)}浏览器...")
self.log(f"预热截图线程池(预创建{len(target_workers)}执行环境...")
for worker in target_workers:
if not worker.browser_instance:
@@ -323,7 +263,7 @@ class BrowserWorkerPool:
time.sleep(0.1)
warmed = sum(1 for w in target_workers if w.browser_instance)
self.log(f"浏览器池预热完成({warmed}浏览器就绪)")
self.log(f"截图线程池预热完成({warmed}执行环境就绪)")
return warmed
def submit_task(self, task_func: Callable, callback: Callable, *args, **kwargs) -> bool:
@@ -435,7 +375,7 @@ _pool_lock = threading.Lock()
def get_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable] = None) -> BrowserWorkerPool:
"""获取全局浏览器工作线程池(单例)"""
"""获取全局截图工作线程池(单例)"""
global _global_pool
with _pool_lock:
@@ -447,12 +387,46 @@ def get_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable]
def init_browser_worker_pool(pool_size: int = 3, log_callback: Optional[Callable] = None):
"""初始化全局浏览器工作线程池"""
"""初始化全局截图工作线程池"""
get_browser_worker_pool(pool_size=pool_size, log_callback=log_callback)
def _shutdown_pool_when_idle(pool: BrowserWorkerPool) -> None:
try:
pool.wait_for_completion(timeout=60)
except Exception:
pass
try:
pool.shutdown()
except Exception:
pass
def resize_browser_worker_pool(pool_size: int, log_callback: Optional[Callable] = None) -> bool:
    """Change the screenshot pool's concurrency by swapping in a new pool.

    New tasks go to the new pool; the previous pool is drained and shut down
    on a background daemon thread.

    Args:
        pool_size: Requested worker count. Values below 1, or values that
            cannot be converted to ``int``, are treated as 1.
        log_callback: Optional log callback for the new pool. When omitted,
            the previous pool's callback (if any) is reused.

    Returns:
        False when the current pool already has the requested size, True when
        a new pool was installed.

    Raises:
        Whatever ``BrowserWorkerPool(...)`` or ``initialize()`` raises. In
        that case the previous pool stays installed as the global pool.
    """
    global _global_pool

    try:
        target_size = max(1, int(pool_size))
    except Exception:
        # Deliberate best-effort: unusable input falls back to one worker.
        target_size = 1

    with _pool_lock:
        old_pool = _global_pool
        if old_pool and int(getattr(old_pool, "pool_size", 0) or 0) == target_size:
            return False

        effective_log_callback = log_callback or (getattr(old_pool, "log_callback", None) if old_pool else None)

        # Build and initialise the replacement before publishing it, so a
        # failing initialize() cannot leave the singleton pointing at an
        # unusable pool while the working old pool is orphaned.
        new_pool = BrowserWorkerPool(pool_size=target_size, log_callback=effective_log_callback)
        new_pool.initialize()
        _global_pool = new_pool

    if old_pool:
        # Retire the old pool off-thread so the caller is not blocked.
        threading.Thread(target=_shutdown_pool_when_idle, args=(old_pool,), daemon=True).start()
    return True
def shutdown_browser_worker_pool():
"""关闭全局浏览器工作线程池"""
"""关闭全局截图工作线程池"""
global _global_pool
with _pool_lock:
@@ -463,7 +437,7 @@ def shutdown_browser_worker_pool():
if __name__ == '__main__':
# 测试代码
print("测试浏览器工作线程池...")
print("测试截图工作线程池...")
def test_task(browser_instance, url: str, task_id: int):
"""测试任务访问URL"""

View File

@@ -10,7 +10,6 @@ services:
- ./data:/app/data # 数据库持久化
- ./logs:/app/logs # 日志持久化
- ./截图:/app/截图 # 截图持久化
- ./playwright:/ms-playwright # Playwright浏览器持久化避免重复下载
- /etc/localtime:/etc/localtime:ro # 时区同步
- ./static:/app/static # 静态文件(实时更新)
- ./templates:/app/templates # 模板文件(实时更新)
@@ -23,8 +22,6 @@ services:
environment:
- TZ=Asia/Shanghai
- PYTHONUNBUFFERED=1
- PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
- PLAYWRIGHT_DOWNLOAD_HOST=https://npmmirror.com/mirrors/playwright
# Flask 配置
- FLASK_ENV=production
- FLASK_DEBUG=false

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,6 @@ flask==3.0.0
flask-socketio==5.3.5
flask-login==0.6.3
python-socketio==5.10.0
playwright==1.40.0
schedule==1.2.0
psutil==5.9.6
pytz==2024.1
@@ -10,6 +9,5 @@ bcrypt==4.0.1
requests==2.31.0
python-dotenv==1.0.0
beautifulsoup4==4.12.2
nest_asyncio
cryptography>=41.0.0
Pillow>=10.0.0

View File

@@ -350,7 +350,7 @@ def get_system_stats():
@admin_api_bp.route("/browser_pool/stats", methods=["GET"])
@admin_required
def get_browser_pool_stats():
"""获取浏览器池状态"""
"""获取截图线程池状态"""
try:
from browser_pool_worker import get_browser_worker_pool
@@ -408,8 +408,8 @@ def get_browser_pool_stats():
}
)
except Exception as e:
logger.exception(f"[AdminAPI] 获取浏览器池状态失败: {e}")
return jsonify({"error": "获取浏览器池状态失败"}), 500
logger.exception(f"[AdminAPI] 获取截图线程池状态失败: {e}")
return jsonify({"error": "获取截图线程池状态失败"}), 500
@admin_api_bp.route("/docker_stats", methods=["GET"])
@@ -619,7 +619,7 @@ def update_system_config_api():
if new_max_screenshot_concurrent is not None:
if not isinstance(new_max_screenshot_concurrent, int) or new_max_screenshot_concurrent < 1:
return jsonify({"error": "截图并发数必须大于0建议根据服务器配置设置每个浏览器约占用200MB内存"}), 400
return jsonify({"error": "截图并发数必须大于0建议根据服务器配置设置wkhtmltoimage 资源占用较低"}), 400
if schedule_time is not None:
import re
@@ -672,6 +672,14 @@ def update_system_config_api():
max_global=int(new_config.get("max_concurrent_global", old_config.get("max_concurrent_global", 2))),
max_per_user=int(new_config.get("max_concurrent_per_account", old_config.get("max_concurrent_per_account", 1))),
)
if new_max_screenshot_concurrent is not None:
try:
from browser_pool_worker import resize_browser_worker_pool
if resize_browser_worker_pool(int(new_config.get("max_screenshot_concurrent", new_max_screenshot_concurrent))):
logger.info(f"截图线程池并发已更新为: {new_config.get('max_screenshot_concurrent')}")
except Exception as pool_error:
logger.warning(f"截图线程池并发更新失败: {pool_error}")
except Exception:
pass

View File

@@ -295,6 +295,21 @@ def get_ip_risk(ip):
)
@security_bp.route("/api/admin/security/ip-risk/clear", methods=["POST"])
@admin_required
def clear_ip_risk():
    """Reset the risk score of the IP named in the JSON request body.

    Responds with HTTP 400 when ``ip`` is missing or blank, or when the scorer
    reports that the reset failed; otherwise returns a success payload with
    the (truncated) IP and a risk score of 0.
    """
    payload = _parse_json()
    target_ip = str(payload.get("ip") or "").strip()

    if target_ip == "":
        return jsonify({"error": "ip不能为空"}), 400

    was_reset = scorer.reset_ip_score(target_ip)
    if was_reset:
        return jsonify({"success": True, "ip": _truncate(target_ip, 64), "risk_score": 0})
    return jsonify({"error": "清理失败"}), 400
@security_bp.route("/api/admin/security/user-risk/<int:user_id>", methods=["GET"])
@admin_required
def get_user_risk(user_id):
@@ -331,4 +346,3 @@ def cleanup_expired():
pool_stats = None
return jsonify({"success": True, "pool_stats": pool_stats})

View File

@@ -11,7 +11,6 @@ from crypto_utils import encrypt_password as encrypt_account_password
from flask import Blueprint, jsonify, request
from flask_login import current_user, login_required
from services.accounts_service import load_user_accounts
from services.browser_manager import init_browser_manager_async
from services.browse_types import BROWSE_TYPE_SHOULD_READ, normalize_browse_type, validate_browse_type
from services.client_log import log_to_client
from services.models import Account
@@ -230,10 +229,6 @@ def start_account(account_id):
if not browse_type:
return jsonify({"error": "浏览类型无效"}), 400
enable_screenshot = data.get("enable_screenshot", True)
if enable_screenshot:
# 异步初始化浏览器环境,避免首次下载/安装 Chromium 阻塞请求导致“网页无响应”
init_browser_manager_async()
ok, message = submit_account_task(
user_id=user_id,
account_id=account_id,
@@ -308,9 +303,6 @@ def manual_screenshot(account_id):
account.last_browse_type = browse_type
# 异步初始化浏览器环境,避免首次下载/安装 Chromium 阻塞请求
init_browser_manager_async()
threading.Thread(
target=take_screenshot_for_account,
args=(user_id, account_id, browse_type, "manual_screenshot"),
@@ -336,10 +328,6 @@ def batch_start_accounts():
if not account_ids:
return jsonify({"error": "请选择要启动的账号"}), 400
if enable_screenshot:
# 异步初始化浏览器环境,避免首次下载/安装 Chromium 阻塞请求
init_browser_manager_async()
started = []
failed = []

View File

@@ -229,6 +229,33 @@ class RiskScorer:
self._update_scores(cursor, "", user_id_int, delta, now_str)
conn.commit()
def reset_ip_score(self, ip: str) -> bool:
    """Set the stored risk score of one IP to zero.

    The IP is stripped and cut to 64 characters. An existing row has its
    score zeroed and timestamps refreshed; otherwise a new row with score 0
    is inserted.

    Returns:
        False when the IP is empty after normalisation, True once the change
        has been committed.
    """
    normalized_ip = str(ip or "").strip()[:64]
    if normalized_ip == "":
        return False

    timestamp = get_cst_now_str()
    with db_pool.get_db() as conn:
        cursor = conn.cursor()
        cursor.execute("SELECT ip FROM ip_risk_scores WHERE ip = ?", (normalized_ip,))
        already_tracked = cursor.fetchone()
        if not already_tracked:
            cursor.execute(
                """
INSERT INTO ip_risk_scores (ip, risk_score, last_seen, created_at, updated_at)
VALUES (?, 0, ?, ?, ?)
""",
                (normalized_ip, timestamp, timestamp, timestamp),
            )
        else:
            cursor.execute(
                "UPDATE ip_risk_scores SET risk_score = 0, last_seen = ?, updated_at = ? WHERE ip = ?",
                (timestamp, timestamp, normalized_ip),
            )
        conn.commit()
    return True
def _update_scores(
self,
cursor,

View File

@@ -1,112 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import annotations
import threading
import time
from typing import Optional
from app_logger import get_logger
from browser_installer import check_and_install_browser
from playwright_automation import PlaywrightBrowserManager
# Module logger for browser-manager initialization messages.
logger = get_logger("browser_manager")
# Shared manager instance; stays None until init_browser_manager() succeeds.
_browser_manager: Optional[PlaywrightBrowserManager] = None
# Lock guarding the module state below; _cond is a Condition built on this same lock.
_lock = threading.Lock()
# Condition used to wait for / announce the end of an initialization attempt.
_cond = threading.Condition(_lock)
# True while some thread is running the initialization.
_init_in_progress = False
# Message describing the most recent failed initialization, or None.
_init_error: Optional[str] = None
# Background thread started by init_browser_manager_async(), if any.
_init_thread: Optional[threading.Thread] = None
def get_browser_manager() -> Optional[PlaywrightBrowserManager]:
    """Return the shared browser manager, or None if it has not been created."""
    return _browser_manager
def is_browser_manager_ready() -> bool:
    """Return True when the shared browser manager has been created."""
    return _browser_manager is not None
def get_browser_manager_init_error() -> Optional[str]:
    """Return the message recorded by the last failed initialization, or None."""
    return _init_error
def init_browser_manager(*, block: bool = True, timeout: Optional[float] = None) -> bool:
    """Create the shared PlaywrightBrowserManager once and publish it module-wide.

    Args:
        block: When another thread is already initializing, wait for it if
            True; return False immediately if False.
        timeout: Maximum number of seconds to wait for another thread's
            initialization. None waits until that initialization ends. The
            value is only consulted while waiting; it does not bound an
            initialization performed by this call.

    Returns:
        True when a manager is available (it already existed, this call
        created it, or the initialization waited on produced it); False
        otherwise.
    """
    global _browser_manager
    global _init_in_progress, _init_error
    # Absolute monotonic deadline, or None for "no limit".
    deadline = time.monotonic() + float(timeout) if timeout is not None else None
    with _cond:
        # Fast path: already initialized.
        if _browser_manager is not None:
            return True
        if _init_in_progress:
            if not block:
                return False
            # Wait in slices of at most 0.5s until the other attempt ends or the deadline passes.
            while _init_in_progress:
                if deadline is None:
                    _cond.wait(timeout=0.5)
                    continue
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break
                _cond.wait(timeout=min(0.5, remaining))
            return _browser_manager is not None
        # This thread claims the initialization and clears any earlier error.
        _init_in_progress = True
        _init_error = None
    # The environment check and manager construction run without holding the lock.
    ok = False
    error: Optional[str] = None
    manager: Optional[PlaywrightBrowserManager] = None
    try:
        logger.info("正在初始化Playwright浏览器管理器...")
        if not check_and_install_browser(log_callback=lambda msg, account_id=None: logger.info(str(msg))):
            error = "浏览器环境检查失败"
            logger.error("浏览器环境检查失败!")
            ok = False
        else:
            manager = PlaywrightBrowserManager(
                headless=True,
                log_callback=lambda msg, account_id=None: logger.info(str(msg)),
            )
            ok = True
            logger.info("Playwright浏览器管理器创建成功")
    except Exception as exc:
        # Record the exception as the failure reason instead of propagating it.
        error = f"{type(exc).__name__}: {exc}"
        logger.exception("初始化Playwright浏览器管理器时发生异常")
        ok = False
    with _cond:
        # Publish the outcome and wake every thread waiting on the condition.
        if ok and manager is not None:
            _browser_manager = manager
        else:
            _init_error = error or "初始化失败"
        _init_in_progress = False
        _cond.notify_all()
    return ok
def init_browser_manager_async() -> None:
    """Initialize the browser environment in a background thread.

    Keeps web requests and service start-up from blocking on the
    initialization. Does nothing when the manager already exists, a
    previously started init thread is still alive, or an initialization
    is currently in progress.
    """
    global _init_thread

    def _run_init():
        # Any failure is logged here; nothing propagates out of the thread.
        try:
            init_browser_manager(block=True)
        except Exception:
            logger.exception("异步初始化浏览器管理器失败")

    with _cond:
        nothing_to_do = (
            _browser_manager is not None
            or (_init_thread and _init_thread.is_alive())
            or _init_in_progress
        )
        if nothing_to_do:
            return
        _init_thread = threading.Thread(target=_run_init, daemon=True, name="browser-manager-init")
        _init_thread.start()

View File

@@ -3,15 +3,16 @@
from __future__ import annotations
import os
import shutil
import subprocess
import time
import database
import email_service
from api_browser import APIBrowser, get_cookie_jar_path, is_cookie_jar_fresh
from app_config import get_config
from app_logger import get_logger
from browser_pool_worker import get_browser_worker_pool
from playwright_automation import PlaywrightAutomation
from services.browser_manager import get_browser_manager
from services.client_log import log_to_client
from services.runtime import get_socketio
from services.state import safe_get_account, safe_remove_task_status, safe_update_task_status
@@ -24,6 +25,93 @@ config = get_config()
SCREENSHOTS_DIR = config.SCREENSHOTS_DIR
os.makedirs(SCREENSHOTS_DIR, exist_ok=True)
# wkhtmltoimage tuning knobs, each overridable through an environment variable
# of the same name without the leading underscore.
# Timeout, in seconds, passed to subprocess.run for one render.
_WKHTMLTOIMAGE_TIMEOUT_SECONDS = int(os.getenv("WKHTMLTOIMAGE_TIMEOUT_SECONDS", "60"))
# Value passed to --javascript-delay (milliseconds).
_WKHTMLTOIMAGE_JS_DELAY_MS = int(os.getenv("WKHTMLTOIMAGE_JS_DELAY_MS", "3000"))
# Value passed to --width.
_WKHTMLTOIMAGE_WIDTH = int(os.getenv("WKHTMLTOIMAGE_WIDTH", "1920"))
# Value passed to --quality (only sent for JPEG output).
_WKHTMLTOIMAGE_QUALITY = int(os.getenv("WKHTMLTOIMAGE_QUALITY", "95"))
# Value passed to --user-agent.
_WKHTMLTOIMAGE_UA = os.getenv(
    "WKHTMLTOIMAGE_USER_AGENT",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
)
def _resolve_wkhtmltoimage_path() -> str | None:
    """Locate the wkhtmltoimage executable.

    A non-empty ``WKHTMLTOIMAGE_PATH`` environment variable takes precedence;
    otherwise ``PATH`` is searched with ``shutil.which``.

    Returns:
        The executable path, or None when neither source yields one.
    """
    configured = os.environ.get("WKHTMLTOIMAGE_PATH")
    if configured:
        return configured
    return shutil.which("wkhtmltoimage")
def _ensure_login_cookies(account, proxy_config, log_callback) -> bool:
    """Refresh the login cookies used for screenshots by logging in through the API.

    Args:
        account: Object providing ``username`` and ``password`` attributes.
        proxy_config: Proxy settings forwarded unchanged to ``APIBrowser``.
        log_callback: Callable taking one message string; forwarded to
            ``APIBrowser`` and also used to report an unexpected exception.
            May be None.

    Returns:
        The result of ``save_cookies_for_screenshot`` after a successful
        login; False when the login fails or any exception is raised.
    """
    try:
        with APIBrowser(log_callback=log_callback, proxy_config=proxy_config) as api_browser:
            if not api_browser.login(account.username, account.password):
                return False
            return api_browser.save_cookies_for_screenshot(account.username)
    except Exception as exc:
        # Still best-effort (never raises), but surface the reason instead of
        # discarding it so a failed refresh can be diagnosed from the task log.
        if log_callback:
            log_callback(f"刷新登录态异常: {type(exc).__name__}: {exc}")
        return False
def take_screenshot_wkhtmltoimage(
    url: str,
    output_path: str,
    cookies_path: str | None = None,
    proxy_server: str | None = None,
    log_callback=None,
) -> bool:
    """Render ``url`` to an image file by running the wkhtmltoimage CLI.

    Args:
        url: Page address handed to wkhtmltoimage.
        output_path: Destination file. A ``.jpg`` / ``.jpeg`` extension
            selects JPEG output; any other extension selects PNG.
        cookies_path: Optional file passed via ``--cookie-jar``.
        proxy_server: Optional value passed via ``--proxy``.
        log_callback: Optional callable taking one message string; used to
            report a missing binary, a non-zero exit, a timeout or any other
            exception.

    Returns:
        True when the process exits with code 0; False in every other case.
    """
    binary = _resolve_wkhtmltoimage_path()
    if not binary:
        if log_callback:
            log_callback("wkhtmltoimage 未安装或不在 PATH 中")
        return False

    suffix = os.path.splitext(output_path)[1].lower()
    wants_jpeg = suffix in (".jpg", ".jpeg")

    # Flag/value pairs are kept on one line for readability.
    command = [
        binary,
        "--format", "jpg" if wants_jpeg else "png",
        "--width", str(_WKHTMLTOIMAGE_WIDTH),
        "--disable-smart-width",
        "--javascript-delay", str(_WKHTMLTOIMAGE_JS_DELAY_MS),
        "--load-error-handling", "ignore",
        "--enable-local-file-access",
        "--encoding", "utf-8",
        "--user-agent", _WKHTMLTOIMAGE_UA,
    ]
    if wants_jpeg:
        # The quality flag is only sent for JPEG output.
        command += ["--quality", str(_WKHTMLTOIMAGE_QUALITY)]
    if cookies_path:
        command += ["--cookie-jar", cookies_path]
    if proxy_server:
        command += ["--proxy", proxy_server]
    command += [url, output_path]

    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=_WKHTMLTOIMAGE_TIMEOUT_SECONDS,
        )
        if completed.returncode == 0:
            return True
        if log_callback:
            # Prefer stderr, fall back to stdout; cap the logged text at 200 chars.
            details = (completed.stderr or completed.stdout or "").strip()
            log_callback(f"wkhtmltoimage 截图失败: {details[:200]}")
        return False
    except subprocess.TimeoutExpired:
        if log_callback:
            log_callback("wkhtmltoimage 截图超时")
        return False
    except Exception as exc:
        if log_callback:
            log_callback(f"wkhtmltoimage 截图异常: {exc}")
        return False
def _emit(event: str, data: object, *, room: str | None = None) -> None:
try:
@@ -42,7 +130,7 @@ def take_screenshot_for_account(
task_start_time=None,
browse_result=None,
):
"""为账号任务完成后截图(使用工作线程池,真正的浏览器复用"""
"""为账号任务完成后截图(使用截图线程池并发执行"""
account = safe_get_account(user_id, account_id)
if not account:
return
@@ -63,9 +151,11 @@ def take_screenshot_for_account(
_emit("account_update", acc.to_dict(), room=f"user_{user_id}")
max_retries = 3
proxy_config = account.proxy_config if hasattr(account, "proxy_config") else None
proxy_server = proxy_config.get("server") if proxy_config else None
cookie_path = get_cookie_jar_path(account.username)
for attempt in range(1, max_retries + 1):
automation = None
try:
safe_update_task_status(
account_id,
@@ -75,100 +165,39 @@ def take_screenshot_for_account(
if attempt > 1:
log_to_client(f"🔄 第 {attempt} 次截图尝试...", user_id, account_id)
worker_id = browser_instance.get("worker_id", "?") if isinstance(browser_instance, dict) else "?"
use_count = browser_instance.get("use_count", 0) if isinstance(browser_instance, dict) else 0
log_to_client(
f"使用Worker-{browser_instance['worker_id']}的浏览器(已使用{browser_instance['use_count']}次)",
f"使用Worker-{worker_id}执行截图(已执行{use_count}次)",
user_id,
account_id,
)
proxy_config = account.proxy_config if hasattr(account, "proxy_config") else None
automation = PlaywrightAutomation(get_browser_manager(), account_id, proxy_config=proxy_config)
automation.playwright = browser_instance["playwright"]
automation.browser = browser_instance["browser"]
def custom_log(message: str):
log_to_client(message, user_id, account_id)
automation.log = custom_log
log_to_client("登录中...", user_id, account_id)
login_result = automation.quick_login(account.username, account.password, account.remember)
if not login_result["success"]:
error_message = login_result.get("message", "截图登录失败")
log_to_client(f"截图登录失败: {error_message}", user_id, account_id)
if attempt < max_retries:
log_to_client("将重试...", user_id, account_id)
time.sleep(2)
continue
log_to_client("❌ 截图失败: 登录失败", user_id, account_id)
return {"success": False, "error": "登录失败"}
if not is_cookie_jar_fresh(cookie_path) or attempt > 1:
log_to_client("正在刷新登录态...", user_id, account_id)
if not _ensure_login_cookies(account, proxy_config, custom_log):
log_to_client("截图登录失败", user_id, account_id)
if attempt < max_retries:
log_to_client("将重试...", user_id, account_id)
time.sleep(2)
continue
log_to_client("❌ 截图失败: 登录失败", user_id, account_id)
return {"success": False, "error": "登录失败"}
log_to_client(f"导航到 '{browse_type}' 页面...", user_id, account_id)
# 截图场景:优先用 bz 参数直达页面(更稳定,避免页面按钮点击失败导致截图跑偏)
navigated = False
try:
from urllib.parse import urlsplit
from urllib.parse import urlsplit
parsed = urlsplit(config.ZSGL_LOGIN_URL)
base = f"{parsed.scheme}://{parsed.netloc}"
if "注册前" in str(browse_type):
bz = 0
else:
bz = 2 # 应读
target_url = f"{base}/admin/center.aspx?bz={bz}"
# 目标:保留外层框架(左侧菜单/顶部栏),仅在 mainframe 内部导航到目标内容页
iframe = None
try:
iframe = automation.get_iframe_safe(retry=True, max_retries=5)
except Exception:
iframe = None
if iframe:
iframe.goto(target_url, timeout=60000)
current_url = getattr(iframe, "url", "") or ""
if "center.aspx" not in current_url:
raise RuntimeError(f"unexpected_iframe_url:{current_url}")
try:
iframe.wait_for_load_state("networkidle", timeout=10000)
except Exception:
pass
try:
iframe.wait_for_selector("table.ltable", timeout=5000)
except Exception:
pass
else:
# 兜底:若获取不到 iframe则退回到主页面直达
automation.main_page.goto(target_url, timeout=60000)
current_url = getattr(automation.main_page, "url", "") or ""
if "center.aspx" not in current_url:
raise RuntimeError(f"unexpected_url:{current_url}")
try:
automation.main_page.wait_for_load_state("networkidle", timeout=10000)
except Exception:
pass
try:
automation.main_page.wait_for_selector("table.ltable", timeout=5000)
except Exception:
pass
navigated = True
except Exception as nav_error:
log_to_client(f"直达页面失败,将尝试按钮切换: {str(nav_error)[:120]}", user_id, account_id)
# 兼容兜底:若直达失败,则回退到原有按钮切换方式
if not navigated:
result = automation.browse_content(
navigate_only=True,
browse_type=browse_type,
auto_next_page=False,
auto_view_attachments=False,
interval=0,
should_stop_callback=None,
)
if not result.success and result.error_message:
log_to_client(f"导航警告: {result.error_message}", user_id, account_id)
time.sleep(2)
parsed = urlsplit(config.ZSGL_LOGIN_URL)
base = f"{parsed.scheme}://{parsed.netloc}"
if "注册前" in str(browse_type):
bz = 0
else:
bz = 2 # 应读
target_url = f"{base}/admin/center.aspx?bz={bz}"
timestamp = get_beijing_now().strftime("%Y%m%d_%H%M%S")
@@ -178,7 +207,13 @@ def take_screenshot_for_account(
screenshot_filename = f"{username_prefix}_{login_account}_{browse_type}_{timestamp}.jpg"
screenshot_path = os.path.join(SCREENSHOTS_DIR, screenshot_filename)
if automation.take_screenshot(screenshot_path):
if take_screenshot_wkhtmltoimage(
target_url,
screenshot_path,
cookies_path=cookie_path if is_cookie_jar_fresh(cookie_path) else None,
proxy_server=proxy_server,
log_callback=custom_log,
):
if os.path.exists(screenshot_path) and os.path.getsize(screenshot_path) > 1000:
log_to_client(f"✓ 截图成功: {screenshot_filename}", user_id, account_id)
return {"success": True, "filename": screenshot_filename}
@@ -197,15 +232,6 @@ def take_screenshot_for_account(
if attempt < max_retries:
log_to_client("将重试...", user_id, account_id)
time.sleep(2)
finally:
if automation:
try:
if automation.context:
automation.context.close()
automation.context = None
automation.page = None
except Exception as e:
logger.debug(f"关闭context时出错: {e}")
return {"success": False, "error": "截图失败已重试3次"}

View File

@@ -574,7 +574,7 @@ def run_task(user_id, account_id, browse_type, enable_screenshot=True, source="m
with APIBrowser(log_callback=custom_log, proxy_config=proxy_config) as api_browser:
if api_browser.login(account.username, account.password):
log_to_client("✓ 登录成功!", user_id, account_id)
api_browser.save_cookies_for_playwright(account.username)
api_browser.save_cookies_for_screenshot(account.username)
database.reset_account_login_status(account_id)
if not account.remark:

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -808,7 +808,7 @@
<label>截图最大并发数</label>
<input type="number" id="maxScreenshotConcurrent" min="1" value="3" style="max-width: 200px;">
<div style="font-size: 12px; color: #666; margin-top: 5px;">
说明:同时进行截图的最大数量。每个浏览器约占用200MB内存
说明:同时进行截图的最大数量。wkhtmltoimage 资源占用较低,可按需提高
</div>
</div>

View File

@@ -41,7 +41,7 @@ def test_requeue_task_when_browser_unavailable():
worker.join(timeout=5)
assert worker.is_alive() is False
assert worker.ensure_calls == 2 # 本地最多尝试2次创建浏览器
assert worker.ensure_calls == 2 # 本地最多尝试2次创建执行环境
assert callback_calls == [] # 第一次失败会重新入队,不应立即回调失败
requeued = task_queue.get_nowait()
@@ -69,7 +69,6 @@ def test_fail_task_after_second_assignment():
worker.join(timeout=5)
assert worker.is_alive() is False
assert callback_calls == [(None, "浏览器不可用")]
assert callback_calls == [(None, "执行环境不可用")]
assert worker.total_tasks == 1
assert worker.failed_tasks == 1