Initial commit: 知识管理平台
主要功能:
- 多用户管理系统
- 浏览器自动化(Playwright)
- 任务编排和执行
- Docker容器化部署
- 数据持久化和日志管理

技术栈:
- Flask 3.0.0
- Playwright 1.40.0
- SQLite with connection pooling
- Docker + Docker Compose

部署说明详见README.md
This commit is contained in:
762
playwright_automation.py
Executable file
762
playwright_automation.py
Executable file
@@ -0,0 +1,762 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Playwright版本 - 知识管理系统自动化核心
|
||||
使用浏览器上下文(Context)实现高性能并发
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from playwright.sync_api import sync_playwright, Browser, BrowserContext, Page, Playwright
|
||||
import time
|
||||
import threading
|
||||
from typing import Optional, Callable
|
||||
from dataclasses import dataclass
|
||||
|
||||
# Pin the Playwright browser directory to a stable per-user location (this
# avoids problems with Nuitka's onefile temporary directory).
# An existing, non-empty PLAYWRIGHT_BROWSERS_PATH is respected so that a
# deployment (for example a container image that installs browsers elsewhere)
# can point Playwright at its own browser directory; the per-user default is
# only applied when the variable is unset or empty.
_DEFAULT_BROWSERS_PATH = str(Path.home() / "AppData" / "Local" / "ms-playwright")
BROWSERS_PATH = os.environ.get("PLAYWRIGHT_BROWSERS_PATH") or _DEFAULT_BROWSERS_PATH
os.environ["PLAYWRIGHT_BROWSERS_PATH"] = BROWSERS_PATH
|
||||
|
||||
# 配置常量
|
||||
class Config:
    """Static settings used by the automation classes in this module."""

    # Login form address, and the URL fragment that login() looks for in the
    # post-submit URL to decide whether the login succeeded.
    LOGIN_URL = "https://postoa.aidunsoft.com/admin/login.aspx"
    INDEX_URL_PATTERN = "index.aspx"

    # Timeouts in milliseconds. Both were raised from 30 s to 60 s to cope
    # with several accounts running concurrently.
    PAGE_LOAD_TIMEOUT = 60 * 1000
    DEFAULT_TIMEOUT = 60 * 1000

    # Maximum number of concurrent browser contexts.
    MAX_CONCURRENT_CONTEXTS = 100
|
||||
|
||||
|
||||
@dataclass
class BrowseResult:
    """Outcome of one ``browse_content`` run."""

    # Whether the run finished without an unhandled error.
    success: bool
    # Number of list rows that were visited.
    total_items: int = 0
    # Number of attachments that were opened.
    total_attachments: int = 0
    # Human-readable failure reason; empty when nothing went wrong.
    error_message: str = ""
|
||||
|
||||
|
||||
class PlaywrightBrowserManager:
    """Factory for fully isolated Playwright browsers (one per account).

    Each ``create_browser`` call starts its own Playwright driver and its own
    Chromium process. The caller owns the returned objects and is responsible
    for closing them; if creation fails part-way, everything that was already
    started is shut down here before the exception is re-raised.
    """

    # Chromium command-line switches applied to every launched browser.
    _CHROMIUM_ARGS = (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--disable-extensions',
        '--disable-notifications',
        '--disable-infobars',
        '--disable-default-apps',
        '--disable-background-timer-throttling',
        '--disable-backgrounding-occluded-windows',
        '--disable-renderer-backgrounding',
    )

    def __init__(self, headless: bool = True, log_callback: Optional[Callable] = None):
        """Store the launch settings.

        Args:
            headless: Whether browsers are launched in headless mode.
            log_callback: Optional logging hook, called as
                ``log_callback(message, account_id)``.
        """
        self.headless = headless
        self.log_callback = log_callback
        self._lock = threading.Lock()

    def log(self, message: str, account_id: Optional[str] = None):
        """Forward ``message`` to the log callback, if one was supplied."""
        if self.log_callback:
            self.log_callback(message, account_id)

    def _release(self, playwright, browser=None) -> None:
        """Best-effort shutdown of a partially created browser stack.

        Failures are logged rather than raised so that the original error
        that triggered the cleanup is the one the caller sees.
        """
        if browser is not None:
            try:
                browser.close()
            except Exception as e:
                self.log(f"关闭浏览器时出错: {str(e)}")
        if playwright is not None:
            try:
                playwright.stop()
            except Exception as e:
                self.log(f"停止Playwright时出错: {str(e)}")

    def create_browser(self, proxy_config=None):
        """Start a new Playwright driver and launch a dedicated Chromium.

        Args:
            proxy_config: Optional mapping; when it has a truthy ``'server'``
                entry, that value is passed to Chromium as the proxy server.

        Returns:
            A ``(playwright, browser)`` tuple owned by the caller.

        Raises:
            Exception: Whatever the driver start or browser launch raised.
                Anything already started is shut down before re-raising.
        """
        playwright = None
        browser = None
        try:
            self.log("初始化Playwright实例...")
            playwright = sync_playwright().start()

            self.log("启动独立浏览器进程...")
            start_time = time.time()

            launch_options = {
                'headless': self.headless,
                'args': list(self._CHROMIUM_ARGS),
            }

            if proxy_config and proxy_config.get('server'):
                launch_options['proxy'] = {
                    'server': proxy_config['server']
                }
                self.log(f"使用代理: {proxy_config['server']}")

            browser = playwright.chromium.launch(**launch_options)

            elapsed = time.time() - start_time
            self.log(f"独立浏览器启动成功 (耗时: {elapsed:.2f}秒)")

            return playwright, browser

        except Exception as e:
            self.log(f"启动浏览器失败: {str(e)}")
            # Without this the driver process started above would be leaked,
            # because the caller never receives a handle to stop it.
            self._release(playwright, browser)
            raise

    def create_browser_and_context(self, proxy_config=None):
        """Create an isolated browser plus a configured browser context.

        The context uses a 1920x1080 viewport with a device scale factor of 2
        and the default/navigation timeouts from ``Config``.

        Args:
            proxy_config: Forwarded to ``create_browser``.

        Returns:
            A ``(playwright, browser, context)`` tuple owned by the caller.

        Raises:
            Exception: Whatever browser or context creation raised. The
                browser and driver are shut down before re-raising.
        """
        playwright, browser = self.create_browser(proxy_config)

        try:
            start_time = time.time()
            self.log("创建浏览器上下文...")

            context = browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
                device_scale_factor=2,  # 2x device pixel ratio for sharper text
            )

            context.set_default_timeout(Config.DEFAULT_TIMEOUT)
            context.set_default_navigation_timeout(Config.PAGE_LOAD_TIMEOUT)

            elapsed = time.time() - start_time
            self.log(f"上下文创建完成 (耗时: {elapsed:.3f}秒)")
        except Exception as e:
            self.log(f"创建浏览器上下文失败: {str(e)}")
            # The browser is already running at this point; close it and the
            # driver so a failed context creation does not leak processes.
            self._release(playwright, browser)
            raise

        return playwright, browser, context
|
||||
|
||||
|
||||
class PlaywrightAutomation:
    """Drives one account's browser session: login, list browsing, screenshots.

    ``main_page`` is the top-level page; ``page`` is whatever locators are
    currently evaluated against, which after ``switch_to_iframe`` is the
    ``mainframe`` frame rather than a page. Call ``close()`` when finished,
    including after a failed ``login()``, to release the browser.
    """

    # Name of the iframe that hosts the content list.
    _FRAME_NAME = 'mainframe'
    # The content table, and its data rows (header row skipped, >= 5 cells).
    _TABLE_XPATH = "//table[@class='ltable']"
    _ROWS_XPATH = "//table[@class='ltable']/tbody/tr[position()>1 and count(td)>=5]"
    # "Next page" link inside the pager.
    _NEXT_PAGE_XPATH = "//div[@id='PageContent']/a[contains(text(), '下一页') or contains(text(), '»')]"
    # Playwright error fragment seen when the frame navigates mid-operation.
    _CONTEXT_DESTROYED = "Execution context was destroyed"

    def __init__(self, browser_manager: PlaywrightBrowserManager, account_id: str, proxy_config: Optional[dict] = None):
        """Store collaborators; no browser is started until ``login()``.

        Args:
            browser_manager: Factory used to create the browser and context.
            account_id: Account identifier attached to every log message.
            proxy_config: Optional proxy settings forwarded to the manager.
        """
        self.browser_manager = browser_manager
        self.account_id = account_id
        self.proxy_config = proxy_config
        self.playwright: Optional[Playwright] = None
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None
        self.main_page: Optional[Page] = None

    def log(self, message: str):
        """Log ``message`` through the manager, tagged with this account."""
        self.browser_manager.log(message, self.account_id)

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------

    def _frame(self):
        """Return the current ``mainframe`` frame, or ``None`` if absent."""
        return self.main_page.frame(self._FRAME_NAME)

    def _wait_for_table(self, timeout: int = 30000) -> None:
        """Wait for the content table; a timeout is logged, not raised."""
        try:
            self.page.locator(self._TABLE_XPATH).wait_for(timeout=timeout)
            self.log("内容表格已加载")
        except Exception:
            self.log("等待表格加载超时,继续...")

    @staticmethod
    def _browse_type_selectors(browse_type: str):
        """Return the ``(link_xpath, label_xpath)`` pair for a browse type."""
        link_xpath = f"//div[contains(@class, 'rule-multi-radio')]//a[contains(text(), '{browse_type}')]"
        label_xpath = f"//label[contains(text(), '{browse_type}')]"
        return link_xpath, label_xpath

    # ------------------------------------------------------------------
    # Login and navigation
    # ------------------------------------------------------------------

    def login(self, username: str, password: str, remember: bool = True) -> bool:
        """Start a browser, submit the login form and report the outcome.

        Args:
            username: Account user name.
            password: Account password.
            remember: Whether to tick the "remember" checkbox.

        Returns:
            True when the post-login URL contains ``Config.INDEX_URL_PATTERN``;
            False otherwise, including when any step raised (the error is
            logged). The browser is left open either way.
        """
        try:
            # A second login() on the same instance would otherwise overwrite
            # the handles and leak the previous browser process.
            if self.context or self.browser or self.playwright:
                self.close()

            self.log("创建浏览器上下文...")
            start_time = time.time()
            self.playwright, self.browser, self.context = (
                self.browser_manager.create_browser_and_context(self.proxy_config)
            )
            elapsed = time.time() - start_time
            self.log(f"浏览器和上下文创建完成 (耗时: {elapsed:.3f}秒)")

            self.log("创建页面...")
            self.page = self.context.new_page()
            self.main_page = self.page

            self.log("访问登录页面...")
            # One retry on top of the first attempt.
            max_retries = 2
            for attempt in range(max_retries):
                try:
                    self.page.goto(Config.LOGIN_URL, timeout=Config.PAGE_LOAD_TIMEOUT)
                    break
                except Exception:
                    if attempt >= max_retries - 1:
                        raise
                    self.log(f"页面加载超时,重试中... ({attempt + 1}/{max_retries})")
                    time.sleep(2)

            self.log("填写登录信息...")
            self.page.fill('#txtUserName', username)
            self.page.fill('#txtPassword', password)

            if remember:
                self.page.check('#chkRemember')

            self.log("点击登录按钮...")
            self.page.click('#btnSubmit')

            self.log("等待登录处理...")
            self.page.wait_for_load_state('networkidle', timeout=30000)

            current_url = self.page.url
            self.log(f"当前URL: {current_url}")

            if Config.INDEX_URL_PATTERN in current_url:
                self.log("登录成功!")
                return True

            self.log("登录失败,请检查用户名和密码")
            return False

        except Exception as e:
            self.log(f"登录过程中出错: {str(e)}")
            return False

    def switch_to_iframe(self) -> bool:
        """Point ``self.page`` at the ``mainframe`` iframe.

        Makes up to three attempts, each waiting up to 2 s for the iframe
        element and pausing 1 s between attempts.

        Returns:
            True when the frame was found, False otherwise.
        """
        try:
            self.log("查找并切换到iframe...")

            max_retries = 3
            for i in range(max_retries):
                try:
                    self.main_page.wait_for_selector("iframe[name='mainframe']", timeout=2000)
                    iframe = self._frame()
                    if iframe:
                        self.page = iframe
                        self.log(f"✓ 成功切换到iframe (尝试 {i+1}/{max_retries})")
                        return True
                except Exception:
                    # Treated the same as "frame not there yet": retry below.
                    pass

                if i < max_retries - 1:
                    self.log(f"未找到iframe,重试中... ({i+1}/{max_retries})")
                    time.sleep(1)
                else:
                    self.log("所有重试都失败,未找到iframe")

            return False

        except Exception as e:
            self.log(f"切换到iframe时出错: {str(e)}")
            return False

    def switch_browse_type(self, browse_type: str, max_retries: int = 2) -> bool:
        """Select a browse type inside the iframe, with retries.

        Each attempt switches into the iframe, then tries to click the
        ``<a>`` rendered for the type and, failing that, the ``<label>``.
        After a successful click it waits (up to 30 s) for the content table.

        Args:
            browse_type: Visible text of the browse type to select.
            max_retries: Number of retries after the first attempt.

        Returns:
            True when one of the click targets was clicked, False otherwise.
        """
        link_xpath, label_xpath = self._browse_type_selectors(browse_type)
        # (selector, message on success, message on failure), tried in order.
        click_targets = (
            (link_xpath, f"点击'{browse_type}'按钮成功", "未找到<a>标签,尝试点击<label>..."),
            (label_xpath, f"点击'{browse_type}'标签成功", "未找到<label>标签"),
        )

        for attempt in range(max_retries + 1):
            can_retry = attempt < max_retries
            try:
                if attempt > 0:
                    self.log(f"⚠ 第 {attempt + 1} 次尝试切换浏览类型...")
                else:
                    self.log(f"切换到'{browse_type}'类型...")

                if not self.switch_to_iframe():
                    if can_retry:
                        self.log("iframe切换失败,等待1秒后重试...")
                        time.sleep(1)
                        continue
                    return False

                restart_attempt = False
                for selector, clicked_message, missed_message in click_targets:
                    try:
                        self.page.locator(selector).click(timeout=5000)
                    except Exception as e:
                        if self._CONTEXT_DESTROYED in str(e):
                            self.log("⚠ 检测到执行上下文被销毁")
                            if can_retry:
                                # The frame navigated under us; start the
                                # whole attempt over instead of trying the
                                # next target against a stale frame.
                                self.log("等待2秒后重试...")
                                time.sleep(2)
                                restart_attempt = True
                                break
                        self.log(missed_message)
                        continue

                    self.log(clicked_message)
                    # Give the page a moment to refresh before waiting on the table.
                    time.sleep(1.5)
                    self._wait_for_table()
                    return True

                if restart_attempt:
                    continue

                if can_retry:
                    self.log("切换失败,等待2秒后重试...")
                    time.sleep(2)
                    continue

                return False

            except Exception as e:
                error_msg = str(e)
                self.log(f"切换浏览类型时出错: {error_msg}")

                if self._CONTEXT_DESTROYED in error_msg or "navigation" in error_msg.lower():
                    if can_retry:
                        self.log("⚠ 检测到执行上下文被销毁或导航错误,等待2秒后重试...")
                        time.sleep(2)
                        continue

                return False

        # Only reachable when max_retries is negative (no attempt was made).
        self.log(f"❌ 切换浏览类型失败,已重试 {max_retries} 次")
        return False

    # ------------------------------------------------------------------
    # Content browsing
    # ------------------------------------------------------------------

    def _reload_and_reselect(self, browse_type: str) -> None:
        """Reload the main page and re-select ``browse_type`` (back to page 1).

        Raises:
            Exception: If the iframe cannot be found or neither click target
                can be clicked; the caller treats that as a paging failure.
        """
        self.log("刷新页面并重新点击浏览类型...")
        self.main_page.reload()
        time.sleep(2.0)

        self.page = self._frame()
        # The iframe may not be attached yet right after a reload; fall back
        # to the waiting/retrying lookup before giving up.
        if not self.page and not self.switch_to_iframe():
            raise RuntimeError("无法获取iframe")

        link_xpath, label_xpath = self._browse_type_selectors(browse_type)
        try:
            self.page.locator(link_xpath).click(timeout=5000)
            self.log(f"重新点击'{browse_type}'按钮成功")
        except Exception:
            self.page.locator(label_xpath).click(timeout=5000)
            self.log(f"点击'{browse_type}'标签成功")

        time.sleep(1.5)
        self._wait_for_table()

    def _turn_page(self, browse_type: str, first_round_done: bool) -> bool:
        """Go to the next page, or wrap around to the first page.

        Returns:
            True when a "next page" link was clicked, False when there was
            none and the list was reloaded from the first page instead.
        """
        next_button = self.page.locator(self._NEXT_PAGE_XPATH)
        if next_button.count() > 0:
            self.log("点击下一页...")
            next_button.click()
            time.sleep(1.5)
            return True

        if not first_round_done:
            self.log("完成第一轮浏览,准备返回第一页继续浏览...")
        else:
            self.log("完成一轮浏览,返回第一页继续...")

        self._reload_and_reselect(browse_type)
        return False

    def _recover_iframe(self, browse_type: str) -> None:
        """Best-effort return to the iframe after an attachment error."""
        try:
            iframe = self._frame()
            if iframe:
                self.page = iframe
            else:
                self.log(f" - 找不到iframe,刷新页面...")
                self.main_page.reload()
                time.sleep(1)
                if self.switch_browse_type(browse_type):
                    self.page = self._frame()
        except Exception as e:
            # Recovery is best-effort, but leave a trace instead of hiding it.
            self.log(f" - 恢复iframe失败: {str(e)}")

    def _view_first_attachment(self, row, interval: float, browse_type: str) -> bool:
        """Open the first attachment link of ``row`` and come back.

        If the click opens a new window, it is kept open for ``interval``
        seconds and then closed. Otherwise the click is assumed to have
        navigated inside the iframe and ``history.back()`` is issued via
        JavaScript (which, unlike ``go_back()``, does not wait for a
        navigation).

        Returns:
            True when an attachment was opened and handled, False when the
            row has no attachment link or handling failed (logged).
        """
        links = row.locator("xpath=.//td[5]//a[contains(@class, 'link-btn')]")
        if links.count() == 0:
            return False

        # Only the first attachment of a row is opened.
        att_link = links.first
        att_text = att_link.inner_text().strip() or "附件"
        self.log(f" - 处理{att_text}...")

        try:
            pages_before = len(self.context.pages)
            att_link.click()

            # Short pause so a popup, if any, has time to register.
            time.sleep(0.5)

            pages_after = self.context.pages
            if len(pages_after) > pages_before:
                new_page = pages_after[-1]
                self.log(f" - 新窗口已打开,等待加载...")
                time.sleep(interval)
                new_page.close()
                self.log(f" - 新窗口已关闭")
            else:
                self.main_page.evaluate("() => window.history.back()")
                time.sleep(0.5)

            # Re-acquire the frame: the old handle may be stale after back().
            time.sleep(0.2)
            self.page = self._frame()

            self.log(f" - {att_text}处理完成")
            return True

        except Exception as e:
            self.log(f" - 处理{att_text}时出错: {str(e)}")
            self._recover_iframe(browse_type)
            return False

    def _process_rows(self, rows_count: int, browse_type: str,
                      auto_view_attachments: bool, interval: float,
                      stop_requested: Callable, result: BrowseResult) -> bool:
        """Visit up to ``rows_count`` rows of the current page.

        Counts are accumulated directly on ``result`` so they survive an
        exception raised part-way through.

        Returns:
            True when the page was processed and browsing may continue,
            False when browsing must end (stop signal or iframe lost).
        """
        for i in range(rows_count):
            if stop_requested():
                self.log("收到停止信号,终止浏览")
                return False

            # After the first row the frame handle may be stale (history.back).
            if i > 0:
                time.sleep(0.2)
                self.page = self._frame()
                if not self.page:
                    self.log("错误:无法获取iframe")
                    return False

            # Re-query the rows every time so the locator reflects the live DOM.
            rows = self.page.locator(self._ROWS_XPATH)
            if i >= rows.count():
                # The list shrank while we were working; stop here instead of
                # waiting for a row that no longer exists until the timeout.
                self.log("列表行数已变化,结束当前页处理")
                break

            row = rows.nth(i)
            title = row.locator("xpath=.//td[4]").inner_text().strip()
            self.log(f" [{i+1}] {title[:50]}")
            result.total_items += 1

            if auto_view_attachments and self._view_first_attachment(row, interval, browse_type):
                result.total_attachments += 1

        return True

    def browse_content(self, browse_type: str,
                       auto_next_page: bool = True,
                       auto_view_attachments: bool = True,
                       interval: float = 1.0,
                       should_stop_callback: Optional[Callable] = None) -> BrowseResult:
        """Walk the content list of ``browse_type`` and open attachments.

        With ``auto_next_page`` enabled the list is traversed page by page;
        after the last page it is reloaded from page 1 and traversed again,
        until two consecutive empty pages are seen after at least one full
        round, a stop is requested, or paging fails. With ``auto_next_page``
        disabled only the current page is processed once.

        Args:
            browse_type: Visible text of the browse type to select.
            auto_next_page: Whether to follow the pager automatically.
            auto_view_attachments: Whether to open each row's first attachment.
            interval: Seconds an attachment popup window is kept open.
            should_stop_callback: Optional zero-argument callable; a truthy
                return value ends browsing.

        Returns:
            A ``BrowseResult``. ``success`` is False when the browse type
            could not be selected or an unexpected error occurred; the item
            and attachment counts gathered so far are reported either way.
        """
        result = BrowseResult(success=False)

        def stop_requested() -> bool:
            return bool(should_stop_callback and should_stop_callback())

        try:
            self.log(f"导航到 '{browse_type}' 页面...")
            try:
                # Let the post-login page settle before touching it.
                time.sleep(2)
                self.log(f"当前URL: {self.main_page.url}")
            except Exception as e:
                self.log(f"获取URL失败: {str(e)}")

            if not self.switch_browse_type(browse_type):
                result.error_message = "切换浏览类型失败"
                return result

            current_page = 1
            completed_first_round = False
            empty_page_counter = 0

            while True:
                if stop_requested():
                    self.log("收到停止信号,终止浏览")
                    break

                self.log(f"处理第 {current_page} 页...")

                # Always re-acquire the frame before reading the list.
                time.sleep(0.2)
                self.page = self._frame()
                if not self.page:
                    self.log("错误:无法获取iframe")
                    break

                # Extra pause so AJAX-loaded rows are present.
                time.sleep(0.5)

                rows_count = self.page.locator(self._ROWS_XPATH).count()

                if rows_count == 0:
                    self.log("当前页面没有内容")
                    empty_page_counter += 1
                    self.log(f"连续空页面数: {empty_page_counter}")

                    if completed_first_round and empty_page_counter >= 2:
                        self.log("检测到连续空页面且已完成至少一轮浏览,内容已浏览完毕")
                        break
                else:
                    empty_page_counter = 0
                    self.log(f"找到 {rows_count} 条内容")

                    if not self._process_rows(rows_count, browse_type,
                                              auto_view_attachments, interval,
                                              stop_requested, result):
                        break

                if not auto_next_page:
                    # Single-page mode: looping back here would re-process
                    # the same page forever.
                    break

                try:
                    time.sleep(0.2)
                    self.page = self._frame()
                    if self._turn_page(browse_type, completed_first_round):
                        current_page += 1
                    else:
                        completed_first_round = True
                        current_page = 1
                except Exception as e:
                    self.log(f"翻页时出错: {str(e)}")
                    break

            result.success = True
            self.log(f"浏览完成!共 {result.total_items} 条内容,{result.total_attachments} 个附件")

        except Exception as e:
            result.error_message = str(e)
            self.log(f"浏览内容时出错: {str(e)}")

        return result

    # ------------------------------------------------------------------
    # Screenshots and teardown
    # ------------------------------------------------------------------

    def take_screenshot(self, filepath: str) -> bool:
        """Save a full-page JPEG screenshot of the main page.

        Args:
            filepath: Destination path of the image.

        Returns:
            True on success, False when the screenshot failed (logged).
        """
        try:
            # JPEG is used because it accepts a quality setting; 100 is the
            # maximum. full_page=True captures the whole scrollable page.
            self.main_page.screenshot(
                path=filepath,
                type='jpeg',
                full_page=True,
                quality=100
            )
            self.log(f"截图已保存: {filepath}")
            return True
        except Exception as e:
            self.log(f"截图失败: {str(e)}")
            return False

    def close(self):
        """Shut down the context, browser and Playwright driver.

        Every step is attempted even if an earlier one fails; failures are
        logged and counted rather than raised. Safe to call more than once.
        """
        errors = []

        # (resource, shutdown method, success message, error prefix), in the
        # order the resources must be released.
        teardown_steps = (
            (self.context, 'close', "上下文已关闭", "关闭上下文时出错"),
            (self.browser, 'close', "浏览器进程已关闭", "关闭浏览器时出错"),
            (self.playwright, 'stop', "Playwright已停止", "停止Playwright时出错"),
        )
        for resource, method_name, done_message, error_prefix in teardown_steps:
            if not resource:
                continue
            try:
                getattr(resource, method_name)()
                self.log(done_message)
            except Exception as e:
                error_msg = f"{error_prefix}: {str(e)}"
                self.log(error_msg)
                errors.append(error_msg)

        # Drop all references so the objects can be garbage-collected.
        self.context = None
        self.page = None
        self.main_page = None
        self.browser = None
        self.playwright = None

        # Brief pause to give the browser process time to exit.
        time.sleep(0.5)

        if errors:
            self.log(f"资源清理完成,但有{len(errors)}个警告")
        else:
            self.log("资源清理完成")
|
||||
|
||||
|
||||
# 简单的测试函数
|
||||
if __name__ == "__main__":
    # Manual smoke test: log in with one account and browse the "应读" list.
    print("Playwright自动化核心 - 测试")
    print("="*60)

    # Credentials are read from the environment so that no account secrets
    # are stored in source control.
    test_username = os.environ.get("KM_USERNAME", "")
    test_password = os.environ.get("KM_PASSWORD", "")

    # The manager has no initialize()/close() methods; it only creates
    # browsers on demand, and each automation instance owns its own browser.
    manager = PlaywrightBrowserManager(headless=True)
    automation = PlaywrightAutomation(manager, "test_account")

    try:
        if not test_username or not test_password:
            print("请设置环境变量 KM_USERNAME 和 KM_PASSWORD")
        elif automation.login(test_username, test_password):
            result = automation.browse_content(
                browse_type="应读",
                auto_next_page=True,
                auto_view_attachments=True,
                interval=2.0  # longer pause per attachment window
            )

            print(f"\n浏览结果: {result}")
    finally:
        # Always release the browser, even if login or browsing raised.
        automation.close()

    print("="*60)
    print("测试完成")
|
||||
Reference in New Issue
Block a user