#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Playwright版本 - 知识管理系统自动化核心
使用浏览器上下文(Context)实现高性能并发
"""
import os
from pathlib import Path
from playwright.sync_api import sync_playwright, Browser, BrowserContext, Page, Playwright
import time
import threading
from typing import Optional, Callable
from dataclasses import dataclass
# Pin the Playwright browser install location to a fixed per-user directory
# (works around the temporary-directory problem of Nuitka onefile builds).
BROWSERS_PATH = str(Path.home().joinpath("AppData", "Local", "ms-playwright"))
os.environ["PLAYWRIGHT_BROWSERS_PATH"] = BROWSERS_PATH
# 配置常量
class Config:
    """Static configuration values for the automation module."""

    # Login page address and the URL fragment expected after a successful login.
    LOGIN_URL = "https://postoa.aidunsoft.com/admin/login.aspx"
    INDEX_URL_PATTERN = "index.aspx"

    # Timeouts in milliseconds; raised from 30s to 60s to support
    # multi-account concurrency.
    PAGE_LOAD_TIMEOUT = 60_000
    DEFAULT_TIMEOUT = 60_000

    # Maximum number of concurrent contexts.
    MAX_CONCURRENT_CONTEXTS = 100
@dataclass
class BrowseResult:
    """Result record of a browse operation.

    Only ``success`` is required. The two counters default to ``0`` and the
    error message defaults to an empty string.
    """

    # Whether the operation succeeded.
    success: bool
    # Item and attachment counters.
    total_items: int = 0
    total_attachments: int = 0
    # Error description; empty by default.
    error_message: str = ""
class PlaywrightBrowserManager:
    """Playwright browser manager: one independent browser instance per account.

    Each call to :meth:`create_browser` starts a new Playwright driver and
    launches a new Chromium process. The caller owns the returned objects and
    is responsible for closing them once they have been handed over.
    """

    def __init__(self, headless: bool = True, log_callback: Optional[Callable] = None):
        """Store the launch settings.

        Args:
            headless: Whether Chromium is launched in headless mode.
            log_callback: Optional logging callable, invoked as
                ``log_callback(message, account_id)``.
        """
        self.headless = headless
        self.log_callback = log_callback
        # Not used by the methods of this class; kept because code outside
        # this class may rely on the attribute.
        self._lock = threading.Lock()

    def log(self, message: str, account_id: Optional[str] = None):
        """Forward *message* to the log callback, if one was supplied."""
        if self.log_callback:
            self.log_callback(message, account_id)

    def _release_quietly(self, *closers):
        """Run each cleanup callable, logging failures instead of raising.

        Used on error paths only, so that a failing cleanup can never replace
        the exception that triggered it.
        """
        for closer in closers:
            try:
                closer()
            except Exception as cleanup_error:
                self.log(f"释放资源失败: {cleanup_error}")

    def create_browser(self, proxy_config=None):
        """Start a Playwright driver and launch a dedicated Chromium process.

        Args:
            proxy_config: Optional mapping. When it has a truthy ``'server'``
                entry, that value is passed to Chromium as the proxy server.

        Returns:
            A ``(playwright, browser)`` tuple owned by the caller.

        Raises:
            Exception: Any error raised while starting the driver or launching
                the browser. The error is logged, and a driver that was
                already started here is stopped before re-raising so it is
                not leaked.
        """
        playwright = None
        try:
            self.log("初始化Playwright实例...")
            playwright = sync_playwright().start()
            self.log("启动独立浏览器进程...")
            start_time = time.time()

            # Browser launch arguments.
            launch_options = {
                'headless': self.headless,
                'args': [
                    '--no-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-gpu',
                    '--disable-extensions',
                    '--disable-notifications',
                    '--disable-infobars',
                    '--disable-default-apps',
                    '--disable-background-timer-throttling',
                    '--disable-backgrounding-occluded-windows',
                    '--disable-renderer-backgrounding',
                ],
            }

            # Route traffic through the proxy when one is configured.
            if proxy_config and proxy_config.get('server'):
                launch_options['proxy'] = {'server': proxy_config['server']}
                self.log(f"使用代理: {proxy_config['server']}")

            browser = playwright.chromium.launch(**launch_options)
            elapsed = time.time() - start_time
            self.log(f"独立浏览器启动成功 (耗时: {elapsed:.2f}秒)")
            return playwright, browser
        except Exception as e:
            self.log(f"启动浏览器失败: {str(e)}")
            # The driver was started but no browser will be returned, so
            # nobody else could ever stop it.
            if playwright is not None:
                self._release_quietly(playwright.stop)
            raise

    def create_browser_and_context(self, proxy_config=None):
        """Create a dedicated browser plus one context with default timeouts.

        Args:
            proxy_config: Forwarded unchanged to :meth:`create_browser`.

        Returns:
            A ``(playwright, browser, context)`` tuple owned by the caller.

        Raises:
            Exception: Any error from browser creation or context setup. When
                context setup fails, the freshly launched browser is closed
                and the driver stopped before re-raising.
        """
        playwright, browser = self.create_browser(proxy_config)
        try:
            start_time = time.time()
            self.log("创建浏览器上下文...")
            context = browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
                device_scale_factor=2,  # 2x device pixel ratio for sharper text
            )
            # Default timeouts for actions and for navigations.
            context.set_default_timeout(Config.DEFAULT_TIMEOUT)
            context.set_default_navigation_timeout(Config.PAGE_LOAD_TIMEOUT)
        except Exception:
            # The caller never receives these handles on failure, so release
            # them here (browser first, then the driver).
            self._release_quietly(browser.close, playwright.stop)
            raise
        elapsed = time.time() - start_time
        self.log(f"上下文创建完成 (耗时: {elapsed:.3f}秒)")
        return playwright, browser, context
class PlaywrightAutomation:
"""Playwright自动化操作类"""
def __init__(self, browser_manager: PlaywrightBrowserManager, account_id: str, proxy_config: Optional[dict] = None):
    """Bind the automation to one account; no browser is started here.

    Args:
        browser_manager: Manager used to create the browser/context and to
            emit log messages.
        account_id: Account ID (used for logging).
        proxy_config: Optional proxy settings passed to the browser manager
            when the browser is created.
    """
    # Playwright handles; all start out unset.
    self.playwright: Optional[Playwright] = None
    self.browser: Optional[Browser] = None
    self.context: Optional[BrowserContext] = None
    self.page: Optional[Page] = None
    self.main_page: Optional[Page] = None

    # Collaborators and per-account settings.
    self.browser_manager = browser_manager
    self.account_id = account_id
    self.proxy_config = proxy_config
def log(self, message: str):
"""记录日志"""
self.browser_manager.log(message, self.account_id)
def login(self, username: str, password: str, remember: bool = True) -> bool:
    """Create a browser for this account and sign in through the login form.

    Args:
        username: User name typed into ``#txtUserName``.
        password: Password typed into ``#txtPassword``.
        remember: Whether to tick the ``#chkRemember`` checkbox.

    Returns:
        True when the URL after submitting contains
        ``Config.INDEX_URL_PATTERN``. False otherwise, including when any
        step raises (the error is logged, never propagated).

    Note:
        The browser objects created here stay on the instance even when the
        login fails; this method does not close them.
    """
    try:
        self.log("创建浏览器上下文...")
        start_time = time.time()
        self.playwright, self.browser, self.context = (
            self.browser_manager.create_browser_and_context(self.proxy_config)
        )
        elapsed = time.time() - start_time
        self.log(f"浏览器和上下文创建完成 (耗时: {elapsed:.3f}秒)")

        self.log("创建页面...")
        self.page = self.context.new_page()
        self.main_page = self.page

        self.log("访问登录页面...")
        self._open_login_page()

        self.log("填写登录信息...")
        self.page.fill('#txtUserName', username)
        self.page.fill('#txtPassword', password)
        if remember:
            self.page.check('#chkRemember')

        self.log("点击登录按钮...")
        self.page.click('#btnSubmit')

        # Let the post-submit navigation settle before inspecting the URL.
        self.log("等待登录处理...")
        self.page.wait_for_load_state('networkidle', timeout=30000)

        # The login succeeded only if we ended up on the index page.
        current_url = self.page.url
        self.log(f"当前URL: {current_url}")
        if Config.INDEX_URL_PATTERN in current_url:
            self.log("登录成功!")
            return True
        self.log("登录失败,请检查用户名和密码")
        return False
    except Exception as e:
        self.log(f"登录过程中出错: {str(e)}")
        return False

def _open_login_page(self, max_retries: int = 2) -> None:
    """Navigate to the login URL, retrying after a failed attempt.

    Uses ``Config.PAGE_LOAD_TIMEOUT`` so the navigation timeout follows the
    shared configuration. The last failure is re-raised to the caller.
    """
    for attempt in range(1, max_retries + 1):
        try:
            self.page.goto(Config.LOGIN_URL, timeout=Config.PAGE_LOAD_TIMEOUT)
            return
        except Exception as e:
            if attempt >= max_retries:
                raise
            # Include the actual error: not every failure here is a timeout.
            self.log(f"页面加载失败,重试中... ({attempt}/{max_retries}): {e}")
            time.sleep(2)
def switch_to_iframe(self) -> bool:
"""切换到mainframe iframe"""
try:
self.log("查找并切换到iframe...")
# 使用Playwright的等待机制
max_retries = 3
for i in range(max_retries):
try:
# 等待iframe元素出现
self.main_page.wait_for_selector("iframe[name='mainframe']", timeout=2000)
# 获取iframe
iframe = self.main_page.frame('mainframe')
if iframe:
self.page = iframe
self.log(f"✓ 成功切换到iframe (尝试 {i+1}/{max_retries})")
return True
except Exception as e:
if i < max_retries - 1:
self.log(f"未找到iframe,重试中... ({i+1}/{max_retries})")
time.sleep(1)
else:
self.log(f"所有重试都失败,未找到iframe")
return False
except Exception as e:
self.log(f"切换到iframe时出错: {str(e)}")
return False
def switch_browse_type(self, browse_type: str, max_retries: int = 2) -> bool:
"""
切换浏览类型(带重试机制)
Args:
browse_type: 浏览类型(注册前未读/应读/已读)
max_retries: 最大重试次数(默认2次)
Returns:
是否切换成功
"""
for attempt in range(max_retries + 1):
try:
if attempt > 0:
self.log(f"⚠ 第 {attempt + 1} 次尝试切换浏览类型...")
else:
self.log(f"切换到'{browse_type}'类型...")
# 切换到iframe
if not self.switch_to_iframe():
if attempt < max_retries:
self.log(f"iframe切换失败,等待1秒后重试...")
time.sleep(1)
continue
return False
# 方法1: 尝试查找标签(如果JavaScript创建了的话)
selector = f"//div[contains(@class, 'rule-multi-radio')]//a[contains(text(), '{browse_type}')]"
try:
# 等待并点击
self.page.locator(selector).click(timeout=5000)
self.log(f"点击'{browse_type}'按钮成功")
# 等待页面刷新并加载内容
time.sleep(1.5)
# 等待表格加载(最多等待30秒)
try:
self.page.locator("//table[@class='ltable']").wait_for(timeout=30000)
self.log("内容表格已加载")
except Exception as e:
self.log("等待表格加载超时,继续...")
return True
except Exception as e:
error_msg = str(e)
if "Execution context was destroyed" in error_msg:
self.log(f"⚠ 检测到执行上下文被销毁")
if attempt < max_retries:
self.log(f"等待2秒后重试...")
time.sleep(2)
continue
self.log(f"未找到标签,尝试点击