#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Playwright版本 - 知识管理系统自动化核心 使用浏览器上下文(Context)实现高性能并发 """ import os from pathlib import Path from playwright.sync_api import sync_playwright, Browser, BrowserContext, Page, Playwright import time import threading from typing import Optional, Callable from dataclasses import dataclass # 设置浏览器安装路径(避免Nuitka onefile临时目录问题) BROWSERS_PATH = str(Path.home() / "AppData" / "Local" / "ms-playwright") os.environ["PLAYWRIGHT_BROWSERS_PATH"] = BROWSERS_PATH # 配置常量 class Config: """配置常量""" LOGIN_URL = "https://postoa.aidunsoft.com/admin/login.aspx" INDEX_URL_PATTERN = "index.aspx" PAGE_LOAD_TIMEOUT = 60000 # 毫秒 (increased from 30s to 60s for multi-account support) DEFAULT_TIMEOUT = 60000 # 增加超时时间以支持多账号并发 MAX_CONCURRENT_CONTEXTS = 100 # 最大并发上下文数 @dataclass class BrowseResult: """浏览结果""" success: bool total_items: int = 0 total_attachments: int = 0 error_message: str = "" class PlaywrightBrowserManager: """Playwright浏览器管理器 - 每个账号独立的浏览器实例""" def __init__(self, headless: bool = True, log_callback: Optional[Callable] = None): """ 初始化浏览器管理器 Args: headless: 是否使用无头模式 log_callback: 日志回调函数,签名: log_callback(message, account_id=None) """ self.headless = headless self.log_callback = log_callback self._lock = threading.Lock() def log(self, message: str, account_id: Optional[str] = None): """记录日志""" if self.log_callback: self.log_callback(message, account_id) def create_browser(self, proxy_config=None): """创建新的独立浏览器实例(每个账号独立)""" try: self.log("初始化Playwright实例...") playwright = sync_playwright().start() self.log("启动独立浏览器进程...") start_time = time.time() # 准备浏览器启动参数 launch_options = { 'headless': self.headless, 'args': [ '--no-sandbox', '--disable-dev-shm-usage', '--disable-gpu', '--disable-extensions', '--disable-notifications', '--disable-infobars', '--disable-default-apps', '--disable-background-timer-throttling', '--disable-backgrounding-occluded-windows', '--disable-renderer-backgrounding', ] } # 如果有代理配置,添加代理 if proxy_config and proxy_config.get('server'): launch_options['proxy'] = { 'server': proxy_config['server'] } self.log(f"使用代理: {proxy_config['server']}") browser = playwright.chromium.launch(**launch_options) elapsed = time.time() - start_time self.log(f"独立浏览器启动成功 (耗时: {elapsed:.2f}秒)") return playwright, browser except Exception as e: self.log(f"启动浏览器失败: {str(e)}") raise def create_browser_and_context(self, proxy_config=None): """创建独立的浏览器和上下文(每个账号完全隔离)""" playwright, browser = self.create_browser(proxy_config) start_time = time.time() self.log("创建浏览器上下文...") context = browser.new_context( viewport={'width': 1920, 'height': 1080}, user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', device_scale_factor=2, # 2倍设备像素比,提高文字清晰度 ) # 设置默认超时 context.set_default_timeout(Config.DEFAULT_TIMEOUT) context.set_default_navigation_timeout(Config.PAGE_LOAD_TIMEOUT) elapsed = time.time() - start_time self.log(f"上下文创建完成 (耗时: {elapsed:.3f}秒)") return playwright, browser, context class PlaywrightAutomation: """Playwright自动化操作类""" def __init__(self, browser_manager: PlaywrightBrowserManager, account_id: str, proxy_config: Optional[dict] = None): """ 初始化自动化操作 Args: browser_manager: 浏览器管理器 account_id: 账号ID(用于日志) """ self.browser_manager = browser_manager self.account_id = account_id self.proxy_config = proxy_config self.playwright: Optional[Playwright] = None self.browser: Optional[Browser] = None self.context: Optional[BrowserContext] = None self.page: Optional[Page] = None self.main_page: Optional[Page] = None def log(self, message: str): """记录日志""" self.browser_manager.log(message, self.account_id) def login(self, username: str, password: str, remember: bool = True) -> bool: """ 登录系统 Args: username: 用户名 password: 密码 remember: 是否记住密码 Returns: 是否登录成功 """ try: self.log("创建浏览器上下文...") start_time = time.time() self.playwright, self.browser, self.context = self.browser_manager.create_browser_and_context(self.proxy_config) elapsed = time.time() - start_time self.log(f"浏览器和上下文创建完成 (耗时: {elapsed:.3f}秒)") self.log("创建页面...") self.page = self.context.new_page() self.main_page = self.page self.log("访问登录页面...") # 使用重试机制处理超时 max_retries = 2 for attempt in range(max_retries): try: self.page.goto(Config.LOGIN_URL, timeout=60000) break except Exception as e: if attempt < max_retries - 1: self.log(f"页面加载超时,重试中... ({attempt + 1}/{max_retries})") time.sleep(2) else: raise self.log("填写登录信息...") self.page.fill('#txtUserName', username) self.page.fill('#txtPassword', password) if remember: self.page.check('#chkRemember') self.log("点击登录按钮...") self.page.click('#btnSubmit') # 等待跳转 self.log("等待登录处理...") self.page.wait_for_load_state('networkidle', timeout=30000) # 增加到30秒 # 检查登录结果 current_url = self.page.url self.log(f"当前URL: {current_url}") if Config.INDEX_URL_PATTERN in current_url: self.log("登录成功!") return True else: self.log("登录失败,请检查用户名和密码") return False except Exception as e: self.log(f"登录过程中出错: {str(e)}") return False def switch_to_iframe(self) -> bool: """切换到mainframe iframe""" try: self.log("查找并切换到iframe...") # 使用Playwright的等待机制 max_retries = 3 for i in range(max_retries): try: # 等待iframe元素出现 self.main_page.wait_for_selector("iframe[name='mainframe']", timeout=2000) # 获取iframe iframe = self.main_page.frame('mainframe') if iframe: self.page = iframe self.log(f"✓ 成功切换到iframe (尝试 {i+1}/{max_retries})") return True except Exception as e: if i < max_retries - 1: self.log(f"未找到iframe,重试中... ({i+1}/{max_retries})") time.sleep(1) else: self.log(f"所有重试都失败,未找到iframe") return False except Exception as e: self.log(f"切换到iframe时出错: {str(e)}") return False def switch_browse_type(self, browse_type: str, max_retries: int = 2) -> bool: """ 切换浏览类型(带重试机制) Args: browse_type: 浏览类型(注册前未读/应读/已读) max_retries: 最大重试次数(默认2次) Returns: 是否切换成功 """ for attempt in range(max_retries + 1): try: if attempt > 0: self.log(f"⚠ 第 {attempt + 1} 次尝试切换浏览类型...") else: self.log(f"切换到'{browse_type}'类型...") # 切换到iframe if not self.switch_to_iframe(): if attempt < max_retries: self.log(f"iframe切换失败,等待1秒后重试...") time.sleep(1) continue return False # 方法1: 尝试查找标签(如果JavaScript创建了的话) selector = f"//div[contains(@class, 'rule-multi-radio')]//a[contains(text(), '{browse_type}')]" try: # 等待并点击 self.page.locator(selector).click(timeout=5000) self.log(f"点击'{browse_type}'按钮成功") # 等待页面刷新并加载内容 time.sleep(1.5) # 等待表格加载(最多等待30秒) try: self.page.locator("//table[@class='ltable']").wait_for(timeout=30000) self.log("内容表格已加载") except Exception as e: self.log("等待表格加载超时,继续...") return True except Exception as e: error_msg = str(e) if "Execution context was destroyed" in error_msg: self.log(f"⚠ 检测到执行上下文被销毁") if attempt < max_retries: self.log(f"等待2秒后重试...") time.sleep(2) continue self.log(f"未找到标签,尝试点击