feat: 知识管理平台精简版 - PyQt6桌面应用
主要功能: - 账号管理:添加/编辑/删除账号,测试登录 - 浏览任务:批量浏览应读/选读内容并标记已读 - 截图管理:wkhtmltoimage截图,查看历史 - 金山文档:扫码登录/微信快捷登录,自动上传截图 技术栈: - PyQt6 GUI框架 - Playwright 浏览器自动化 - SQLite 本地数据存储 - wkhtmltoimage 网页截图 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
13
core/__init__.py
Normal file
13
core/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""核心业务逻辑模块"""
|
||||
|
||||
from .api_browser import APIBrowser, APIBrowseResult, get_cookie_jar_path
|
||||
from .screenshot import take_screenshot, ScreenshotResult
|
||||
from .kdocs_uploader import KDocsUploader
|
||||
|
||||
__all__ = [
|
||||
'APIBrowser', 'APIBrowseResult', 'get_cookie_jar_path',
|
||||
'take_screenshot', 'ScreenshotResult',
|
||||
'KDocsUploader'
|
||||
]
|
||||
504
core/api_browser.py
Normal file
504
core/api_browser.py
Normal file
@@ -0,0 +1,504 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
API浏览器 - 精简版
|
||||
用纯HTTP请求实现浏览功能,比浏览器自动化快30-60倍
|
||||
从原项目精简提取,移除了缓存、诊断日志等复杂功能
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import hashlib
|
||||
from typing import Optional, Callable, List, Dict, Any
|
||||
from dataclasses import dataclass
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
@dataclass
class APIBrowseResult:
    """Outcome of one API browse run."""

    # True when the run completed (including the "nothing to process" case).
    success: bool
    # Number of articles whose detail page was fetched during the run.
    total_items: int = 0
    # Number of attachments whose read-marking request succeeded.
    total_attachments: int = 0
    # Failure reason; left empty when no error was recorded.
    error_message: str = ""
|
||||
|
||||
|
||||
def get_cookie_jar_path(username: str) -> str:
    """Return the path of the Netscape-format cookie file used for screenshots.

    The file name is the first 32 hex characters of the SHA-256 digest of
    *username*, so the user name itself never appears on disk. The cookie
    directory is created on demand.

    Args:
        username: Account name the cookie jar belongs to.

    Returns:
        Path of the cookie file as a string (the file itself may not exist yet).
    """
    from config import COOKIES_DIR

    # parents=True: a missing parent directory must not make the first call fail.
    COOKIES_DIR.mkdir(parents=True, exist_ok=True)
    filename = hashlib.sha256(username.encode()).hexdigest()[:32] + ".cookies.txt"
    return str(COOKIES_DIR / filename)
|
||||
|
||||
|
||||
def is_cookie_jar_fresh(cookie_path: str, max_age_seconds: int = 86400) -> bool:
    """Tell whether the cookie file exists and was modified recently enough.

    Args:
        cookie_path: Path of the cookie file; an empty value counts as stale.
        max_age_seconds: Maximum allowed age in seconds (24 hours by default).
            Negative or falsy values are treated as 0.

    Returns:
        True when the file exists and its age does not exceed the limit;
        False otherwise, including when the age cannot be determined.
    """
    if not (cookie_path and os.path.exists(cookie_path)):
        return False

    try:
        modified_at = os.path.getmtime(cookie_path)
        age_limit = int(max_age_seconds or 0)
        if age_limit < 0:
            age_limit = 0
        return time.time() - modified_at <= age_limit
    except Exception:
        return False
|
||||
|
||||
|
||||
class APIBrowser:
    """HTTP-only client that browses the site and marks content as read.

    Works with plain ``requests`` calls instead of a real browser: it logs in
    through the site's ASP.NET form, walks the article list, and marks each
    article and its attachments as read.

    Usage:
        with APIBrowser(log_callback=print) as browser:
            if browser.login(username, password):
                result = browser.browse_content("应读")
    """

    def __init__(self, log_callback: Optional[Callable] = None, proxy_config: Optional[dict] = None):
        """Create the HTTP session and load the site URLs from the project config.

        Args:
            log_callback: Optional callable that receives every log message.
            proxy_config: Optional dict; when it has a truthy ``"server"``
                entry, that value is used as both the HTTP and HTTPS proxy.
        """
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        })
        self.logged_in = False
        self.log_callback = log_callback
        self.stop_flag = False
        self._closed = False
        self.last_total_records = 0
        self._username = ""

        # Site endpoints come from the project configuration.
        from config import get_config
        config = get_config()
        self.base_url = config.zsgl.base_url
        self.login_url = config.zsgl.login_url
        self.index_url_pattern = config.zsgl.index_url_pattern

        # Optional proxy, applied to both schemes.
        if proxy_config and proxy_config.get("server"):
            proxy_server = proxy_config["server"]
            self.session.proxies = {"http": proxy_server, "https": proxy_server}
            self.proxy_server = proxy_server
        else:
            self.proxy_server = None

    def log(self, message: str):
        """Forward *message* to the log callback, if one was supplied."""
        if self.log_callback:
            self.log_callback(message)

    def _request_with_retry(self, method: str, url: str, max_retries: int = 3,
                            retry_delay: float = 1, **kwargs) -> requests.Response:
        """Send a GET or POST request, retrying when the call raises.

        Args:
            method: ``"get"`` (case-insensitive) sends a GET; anything else a POST.
            url: Target URL.
            max_retries: Total number of attempts; values below 1 are treated as 1.
            retry_delay: Seconds to sleep between attempts.
            **kwargs: Passed through to ``requests``; ``timeout`` defaults to 10s.

        Returns:
            The response of the first attempt that did not raise. The HTTP
            status code is not inspected here.

        Raises:
            Exception: The error of the last attempt once all attempts failed.
        """
        kwargs.setdefault("timeout", 10.0)
        # Always try at least once; with zero attempts there would be no
        # exception to re-raise and ``raise None`` is itself a TypeError.
        attempts = max(1, max_retries)
        last_error = None

        for attempt in range(1, attempts + 1):
            try:
                if method.lower() == "get":
                    resp = self.session.get(url, **kwargs)
                else:
                    resp = self.session.post(url, **kwargs)
                return resp
            except Exception as e:
                last_error = e
                if attempt < attempts:
                    self.log(f"[API] 请求超时,{retry_delay}秒后重试 ({attempt}/{attempts})...")
                    time.sleep(retry_delay)
                else:
                    self.log(f"[API] 请求失败,已重试{attempts}次: {str(e)}")

        raise last_error

    def _get_aspnet_fields(self, soup: BeautifulSoup) -> Dict[str, str]:
        """Collect the ASP.NET hidden form fields that are present in *soup*."""
        fields = {}
        for name in ["__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"]:
            field = soup.find("input", {"name": name})
            if field:
                fields[name] = field.get("value", "")
        return fields

    def login(self, username: str, password: str) -> bool:
        """Log in through the site's login form.

        Fetches the login page for its hidden fields, posts the credentials,
        and treats a final URL containing ``index_url_pattern`` as success.

        Returns:
            True on success; False on rejection or on any exception (the
            reason is sent to the log callback).
        """
        self.log(f"[API] 登录: {username}")
        self._username = username

        try:
            resp = self._request_with_retry("get", self.login_url)
            soup = BeautifulSoup(resp.text, "html.parser")
            fields = self._get_aspnet_fields(soup)

            data = fields.copy()
            data["txtUserName"] = username
            data["txtPassword"] = password
            data["btnSubmit"] = "登 录"

            resp = self._request_with_retry(
                "post",
                self.login_url,
                data=data,
                headers={
                    "Content-Type": "application/x-www-form-urlencoded",
                    "Origin": self.base_url,
                    "Referer": self.login_url,
                },
                allow_redirects=True,
            )

            if self.index_url_pattern in resp.url:
                self.logged_in = True
                self.log("[API] 登录成功")
                return True

            # Still on the login page: surface the site's own error label.
            soup = BeautifulSoup(resp.text, "html.parser")
            error = soup.find(id="lblMsg")
            error_msg = error.get_text().strip() if error else "未知错误"
            self.log(f"[API] 登录失败: {error_msg}")
            return False

        except Exception as e:
            self.log(f"[API] 登录异常: {str(e)}")
            return False

    def get_real_name(self) -> Optional[str]:
        """Return the user's real name parsed from the centre page.

        Returns:
            The name, or None when not logged in, when the expected markup is
            missing, or when the request fails.
        """
        if not self.logged_in:
            return None

        try:
            url = f"{self.base_url}/admin/center.aspx"
            resp = self._request_with_retry("get", url)
            soup = BeautifulSoup(resp.text, "html.parser")

            nlist = soup.find("div", {"class": "nlist-5"})
            if nlist:
                first_li = nlist.find("li")
                if first_li:
                    text = first_li.get_text()
                    match = re.search(r"姓名[::]\s*([^\((]+)", text)
                    if match:
                        return match.group(1).strip()
            return None
        except Exception:
            return None

    def save_cookies_for_screenshot(self, username: str) -> bool:
        """Write the session cookies to a Netscape-format cookie file.

        The file is the one returned by ``get_cookie_jar_path(username)`` and
        is intended for wkhtmltoimage.

        Returns:
            True when the file was written, False on any failure (logged).
        """
        try:
            # Resolving the path may create a directory, so it sits inside the
            # try block: this method reports failure by returning False.
            cookies_path = get_cookie_jar_path(username)
            parsed = urlsplit(self.base_url)
            cookie_domain = parsed.hostname or "postoa.aidunsoft.com"

            lines = [
                "# Netscape HTTP Cookie File",
                "# Generated by zsglpt-lite",
            ]
            for cookie in self.session.cookies:
                domain = cookie.domain or cookie_domain
                include_subdomains = "TRUE" if domain.startswith(".") else "FALSE"
                path = cookie.path or "/"
                secure = "TRUE" if getattr(cookie, "secure", False) else "FALSE"
                expires = int(getattr(cookie, "expires", 0) or 0)
                lines.append("\t".join([
                    domain,
                    include_subdomains,
                    path,
                    secure,
                    str(expires),
                    cookie.name,
                    # A cookie without a value must not abort the whole export.
                    cookie.value or "",
                ]))

            with open(cookies_path, "w", encoding="utf-8") as f:
                f.write("\n".join(lines) + "\n")

            self.log("[API] Cookies已保存供截图使用")
            return True
        except Exception as e:
            self.log(f"[API] 保存cookies失败: {e}")
            return False

    def get_article_list_page(self, bz: int = 0, page: int = 1) -> tuple:
        """Fetch one page of the article list.

        Args:
            bz: Value of the ``bz`` query parameter.
            page: 1-based page number; only sent when greater than 1.

        Returns:
            ``(articles, total_pages, None)`` where each article is a dict with
            ``title``, ``href`` and ``article_id``. ``([], 0, None)`` when not
            logged in. ``self.last_total_records`` is updated as a side effect.

        Raises:
            Exception: Propagated from the HTTP request.
        """
        if not self.logged_in:
            return [], 0, None

        if page > 1:
            url = f"{self.base_url}/admin/center.aspx?bz={bz}&page={page}"
        else:
            url = f"{self.base_url}/admin/center.aspx?bz={bz}"

        resp = self._request_with_retry("get", url)
        soup = BeautifulSoup(resp.text, "html.parser")
        articles = []

        ltable = soup.find("table", {"class": "ltable"})
        if ltable:
            rows = ltable.find_all("tr")[1:]  # the first row is skipped
            for row in rows:
                if "暂无记录" in row.get_text():
                    continue

                link = row.find("a", href=True)
                if link:
                    href = link.get("href", "")
                    title = link.get_text().strip()
                    match = re.search(r"id=(\d+)", href)
                    article_id = match.group(1) if match else None
                    articles.append({
                        "title": title,
                        "href": href,
                        "article_id": article_id,
                    })

        # Page count is derived from the record total, 10 records per page.
        total_pages = 1
        total_records = 0

        page_content = soup.find(id="PageContent")
        if page_content:
            text = page_content.get_text()
            total_match = re.search(r"共(\d+)记录", text)
            if total_match:
                total_records = int(total_match.group(1))
                total_pages = (total_records + 9) // 10

        self.last_total_records = total_records
        return articles, total_pages, None

    def get_article_attachments(self, article_href: str) -> tuple:
        """Fetch an article page and extract its attachments and identifiers.

        Args:
            article_href: Absolute URL, or a path relative to ``/admin/``.

        Returns:
            ``(attachments, article_info)``: a list of dicts with ``id``,
            ``channel_id`` and ``filename``, and a dict with ``channel_id``
            and ``article_id`` (both None when no ``saveread`` handler is found).

        Raises:
            Exception: Propagated from the HTTP request.
        """
        if not article_href.startswith("http"):
            url = f"{self.base_url}/admin/{article_href}"
        else:
            url = article_href

        resp = self._request_with_retry("get", url)
        soup = BeautifulSoup(resp.text, "html.parser")

        attachments = []
        article_info = {"channel_id": None, "article_id": None}

        # The saveread(...) onclick handler carries channel_id and article_id.
        for elem in soup.find_all(["button", "input"]):
            onclick = elem.get("onclick", "")
            match = re.search(r"saveread\((\d+),(\d+)\)", onclick)
            if match:
                article_info["channel_id"] = match.group(1)
                article_info["article_id"] = match.group(2)
                break

        attach_list = soup.find("div", {"class": "attach-list2"})
        if attach_list:
            items = attach_list.find_all("li")
            for item in items:
                download_links = item.find_all("a", onclick=re.compile(r"download2?\.ashx"))
                for link in download_links:
                    onclick = link.get("onclick", "")
                    id_match = re.search(r"id=(\d+)", onclick)
                    channel_match = re.search(r"channel_id=(\d+)", onclick)
                    if id_match:
                        attach_id = id_match.group(1)
                        channel_id = channel_match.group(1) if channel_match else "1"
                        h3 = item.find("h3")
                        filename = h3.get_text().strip() if h3 else f"附件{attach_id}"
                        attachments.append({
                            "id": attach_id,
                            "channel_id": channel_id,
                            "filename": filename
                        })
                        break  # one attachment per list item

        return attachments, article_info

    def mark_article_read(self, channel_id: str, article_id: str) -> bool:
        """Mark an article as read through the ``saveread`` endpoint.

        Returns:
            False when an id is missing, the request fails, or the status is
            not 200. For a 200 response: True when the JSON ``status`` is 1,
            and also True when the body cannot be read as a JSON object
            (best-effort behaviour kept from the original).
        """
        if not channel_id or not article_id:
            return False

        import random
        saveread_url = (
            f"{self.base_url}/tools/submit_ajax.ashx?action=saveread"
            f"&time={random.random()}&fl={channel_id}&id={article_id}"
        )

        try:
            resp = self._request_with_retry("post", saveread_url)
        except Exception:
            return False

        if resp.status_code != 200:
            return False

        try:
            data = resp.json()
            return data.get("status") == 1
        except Exception:
            return True

    def mark_attachment_read(self, attach_id: str, channel_id: str = "1") -> bool:
        """Mark an attachment as read by requesting its download URL.

        The response is opened in streaming mode and closed immediately, so
        the body is not downloaded.

        Returns:
            True when the response status is 200, False otherwise or on error.
        """
        download_url = f"{self.base_url}/tools/download2.ashx?site=main&id={attach_id}&channel_id={channel_id}"

        try:
            resp = self._request_with_retry("get", download_url, stream=True)
            resp.close()
            return resp.status_code == 200
        except Exception:
            return False

    def browse_content(
        self,
        browse_type: str,
        should_stop_callback: Optional[Callable] = None,
        progress_callback: Optional[Callable] = None,
    ) -> APIBrowseResult:
        """Browse the article list and mark every article and attachment read.

        Args:
            browse_type: Label of the browse category; used in log messages
                only, the request always uses ``bz=0``.
            should_stop_callback: Optional callable; a truthy return value
                stops the run.
            progress_callback: Optional callable receiving
                ``{"total_items": int, "browsed_items": int}`` after each article.

        Returns:
            An APIBrowseResult. ``success`` is False when not logged in or when
            an unexpected exception escaped; ``error_message`` holds the reason.
        """
        result = APIBrowseResult(success=False)

        if not self.logged_in:
            result.error_message = "未登录"
            return result

        # After a site update, bz=0 is the "should read" list.
        bz = 0

        self.log(f"[API] 开始浏览 '{browse_type}' (bz={bz})...")

        try:
            total_items = 0
            total_attachments = 0

            # First page.
            articles, total_pages, _ = self.get_article_list_page(bz, 1)

            if not articles:
                self.log(f"[API] '{browse_type}' 没有待处理内容")
                result.success = True
                return result

            total_records = self.last_total_records
            self.log(f"[API] 共 {total_records} 条记录,开始处理...")

            # Report the initial progress.
            if progress_callback:
                progress_callback({"total_items": total_records, "browsed_items": 0})

            processed_hrefs = set()
            current_page = 1
            # Upper bound on list fetches so the loop always terminates.
            max_iterations = total_records + 20

            for _iteration in range(max_iterations):
                if should_stop_callback and should_stop_callback():
                    self.log("[API] 收到停止信号")
                    break

                if not articles:
                    break

                new_articles_in_page = 0
                stop_requested = False

                for article in articles:
                    if should_stop_callback and should_stop_callback():
                        stop_requested = True
                        break

                    article_href = article["href"]
                    if article_href in processed_hrefs:
                        continue

                    processed_hrefs.add(article_href)
                    new_articles_in_page += 1
                    title = article["title"][:30]

                    # Fetch the attachments and article identifiers.
                    try:
                        attachments, article_info = self.get_article_attachments(article_href)
                    except Exception as e:
                        self.log(f"[API] 获取文章失败: {title} | {str(e)}")
                        continue

                    total_items += 1

                    # Mark the article itself as read.
                    article_marked = False
                    if article_info.get("channel_id") and article_info.get("article_id"):
                        article_marked = self.mark_article_read(
                            article_info["channel_id"],
                            article_info["article_id"]
                        )

                    # Mark each attachment as read.
                    if attachments:
                        for attach in attachments:
                            if self.mark_attachment_read(attach["id"], attach["channel_id"]):
                                total_attachments += 1
                        self.log(f"[API] [{total_items}] {title} - {len(attachments)}个附件")
                    else:
                        status = "已标记" if article_marked else "标记失败"
                        self.log(f"[API] [{total_items}] {title} - 无附件({status})")

                    # Report progress.
                    if progress_callback:
                        progress_callback({"total_items": total_records, "browsed_items": total_items})

                    # Short pause so requests are not sent too quickly.
                    time.sleep(0.05)

                # Stop right away instead of fetching one more list page first.
                if stop_requested:
                    self.log("[API] 收到停止信号")
                    break

                # New articles on this page: restart from page 1. Otherwise
                # move on to the next page.
                if new_articles_in_page > 0:
                    current_page = 1
                else:
                    current_page += 1
                    if current_page > total_pages:
                        break

                # Fetch the next list page.
                try:
                    articles, new_total_pages, _ = self.get_article_list_page(bz, current_page)
                    if new_total_pages > 0:
                        total_pages = new_total_pages
                except Exception as e:
                    self.log(f"[API] 获取第{current_page}页列表失败: {str(e)}")
                    break

            self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件")
            result.success = True
            result.total_items = total_items
            result.total_attachments = total_attachments
            return result

        except Exception as e:
            result.error_message = str(e)
            self.log(f"[API] 浏览出错: {str(e)}")
            return result

    def close(self):
        """Close the HTTP session; calling it again is a no-op."""
        if self._closed:
            return
        self._closed = True
        try:
            self.session.close()
        except Exception:
            # Best-effort cleanup: a failure while closing is not actionable.
            pass

    def __enter__(self):
        """Return self so the instance can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the session; exceptions from the ``with`` body propagate."""
        self.close()
        return False
|
||||
823
core/kdocs_uploader.py
Normal file
823
core/kdocs_uploader.py
Normal file
@@ -0,0 +1,823 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
金山文档上传模块 - 精简版
|
||||
使用Playwright自动化上传截图到金山文档表格
|
||||
移除了队列、并发控制,改为单任务顺序执行
|
||||
"""
|
||||
|
||||
import base64
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from io import BytesIO
|
||||
from typing import Any, Dict, Optional, Callable
|
||||
from urllib.parse import urlparse
|
||||
|
||||
try:
|
||||
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
|
||||
except ImportError:
|
||||
sync_playwright = None
|
||||
|
||||
class PlaywrightTimeoutError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class KDocsUploader:
|
||||
"""金山文档上传器"""
|
||||
|
||||
def __init__(self, log_callback: Optional[Callable] = None):
    """Initialise an uploader; no browser is started here.

    Args:
        log_callback: Optional callable that receives every log message.
    """
    # Playwright objects start as None and are filled in by _ensure_playwright().
    self._playwright = None
    self._browser = None
    self._context = None
    self._page = None
    # URL most recently passed to _open_document().
    self._doc_url: Optional[str] = None
    # Most recent failure message recorded by the helper methods.
    self._last_error: Optional[str] = None
    self._logged_in = False
    self._log_callback = log_callback
|
||||
|
||||
def log(self, msg: str):
    """Pass *msg* to the log callback when one is configured."""
    callback = self._log_callback
    if not callback:
        return
    callback(msg)
|
||||
|
||||
def _ensure_playwright(self, use_storage_state: bool = True) -> bool:
    """Make sure Playwright, a browser, a context and a page all exist.

    Each object is only created when it is currently missing, so repeated
    calls reuse what is already running.

    Args:
        use_storage_state: When True and the saved login-state file exists,
            the new context is created from that file.

    Returns:
        True when a usable page is available. False when Playwright is not
        installed or start-up raised; the reason is stored in
        ``self._last_error`` and, for a start-up error, everything created so
        far is cleaned up.
    """
    if sync_playwright is None:
        self._last_error = "playwright 未安装"
        return False

    try:
        from config import KDOCS_LOGIN_STATE_FILE

        if self._playwright is None:
            self._playwright = sync_playwright().start()
        if self._browser is None:
            # Debug mode: run headed so the browser's behaviour can be watched.
            # For production, change the default back to "true".
            # NOTE(review): any value other than "false" (case-insensitive),
            # e.g. "0", enables headless mode.
            headless = os.environ.get("KDOCS_HEADLESS", "false").lower() != "false"
            # Use the system-installed Chrome (supports WeChat quick login).
            # channel='chrome' selects system Chrome instead of Playwright's bundled Chromium.
            chrome_args = [
                "--disable-blink-features=AutomationControlled",  # hide automation fingerprints
                "--disable-features=DialMediaRouteProvider",  # suppress the local-network discovery prompt
                "--allow-running-insecure-content",
            ]
            try:
                self._browser = self._playwright.chromium.launch(
                    headless=headless,
                    channel='chrome',  # use system Chrome
                    args=chrome_args
                )
                self.log("[KDocs] 使用系统Chrome浏览器")
            except Exception as e:
                # Fall back to Chromium when system Chrome is unavailable.
                self.log(f"[KDocs] 系统Chrome不可用({e}),使用Chromium")
                self._browser = self._playwright.chromium.launch(headless=headless, args=chrome_args)
        if self._context is None:
            storage_state = str(KDOCS_LOGIN_STATE_FILE)
            # Options shared by every context that gets created.
            context_options = {
                "permissions": ["clipboard-read", "clipboard-write"],  # clipboard permissions
                "ignore_https_errors": True,
            }
            if use_storage_state and os.path.exists(storage_state):
                context_options["storage_state"] = storage_state
            self._context = self._browser.new_context(**context_options)

            # Original comment: grant local-network access permission (for
            # WeChat quick-login detection).
            # NOTE(review): the call below only grants clipboard permissions
            # for the account.wps.cn origin — confirm what was intended.
            try:
                self._context.grant_permissions(
                    ["clipboard-read", "clipboard-write"],
                    origin="https://account.wps.cn"
                )
            except Exception:
                pass
        if self._page is None or self._page.is_closed():
            self._page = self._context.new_page()
            self._page.set_default_timeout(60000)
        return True
    except Exception as e:
        self._last_error = f"浏览器启动失败: {e}"
        self._cleanup_browser()
        return False
|
||||
|
||||
def _cleanup_browser(self):
    """Shut down the page, context, browser and Playwright, in that order.

    Each object is released through ``close()`` when it has one, otherwise
    through ``stop()``. Errors raised while releasing are ignored, and every
    attribute ends up set to None.
    """
    for name in ("_page", "_context", "_browser", "_playwright"):
        resource = getattr(self, name, None)
        if resource:
            try:
                shutdown_name = next(
                    (candidate for candidate in ("close", "stop") if hasattr(resource, candidate)),
                    None,
                )
                if shutdown_name is not None:
                    getattr(resource, shutdown_name)()
            except Exception:
                pass
        setattr(self, name, None)
|
||||
|
||||
def _open_document(self, doc_url: str) -> bool:
    """Navigate the current page to *doc_url*.

    Records the URL on the instance, grants clipboard permissions for its
    origin, loads the page and then waits a fixed 3 seconds.

    Args:
        doc_url: Address of the document to open.

    Returns:
        True on success. False when any step raised; the message is stored
        in ``self._last_error``.
    """
    try:
        self._doc_url = doc_url
        self._ensure_clipboard_permissions(doc_url)
        self._page.goto(doc_url, wait_until="domcontentloaded", timeout=30000)
        time.sleep(3)  # wait for the page to finish loading, including the login button
        return True
    except Exception as e:
        self._last_error = f"打开文档失败: {e}"
        return False
|
||||
|
||||
def _ensure_clipboard_permissions(self, doc_url: str):
    """Grant clipboard read/write permission for the origin of *doc_url*.

    Does nothing when there is no browser context, when *doc_url* is empty,
    or when the URL has no scheme or host. Errors are ignored.
    """
    context = self._context
    if not (context and doc_url):
        return

    try:
        parts = urlparse(doc_url)
        if parts.scheme and parts.netloc:
            context.grant_permissions(
                ["clipboard-read", "clipboard-write"],
                origin=f"{parts.scheme}://{parts.netloc}",
            )
    except Exception:
        pass
|
||||
|
||||
def _is_login_url(self, url: str) -> bool:
    """Tell whether *url* looks like a login page.

    A URL counts as a login page when, case-insensitively, it contains
    ``account.wps.cn`` or ``passport``, or when it contains ``login`` without
    also containing ``kdocs.cn``. An empty URL is never a login page.
    """
    if not url:
        return False

    lowered = url.lower()
    on_account_site = any(marker in lowered for marker in ("account.wps.cn", "passport"))
    generic_login_page = "login" in lowered and "kdocs.cn" not in lowered
    return on_account_site or generic_login_page
|
||||
|
||||
def _page_has_login_gate(self, page) -> bool:
    """Tell whether *page* is currently blocked behind a login step.

    Checks, in order: the document URL, the login URL, visible login buttons,
    and a visible QR-code-sized element.

    Side effect: on a document URL, a visible "登录并加入编辑" button is clicked.

    Args:
        page: Page-like object exposing ``url``, ``get_by_role`` and ``locator``.

    Returns:
        True when a login page, login button or QR-code-sized element is
        detected; False otherwise, and always False for a document URL.
    """
    url = getattr(page, "url", "") or ""

    # A document URL means login already succeeded.
    # NOTE(review): the second test is redundant — any URL containing
    # "www.kdocs.cn/l/" also contains "kdocs.cn/l/".
    if "kdocs.cn/l/" in url or "www.kdocs.cn/l/" in url:
        # An invitation dialog may still be showing; try to click through it.
        # NOTE(review): confirm that the installed Playwright version honours
        # the `timeout` argument of is_visible().
        try:
            join_btn = page.get_by_role("button", name="登录并加入编辑")
            if join_btn.count() > 0 and join_btn.first.is_visible(timeout=500):
                self.log("[KDocs] 点击加入编辑按钮")
                join_btn.first.click()
                time.sleep(1)
        except Exception:
            pass
        # Already on the document page: treat as logged in.
        return False

    # Check whether we are on a login page.
    if self._is_login_url(url):
        self.log(f"[KDocs] 检测到登录页面URL: {url}")
        return True

    # Only look for login-page buttons (the document page's invitation
    # dialog was handled above).
    login_buttons = ["立即登录", "去登录"]
    for text in login_buttons:
        try:
            btn = page.get_by_role("button", name=text)
            if btn.count() > 0 and btn.first.is_visible(timeout=500):
                self.log(f"[KDocs] 检测到登录按钮: {text}")
                return True
        except Exception:
            pass

    # A visible QR-code element means the page is still waiting for a scan.
    try:
        qr_selectors = ["canvas", "img[class*='qr']", "div[class*='qrcode']"]
        for selector in qr_selectors:
            qr = page.locator(selector)
            if qr.count() > 0:
                # Inspect at most the first three matches per selector.
                for i in range(min(qr.count(), 3)):
                    el = qr.nth(i)
                    try:
                        if el.is_visible(timeout=200):
                            box = el.bounding_box()
                            # Only the width is checked, against 80-400 px.
                            if box and 80 <= box.get("width", 0) <= 400:
                                self.log(f"[KDocs] 检测到二维码元素: {selector}")
                                return True
                    except Exception:
                        pass
    except Exception:
        pass

    return False
|
||||
|
||||
def _is_logged_in(self) -> bool:
    """Tell whether the current page is open and not behind a login gate.

    Returns:
        False when there is no page or it has been closed; otherwise the
        negation of ``_page_has_login_gate`` for the current page.
    """
    page = self._page
    page_usable = bool(page) and not page.is_closed()
    if page_usable:
        return not self._page_has_login_gate(page)
    return False
|
||||
|
||||
def _save_login_state(self):
    """Write the browser context's storage state to the login-state file.

    The parent directory of the file is created when missing. A failure is
    logged rather than raised.
    """
    try:
        from config import KDOCS_LOGIN_STATE_FILE
        storage_state = str(KDOCS_LOGIN_STATE_FILE)
        KDOCS_LOGIN_STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        self._context.storage_state(path=storage_state)
        self.log("[KDocs] 登录状态已保存")
    except Exception as e:
        self.log(f"[KDocs] 保存登录状态失败: {e}")
|
||||
|
||||
def _ensure_login_dialog(self, use_quick_login: bool = False):
    """Click through the login flow until the login dialog is showing.

    Runs up to eight rounds; a round ends as soon as one button is clicked
    and the next round re-evaluates the page. The click priority within a
    round is: consent button, WeChat quick login (when enabled), "立即登录",
    "登录并加入编辑". Afterwards it tries to switch the dialog to WeChat
    QR-code mode.

    Returns early (with None) as soon as the page URL is a document URL.

    Args:
        use_quick_login: Whether to try the WeChat quick-login entry.
    """
    agree_names = ["同意", "同意并继续", "我同意", "确定", "确认"]

    # Drive the login flow step by step.
    max_clicks = 8
    for round_num in range(max_clicks):
        clicked = False
        current_url = self._page.url

        # Already on the document page means login succeeded. Re-read the URL
        # after a short wait to make sure this is not a transient redirect.
        if "kdocs.cn/l/" in current_url or "www.kdocs.cn/l/" in current_url:
            time.sleep(1)  # let the page settle
            stable_url = self._page.url
            if "kdocs.cn/l/" in stable_url and "account.wps.cn" not in stable_url:
                self.log("[KDocs] 已到达文档页面,登录成功")
                return

        # 1. Accept a privacy-agreement prompt first, if one is showing.
        for name in agree_names:
            try:
                btn = self._page.get_by_role("button", name=name)
                if btn.count() > 0 and btn.first.is_visible(timeout=300):
                    self.log(f"[KDocs] 点击同意按钮: {name}")
                    btn.first.click()
                    time.sleep(1)
                    clicked = True
                    break
            except Exception:
                pass
        if clicked:
            continue

        # 2. With quick login enabled and on the login site (account.wps.cn),
        #    try the "微信快捷登录" entry.
        if use_quick_login and "account.wps.cn" in current_url:
            try:
                quick_login = self._page.get_by_text("微信快捷登录", exact=False)
                if quick_login.count() > 0 and quick_login.first.is_visible(timeout=500):
                    self.log("[KDocs] 点击微信快捷登录")
                    quick_login.first.click()
                    time.sleep(3)  # wait for quick login to be processed
                    # Check whether login succeeded.
                    if "kdocs.cn/l/" in self._page.url:
                        self.log("[KDocs] 微信快捷登录成功")
                        return
                    clicked = True
                    continue
            except Exception:
                pass

        # 3. Click "立即登录" to enter the login page.
        try:
            btn = self._page.get_by_role("button", name="立即登录")
            if btn.count() > 0 and btn.first.is_visible(timeout=500):
                self.log("[KDocs] 点击立即登录")
                btn.first.click()
                time.sleep(2)
                clicked = True
                continue
        except Exception:
            pass

        # 4. Click "登录并加入编辑" (the invitation dialog on the document page).
        try:
            btn = self._page.get_by_role("button", name="登录并加入编辑")
            if btn.count() > 0 and btn.first.is_visible(timeout=500):
                self.log("[KDocs] 点击登录并加入编辑")
                btn.first.click()
                time.sleep(1.5)
                clicked = True
                continue
        except Exception:
            pass

        # Nothing was clicked in this round: stop looping.
        if not clicked:
            self.log("[KDocs] 未找到更多可点击的按钮")
            break

    # Finally make sure WeChat QR-code login is selected (switch to scan mode).
    wechat_names = ["微信登录", "微信扫码登录", "扫码登录", "微信扫码"]
    for name in wechat_names:
        try:
            btn = self._page.get_by_role("button", name=name)
            if btn.is_visible(timeout=1000):
                self.log(f"[KDocs] 点击微信登录: {name}")
                btn.click()
                time.sleep(1)
                return
        except Exception:
            pass

    # Fall back to locating the WeChat login entry by its text.
    for name in wechat_names:
        try:
            el = self._page.get_by_text(name, exact=False).first
            if el.is_visible(timeout=500):
                self.log(f"[KDocs] 点击微信登录文本: {name}")
                el.click()
                time.sleep(1)
                return
        except Exception:
            pass

    self.log("[KDocs] 未找到登录按钮,可能页面已在登录状态或需要手动操作")
|
||||
|
||||
def _capture_qr_image(self) -> Optional[bytes]:
    """Capture the login QR code from the page or one of its child frames.

    The main page is searched with every selector. Child frames are then
    searched with the first five selectors only. Errors while walking the
    frames are ignored.

    Returns:
        Screenshot bytes of the first matching element, or None.
    """
    # Ordered from most specific to the catch-all "img".
    qr_selectors = [
        "canvas",
        "img[src*='qr']",
        "img[class*='qr']",
        "img[class*='code']",
        "div[class*='qr'] img",
        "div[class*='qrcode'] img",
        "div[class*='scan'] img",
        ".qrcode img",
        ".qr-code img",
        "img",
    ]

    # Main page first.
    for css in qr_selectors:
        image = self._try_capture_qr_with_selector(self._page, css)
        if image:
            return image

    # Then every frame other than the main frame.
    frame_selectors = qr_selectors[:5]
    try:
        for frame in self._page.frames:
            if frame == self._page.main_frame:
                continue
            for css in frame_selectors:
                image = self._try_capture_qr_with_selector(frame, css)
                if image:
                    return image
    except Exception:
        pass

    return None
|
||||
|
||||
def _try_capture_qr_with_selector(self, page_or_frame, selector: str) -> Optional[bytes]:
    """Screenshot the first QR-code-like element matching *selector*.

    At most the first ten matches are inspected. A match qualifies when it is
    visible, its width and height are both within 80-400, and the two differ
    by less than 60. Its screenshot must be longer than 500 bytes.

    Args:
        page_or_frame: Object exposing ``locator(selector)``.
        selector: CSS selector to try.

    Returns:
        Screenshot bytes of the first qualifying element, or None when there
        is none or an error occurs.
    """
    try:
        matches = page_or_frame.locator(selector)
        limit = min(matches.count(), 10)
        for index in range(limit):
            candidate = matches.nth(index)
            try:
                if not candidate.is_visible(timeout=300):
                    continue
                bounds = candidate.bounding_box()
                if not bounds:
                    continue
                width = bounds.get("width", 0)
                height = bounds.get("height", 0)
                # QR codes are roughly square and of moderate size.
                size_ok = 80 <= width <= 400 and 80 <= height <= 400
                if not (size_ok and abs(width - height) < 60):
                    continue
                image = candidate.screenshot()
                if image and len(image) > 500:
                    return image
            except Exception:
                continue
    except Exception:
        pass
    return None
|
||||
|
||||
def request_qr(self, force: bool = False) -> Dict[str, Any]:
    """Open the configured document and return a login QR code if one is needed.

    Args:
        force: When True, delete the saved login state, restart the browser
            and skip the "already logged in" shortcut.

    Returns:
        On success a dict of the form::

            {
                "success": True,
                "logged_in": bool,   # True when no login is needed
                "qr_image": str,     # base64-encoded QR image ("" when logged in)
            }

        On failure ``{"success": False, "error": str}``; the failure dicts
        built here carry no "logged_in" or "qr_image" key.
    """
    from config import get_config, KDOCS_LOGIN_STATE_FILE

    config = get_config()
    doc_url = config.kdocs.doc_url.strip()

    if not doc_url:
        return {"success": False, "error": "未配置金山文档链接"}

    if force:
        # Drop the saved login state and restart the browser.
        try:
            if KDOCS_LOGIN_STATE_FILE.exists():
                KDOCS_LOGIN_STATE_FILE.unlink()
        except Exception:
            pass
        self._cleanup_browser()

    if not self._ensure_playwright(use_storage_state=not force):
        return {"success": False, "error": self._last_error or "浏览器不可用"}

    if not self._open_document(doc_url):
        return {"success": False, "error": self._last_error or "打开文档失败"}

    # Check whether the session is already logged in.
    self.log(f"[KDocs] 当前页面URL: {self._page.url}")
    if not force and self._is_logged_in():
        self._logged_in = True
        self._save_login_state()
        return {"success": True, "logged_in": True, "qr_image": ""}

    # Login is required: bring up the dialog and grab the QR code.
    # The dialog is opened with the default use_quick_login=False.
    self.log("[KDocs] 需要登录,尝试打开登录对话框...")
    self._ensure_login_dialog()
    time.sleep(2)  # wait for the login dialog to load

    self.log("[KDocs] 尝试捕获二维码...")
    qr_image = None
    for i in range(15):  # generous number of attempts, one second apart
        qr_image = self._capture_qr_image()
        if qr_image and len(qr_image) > 1024:
            self.log(f"[KDocs] 二维码捕获成功,大小: {len(qr_image)} bytes")
            break
        self.log(f"[KDocs] 第{i+1}次尝试捕获二维码...")
        time.sleep(1)

    # NOTE(review): only emptiness is checked here, so an image of 1024 bytes
    # or fewer from the final attempt is still returned as a success —
    # confirm whether that fallback is intended.
    if not qr_image:
        self.log("[KDocs] 二维码捕获失败,当前页面可能没有显示二维码")
        return {"success": False, "error": "二维码获取失败,请检查网络或手动打开金山文档链接确认"}

    return {
        "success": True,
        "logged_in": False,
        "qr_image": base64.b64encode(qr_image).decode("ascii"),
    }
|
||||
|
||||
def check_login_status(self) -> Dict[str, Any]:
    """Report whether the KDocs session is logged in, without reopening the page.

    Only the page that is already open is inspected. Before reading the
    state, the method looks in the page and in every frame for a visible
    confirmation control and clicks the first one it finds (after scanning
    the QR code with WeChat, the desktop side needs one more confirmation
    click).

    Returns:
        ``{"success": True, "logged_in": bool}`` when the check completed, or
        ``{"success": False, "logged_in": False, "error": str}`` when no page
        is open or an exception was raised during the check.
    """
    page = self._page
    # No page (or a closed one) means the login flow has not been started yet.
    if not page or page.is_closed():
        return {"success": False, "logged_in": False, "error": "页面未打开"}

    confirm_names = ["确认登录", "确定登录", "登录", "确定", "确认", "同意并登录"]
    # Selectors the WPS login page might use for its confirmation button.
    css_selectors = [
        "button.ant-btn-primary",
        "button[type='primary']",
        ".confirm-btn",
        ".login-confirm",
        ".btn-primary",
        ".wps-btn-primary",
        "a.confirm",
        "div.confirm",
        "[class*='confirm']",
        "[class*='login-btn']"
    ]

    def _click_by_role(frame) -> bool:
        # Strategy 1: a button whose accessible name is one of the known labels.
        for name in confirm_names:
            try:
                candidate = frame.get_by_role("button", name=name)
                if candidate.count() > 0 and candidate.first.is_visible(timeout=200):
                    self.log(f"[KDocs] 找到确认按钮: {name}")
                    candidate.first.click()
                    time.sleep(3)
                    return True
            except Exception:
                pass
        return False

    def _click_by_text(frame) -> bool:
        # Strategy 2: any element whose exact text is one of the known labels.
        for name in confirm_names:
            try:
                candidate = frame.get_by_text(name, exact=True)
                if candidate.count() > 0 and candidate.first.is_visible(timeout=200):
                    self.log(f"[KDocs] 找到确认文本: {name}")
                    candidate.first.click()
                    time.sleep(3)
                    return True
            except Exception:
                pass
        return False

    def _click_by_css(frame) -> bool:
        # Strategy 3: CSS selectors; at most the first three matches of each
        # selector are examined and only those whose text looks like a
        # confirm/login label are clicked.
        try:
            for selector in css_selectors:
                matches = frame.locator(selector)
                if matches.count() > 0:
                    for idx in range(min(matches.count(), 3)):
                        btn = matches.nth(idx)
                        try:
                            if btn.is_visible(timeout=100):
                                btn_text = btn.inner_text() or ""
                                if any(kw in btn_text for kw in ["确认", "登录", "确定"]):
                                    self.log(f"[KDocs] 找到按钮(CSS): {btn_text}")
                                    btn.click()
                                    time.sleep(3)
                                    return True
                        except Exception:
                            pass
        except Exception:
            pass
        return False

    try:
        clicked_confirm = False
        # Search the main page first, then every frame attached to it.
        for frame in [page] + list(page.frames):
            clicked_confirm = (
                _click_by_role(frame)
                or _click_by_text(frame)
                or _click_by_css(frame)
            )
            if clicked_confirm:
                break

        if clicked_confirm:
            # Let the page redirect on its own; it must not be reloaded here.
            self.log("[KDocs] 已点击确认,等待页面跳转...")
            time.sleep(3)

        current_url = page.url
        self.log(f"[KDocs] 当前URL: {current_url}")

        on_document_page = (
            "kdocs.cn/l/" in current_url and "account.wps.cn" not in current_url
        )
        if on_document_page:
            # The URL alone is treated as proof of a successful login.
            # NOTE(review): a document URL might also be reachable while logged
            # out — confirm this cannot produce a false positive.
            logged_in = True
            # Click the "join editing" button if it happens to be shown.
            try:
                join_btn = page.get_by_role("button", name="登录并加入编辑")
                if join_btn.count() > 0 and join_btn.first.is_visible(timeout=500):
                    self.log("[KDocs] 点击加入编辑")
                    join_btn.first.click()
                    time.sleep(1)
            except Exception:
                pass
        else:
            # Still on the login page (or somewhere else): ask the page check.
            logged_in = self._is_logged_in()

        self._logged_in = logged_in

        if logged_in:
            self._save_login_state()
            self.log("[KDocs] 登录状态检测:已登录")

        return {"success": True, "logged_in": logged_in}

    except Exception as e:
        return {"success": False, "logged_in": False, "error": str(e)}
|
||||
|
||||
def _navigate_to_cell(self, cell_address: str):
|
||||
"""导航到指定单元格"""
|
||||
try:
|
||||
name_box = self._page.locator("input.edit-box").first
|
||||
name_box.click()
|
||||
name_box.fill(cell_address)
|
||||
name_box.press("Enter")
|
||||
except Exception:
|
||||
name_box = self._page.locator('#root input[type="text"]').first
|
||||
name_box.click()
|
||||
name_box.fill(cell_address)
|
||||
name_box.press("Enter")
|
||||
time.sleep(0.3)
|
||||
|
||||
def _get_current_cell_address(self) -> str:
|
||||
"""获取当前单元格地址"""
|
||||
try:
|
||||
name_box = self._page.locator("input.edit-box").first
|
||||
value = name_box.input_value()
|
||||
if value and re.match(r"^[A-Z]+\d+$", value.upper()):
|
||||
return value.upper()
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
|
||||
def _search_and_get_row(self, search_text: str, expected_col: Optional[str] = None,
                        row_start: int = 0, row_end: int = 0) -> int:
    """Find ``search_text`` with the sheet's find dialog and return its row number.

    Args:
        search_text: Text to look for.
        expected_col: Column letter(s) the hit must be in; ``None`` or empty
            disables the column check.
        row_start: Lowest acceptable row number; values <= 0 disable the check.
        row_end: Highest acceptable row number; values <= 0 disable the check.

    Returns:
        The row number of the cell selected after the search, or ``-1`` when
        the query could not be entered, the selected address cannot be read,
        or the hit lies outside the expected column or row range.
    """
    page = self._page

    # Open the find dialog.
    page.keyboard.press("Control+f")
    time.sleep(0.3)

    # Type the query. Locator.is_visible() returns immediately instead of
    # waiting, so wait explicitly for the input. If the query cannot be
    # entered, give up: continuing would search stale text and report
    # whatever cell is currently selected as the match.
    query_entered = False
    try:
        search_input = page.get_by_role("textbox").nth(3)
        search_input.wait_for(state="visible", timeout=2000)
        search_input.fill(search_text)
        query_entered = True
    except Exception as e:
        self.log(f"[KDocs] 搜索框不可用: {e}")

    if not query_entered:
        # Best effort: close the dialog so later keyboard input reaches the sheet.
        try:
            page.keyboard.press("Escape")
        except Exception:
            pass
        return -1

    time.sleep(0.2)

    # Run the search; fall back to Enter when the button cannot be clicked.
    try:
        page.get_by_role("button", name="查找").first.click()
    except Exception:
        page.keyboard.press("Enter")

    time.sleep(0.3)

    # Close the dialog so the name box shows the cell the search selected.
    page.keyboard.press("Escape")
    time.sleep(0.3)

    address = self._get_current_cell_address()
    if not address:
        return -1

    # Split the address into column letters and row number.
    parsed = re.match(r"^([A-Z]+)(\d+)$", address.upper())
    if not parsed:
        return -1
    col_letter = parsed.group(1)
    row_num = int(parsed.group(2))

    # NOTE(review): when the search finds nothing, the selection presumably
    # stays where it was; the column/row checks below are the only guard
    # against reporting that stale cell.
    if expected_col and col_letter != expected_col.upper():
        return -1
    if row_start > 0 and row_num < row_start:
        return -1
    if row_end > 0 and row_num > row_end:
        return -1

    return row_num
|
||||
|
||||
def _upload_image_to_cell(self, row_num: int, image_path: str, image_col: str) -> bool:
    """Insert ``image_path`` as a cell image at column ``image_col``, row ``row_num``.

    Navigates to the cell, clears it, then drives the
    Insert -> Image -> Cell image -> Local menu path and hands the file to
    the file chooser.

    Returns:
        ``True`` when the menu sequence completed; ``False`` when it raised,
        in which case ``self._last_error`` holds the message. Errors raised
        while navigating to the cell are not caught here.
    """
    target = f"{image_col}{row_num}"
    self._navigate_to_cell(target)
    time.sleep(0.3)

    # Clear whatever the cell currently holds (best effort).
    try:
        for key, pause in (("Escape", 0.2), ("Delete", 0.3)):
            self._page.keyboard.press(key)
            time.sleep(pause)
    except Exception:
        pass

    # (role, accessible name, pause after click) for each menu step.
    menu_steps = (
        ("button", "插入", 0.3),
        ("button", "图片", 0.3),
        ("option", "单元格图片", 0.2),
    )
    try:
        for role, label, pause in menu_steps:
            self._page.get_by_role(role, name=label).click()
            time.sleep(pause)

        local_entry = self._page.get_by_role("option", name="本地")
        # Clicking "local" opens the native file chooser; intercept it.
        with self._page.expect_file_chooser() as chooser_info:
            local_entry.click()
        chooser_info.value.set_files(image_path)

        # Fixed wait after handing over the file.
        time.sleep(2)
        self.log(f"[KDocs] 图片已上传到 {target}")
        return True

    except Exception as exc:
        self._last_error = f"上传图片失败: {exc}"
        return False
|
||||
|
||||
def upload_image(
    self,
    image_path: str,
    unit: str,
    name: str,
) -> Dict[str, Any]:
    """Upload a screenshot into the configured KDocs spreadsheet.

    Args:
        image_path: Path of the image file to upload.
        unit: County/district name. It must be non-empty, but this method
            does not use it to locate the row.
        name: Person name; searched in the sheet to find the target row.

    Returns:
        ``{"success": True}`` on success, otherwise
        ``{"success": False, "error": str}``.
    """
    from config import get_config

    kdocs_config = get_config().kdocs

    # Validate configuration and arguments, in order, before touching the browser.
    problem = ""
    doc_url = ""
    if not kdocs_config.enabled:
        problem = "金山文档上传未启用"
    else:
        doc_url = kdocs_config.doc_url.strip()
        if not doc_url:
            problem = "未配置金山文档链接"
        elif not unit or not name:
            problem = "缺少县区或姓名"
        elif not image_path or not os.path.exists(image_path):
            problem = "图片文件不存在"
    if problem:
        return {"success": False, "error": problem}

    # Bring up the browser, open the document and require a logged-in session.
    if not self._ensure_playwright():
        return {"success": False, "error": self._last_error or "浏览器不可用"}
    if not self._open_document(doc_url):
        return {"success": False, "error": self._last_error or "打开文档失败"}
    if not self._is_logged_in():
        return {"success": False, "error": "未登录,请先扫码登录"}

    try:
        # Switch to the configured worksheet tab, if any (best effort).
        sheet_name = kdocs_config.sheet_name
        if sheet_name:
            try:
                tabs = self._page.locator("[role='tab']").filter(has_text=sheet_name)
                if tabs.count() > 0:
                    tabs.first.click()
                    time.sleep(0.5)
            except Exception:
                pass

        # Locate the person's row by searching for the name.
        self.log(f"[KDocs] 搜索人员: {name}")
        row_num = self._search_and_get_row(
            name,
            expected_col=kdocs_config.name_column,
            row_start=kdocs_config.row_start,
            row_end=kdocs_config.row_end,
        )
        if row_num < 0:
            return {"success": False, "error": f"未找到人员: {name}"}

        self.log(f"[KDocs] 找到人员在第 {row_num} 行")

        # Put the image into the configured image column of that row.
        uploaded = self._upload_image_to_cell(row_num, image_path, kdocs_config.image_column)
        if uploaded:
            return {"success": True}
        return {"success": False, "error": self._last_error or "上传失败"}

    except Exception as e:
        return {"success": False, "error": str(e)}
|
||||
|
||||
def clear_login(self):
    """Forget the stored login: delete the state file and shut the browser down."""
    from config import KDOCS_LOGIN_STATE_FILE

    state_file = KDOCS_LOGIN_STATE_FILE
    # Removing the file is best effort; a failure must not block the reset.
    try:
        if state_file.exists():
            state_file.unlink()
    except Exception:
        pass

    self._logged_in = False
    self._cleanup_browser()
|
||||
|
||||
def close(self):
    """Shut the uploader down by cleaning up its browser resources."""
    self._cleanup_browser()
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.close()
|
||||
return False
|
||||
|
||||
|
||||
# Module-wide uploader instance, created on first use.
_uploader: Optional[KDocsUploader] = None


def get_kdocs_uploader() -> KDocsUploader:
    """Return the shared ``KDocsUploader``, creating it on the first call."""
    global _uploader
    if _uploader is not None:
        return _uploader
    _uploader = KDocsUploader()
    return _uploader
|
||||
324
core/screenshot.py
Normal file
324
core/screenshot.py
Normal file
@@ -0,0 +1,324 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
截图模块 - 精简版
|
||||
使用wkhtmltoimage进行网页截图
|
||||
移除了线程池、复杂重试逻辑,保持简单
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from typing import Optional, Callable, List, Tuple
|
||||
from dataclasses import dataclass
|
||||
|
||||
from .api_browser import APIBrowser, get_cookie_jar_path, is_cookie_jar_fresh
|
||||
|
||||
|
||||
@dataclass
class ScreenshotResult:
    """Outcome of one screenshot attempt."""

    # True when an image file was produced.
    success: bool
    # File name of the screenshot (no directory part).
    filename: str = ""
    # Full path of the screenshot file.
    filepath: str = ""
    # Failure reason; empty by default.
    error_message: str = ""
|
||||
|
||||
|
||||
def _resolve_wkhtmltoimage_path() -> Optional[str]:
    """Locate the wkhtmltoimage executable.

    Lookup order: the path from the screenshot configuration (if it exists),
    then ``PATH``, then the default Windows install locations.

    Returns:
        The executable path, or ``None`` when none of the candidates exists.
    """
    from config import get_config

    # 1. An explicitly configured path wins when it exists.
    configured = get_config().screenshot.wkhtmltoimage_path
    if configured and os.path.exists(configured):
        return configured

    # 2. Whatever is reachable through PATH.
    on_path = shutil.which("wkhtmltoimage")
    if on_path:
        return on_path

    # 3. Default Windows install locations.
    windows_candidates = (
        r"C:\Program Files\wkhtmltopdf\bin\wkhtmltoimage.exe",
        r"C:\Program Files (x86)\wkhtmltopdf\bin\wkhtmltoimage.exe",
        os.path.expandvars(r"%ProgramFiles%\wkhtmltopdf\bin\wkhtmltoimage.exe"),
        os.path.expandvars(r"%ProgramFiles(x86)%\wkhtmltopdf\bin\wkhtmltoimage.exe"),
    )
    return next((path for path in windows_candidates if os.path.exists(path)), None)
|
||||
|
||||
|
||||
def _read_cookie_pairs(cookies_path: str) -> List[Tuple[str, str]]:
|
||||
"""读取cookie文件"""
|
||||
if not cookies_path or not os.path.exists(cookies_path):
|
||||
return []
|
||||
|
||||
pairs = []
|
||||
try:
|
||||
with open(cookies_path, "r", encoding="utf-8", errors="ignore") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
parts = line.split("\t")
|
||||
if len(parts) < 7:
|
||||
continue
|
||||
name = parts[5].strip()
|
||||
value = parts[6].strip()
|
||||
if name:
|
||||
pairs.append((name, value))
|
||||
except Exception:
|
||||
return []
|
||||
return pairs
|
||||
|
||||
|
||||
def _select_cookie_pairs(pairs: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
|
||||
"""选择关键cookie"""
|
||||
preferred_names = {"ASP.NET_SessionId", ".ASPXAUTH"}
|
||||
preferred = [(name, value) for name, value in pairs if name in preferred_names and value]
|
||||
if preferred:
|
||||
return preferred
|
||||
return [(name, value) for name, value in pairs if name and value and name.isascii() and value.isascii()]
|
||||
|
||||
|
||||
def take_screenshot_wkhtmltoimage(
    url: str,
    output_path: str,
    cookies_path: Optional[str] = None,
    proxy_server: Optional[str] = None,
    run_script: Optional[str] = None,
    window_status: Optional[str] = None,
    log_callback: Optional[Callable] = None,
) -> bool:
    """Capture ``url`` into ``output_path`` by running wkhtmltoimage.

    Args:
        url: Page to capture.
        output_path: Destination file; a ``.jpg``/``.jpeg`` extension selects
            JPEG output, anything else PNG.
        cookies_path: Netscape cookie file. The selected cookies are passed
            individually with ``--cookie``; when none can be selected the
            file itself is passed with ``--cookie-jar``.
        proxy_server: Proxy passed to ``--proxy``.
        run_script: JavaScript passed to ``--run-script``.
        window_status: ``window.status`` value to wait for before capturing.
        log_callback: Optional callable receiving diagnostic messages.

    Returns:
        ``True`` when wkhtmltoimage exited with status 0; ``False`` when the
        executable is missing, exits non-zero, times out, or cannot be run.
    """
    from config import get_config
    screenshot_config = get_config().screenshot

    wkhtmltoimage_path = _resolve_wkhtmltoimage_path()
    if not wkhtmltoimage_path:
        if log_callback:
            log_callback("wkhtmltoimage 未安装或不在 PATH 中")
        return False

    ext = os.path.splitext(output_path)[1].lower()
    image_format = "jpg" if ext in (".jpg", ".jpeg") else "png"

    cmd = [
        wkhtmltoimage_path,
        "--format", image_format,
        "--width", str(screenshot_config.width),
        "--disable-smart-width",
        "--javascript-delay", str(screenshot_config.js_delay_ms),
        "--load-error-handling", "ignore",
        "--enable-local-file-access",
        "--encoding", "utf-8",
    ]

    # Send a desktop browser User-Agent, also for sub-resource requests.
    ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    cmd.extend(["--custom-header", "User-Agent", ua, "--custom-header-propagation"])

    # The quality option is only added for JPEG output.
    if image_format == "jpg":
        cmd.extend(["--quality", str(screenshot_config.quality)])

    # A height <= 0 leaves the height to wkhtmltoimage.
    if screenshot_config.height > 0:
        cmd.extend(["--height", str(screenshot_config.height)])

    if run_script:
        cmd.extend(["--run-script", run_script])
    if window_status:
        cmd.extend(["--window-status", window_status])

    if cookies_path:
        cookie_pairs = _select_cookie_pairs(_read_cookie_pairs(cookies_path))
        if cookie_pairs:
            for name, value in cookie_pairs:
                cmd.extend(["--cookie", name, value])
        else:
            cmd.extend(["--cookie-jar", cookies_path])

    if proxy_server:
        cmd.extend(["--proxy", proxy_server])

    cmd.extend([url, output_path])

    try:
        # Decode captured output explicitly: with text=True alone the locale
        # codec is used, and an undecodable byte would turn a successful
        # capture into a reported failure.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=screenshot_config.timeout_seconds
        )
    except subprocess.TimeoutExpired:
        if log_callback:
            log_callback("wkhtmltoimage 截图超时")
        return False
    except Exception as e:
        if log_callback:
            log_callback(f"wkhtmltoimage 截图异常: {e}")
        return False

    if result.returncode != 0:
        if log_callback:
            err_msg = (result.stderr or result.stdout or "").strip()
            log_callback(f"wkhtmltoimage 截图失败: {err_msg[:200]}")
        return False
    return True
|
||||
|
||||
|
||||
def _sanitize_filename_part(text: str) -> str:
    """Replace characters that cannot appear in a file name with ``_``."""
    forbidden = set('\\/:*?"<>|')
    return "".join(
        "_" if (ch in forbidden or ord(ch) < 32) else ch for ch in str(text)
    )


def take_screenshot(
    username: str,
    password: str,
    browse_type: str = "应读",
    remark: str = "",
    log_callback: Optional[Callable] = None,
    proxy_config: Optional[dict] = None,
) -> ScreenshotResult:
    """Run the whole screenshot flow for one account.

    Logs in to refresh the cookie jar when it is stale, captures the admin
    index page with the target page loaded into its main frame, and falls
    back to capturing the target page directly when that fails.

    Args:
        username: Account user name.
        password: Account password.
        browse_type: Browse type; used in log messages and the file name.
        remark: Account remark; used instead of ``username`` in the file
            name when non-empty.
        log_callback: Optional callable receiving progress messages.
        proxy_config: Optional proxy settings; its ``"server"`` entry is
            passed to wkhtmltoimage.

    Returns:
        A ``ScreenshotResult``. On success ``filename`` and ``filepath``
        point at the image; on failure ``error_message`` is set.
    """
    from config import get_config, SCREENSHOTS_DIR
    config = get_config()

    result = ScreenshotResult(success=False)

    def log(msg: str):
        if log_callback:
            log_callback(msg)

    # Make sure the screenshot directory exists (including missing parents).
    SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)

    # Refresh the cookie jar through a login when it is stale.
    cookie_path = get_cookie_jar_path(username)
    proxy_server = proxy_config.get("server") if proxy_config else None

    if not is_cookie_jar_fresh(cookie_path):
        log("正在登录获取Cookie...")
        with APIBrowser(log_callback=log, proxy_config=proxy_config) as browser:
            if not browser.login(username, password):
                result.error_message = "登录失败"
                return result
            if not browser.save_cookies_for_screenshot(username):
                result.error_message = "保存Cookie失败"
                return result

    log(f"导航到 '{browse_type}' 页面...")

    # Build the URLs from the scheme and host of the configured login URL.
    from urllib.parse import urlsplit
    parsed = urlsplit(config.zsgl.login_url)
    base = f"{parsed.scheme}://{parsed.netloc}"

    # NOTE(review): bz is fixed to the value marked as "应读"; browse_type does
    # not select the page that is captured — confirm this is intended.
    bz = 0
    target_url = f"{base}/admin/center.aspx?bz={bz}"
    index_url = f"{base}/admin/index.aspx"

    # Build the file name. The remark / user name and browse type are free
    # text, so strip characters that would break the path.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    account_name = _sanitize_filename_part(remark if remark else username)
    type_label = _sanitize_filename_part(browse_type)
    screenshot_filename = f"{account_name}_{type_label}_{timestamp}.jpg"
    screenshot_path = str(SCREENSHOTS_DIR / screenshot_filename)

    # Script injected into the index page: reloads the menu, expands the side
    # bar, points the main frame at the target page and sets window.status
    # to 'ready' once both the navigation and the frame have loaded.
    run_script = (
        "(function(){"
        "function done(){window.status='ready';}"
        "function ensureNav(){try{if(typeof loadMenuTree==='function'){loadMenuTree(true);}}catch(e){}}"
        "function expandMenu(){"
        "try{var body=document.body;if(body&&body.classList.contains('lay-mini')){body.classList.remove('lay-mini');}}catch(e){}"
        "try{if(typeof mainPageResize==='function'){mainPageResize();}}catch(e){}"
        "}"
        "function navReady(){"
        "try{var nav=document.getElementById('sidebar-nav');return nav && nav.querySelectorAll('a').length>0;}catch(e){return false;}"
        "}"
        "function frameReady(){"
        "try{var f=document.getElementById('mainframe');return f && f.contentDocument && f.contentDocument.readyState==='complete';}catch(e){return false;}"
        "}"
        "function check(){"
        "if(navReady() && frameReady()){done();return;}"
        "setTimeout(check,300);"
        "}"
        "var f=document.getElementById('mainframe');"
        "ensureNav();"
        "expandMenu();"
        "if(!f){done();return;}"
        f"f.src='{target_url}';"
        "f.onload=function(){ensureNav();expandMenu();setTimeout(check,300);};"
        "setTimeout(check,5000);"
        "})();"
    )

    # First try the full index page; fall back to the bare target page.
    log("正在截图...")

    cookies_for_shot = cookie_path if is_cookie_jar_fresh(cookie_path) else None

    success = take_screenshot_wkhtmltoimage(
        index_url,
        screenshot_path,
        cookies_path=cookies_for_shot,
        proxy_server=proxy_server,
        run_script=run_script,
        window_status="ready",
        log_callback=log,
    )

    if not success:
        log("尝试直接截图目标页...")
        success = take_screenshot_wkhtmltoimage(
            target_url,
            screenshot_path,
            cookies_path=cookies_for_shot,
            proxy_server=proxy_server,
            log_callback=log,
        )

    # Files of 1000 bytes or less are treated as failed captures.
    if success and os.path.exists(screenshot_path) and os.path.getsize(screenshot_path) > 1000:
        log(f"[OK] 截图成功: {screenshot_filename}")
        result.success = True
        result.filename = screenshot_filename
        result.filepath = screenshot_path
    else:
        result.error_message = "截图失败"
        # Remove a leftover partial or too-small file.
        if os.path.exists(screenshot_path):
            os.remove(screenshot_path)

    return result
|
||||
Reference in New Issue
Block a user