feat: knowledge management platform (lite) - PyQt6 desktop app

Features:
- Account management: add/edit/delete accounts, test login
- Browse tasks: batch-browse must-read/optional-read content and mark it read
- Screenshot management: capture pages with wkhtmltoimage, view history
- Kingsoft Docs (金山文档): QR-code / WeChat quick login, auto-upload screenshots

Tech stack:
- PyQt6 GUI framework
- Playwright browser automation
- SQLite local data storage
- wkhtmltoimage web-page screenshots

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
commit 83fef6dff2
Date: 2026-01-18 22:16:36 +08:00
24 changed files with 6133 additions and 0 deletions

core/api_browser.py (new file)
@@ -0,0 +1,504 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
API browser - lite edition
Browsing implemented with plain HTTP requests, roughly 30-60x faster than
driving a real browser. Trimmed from the original project: caching,
diagnostic logging, and other nonessential features were removed.
"""
import os
import random
import re
import time
import hashlib
from typing import Optional, Callable, Dict
from dataclasses import dataclass
from urllib.parse import urlsplit
import requests
from bs4 import BeautifulSoup
@dataclass
class APIBrowseResult:
"""API浏览结果"""
success: bool
total_items: int = 0
total_attachments: int = 0
error_message: str = ""
def get_cookie_jar_path(username: str) -> str:
"""获取截图用的cookies文件路径Netscape Cookie格式"""
from config import COOKIES_DIR
COOKIES_DIR.mkdir(exist_ok=True)
filename = hashlib.sha256(username.encode()).hexdigest()[:32] + ".cookies.txt"
return str(COOKIES_DIR / filename)
def is_cookie_jar_fresh(cookie_path: str, max_age_seconds: int = 86400) -> bool:
"""判断cookies文件是否存在且未过期默认24小时"""
if not cookie_path or not os.path.exists(cookie_path):
return False
try:
file_age = time.time() - os.path.getmtime(cookie_path)
return file_age <= max(0, int(max_age_seconds or 0))
except Exception:
return False
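# A minimal caller-side sketch (not part of this file): reuse a recent cookie
# jar for screenshots instead of logging in again:
#   jar = get_cookie_jar_path(username)
#   if not is_cookie_jar_fresh(jar):
#       with APIBrowser() as browser:
#           if browser.login(username, password):
#               browser.save_cookies_for_screenshot(username)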
class APIBrowser:
"""
API浏览器 - 使用纯HTTP请求实现浏览
用法:
with APIBrowser(log_callback=print) as browser:
if browser.login(username, password):
result = browser.browse_content("应读")
"""
def __init__(self, log_callback: Optional[Callable] = None, proxy_config: Optional[dict] = None):
self.session = requests.Session()
self.session.headers.update({
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
})
self.logged_in = False
self.log_callback = log_callback
self.stop_flag = False
self._closed = False
self.last_total_records = 0
self._username = ""
        # Load configuration
from config import get_config
config = get_config()
self.base_url = config.zsgl.base_url
self.login_url = config.zsgl.login_url
self.index_url_pattern = config.zsgl.index_url_pattern
        # Configure an optional proxy (same server for HTTP and HTTPS)
if proxy_config and proxy_config.get("server"):
proxy_server = proxy_config["server"]
self.session.proxies = {"http": proxy_server, "https": proxy_server}
self.proxy_server = proxy_server
else:
self.proxy_server = None
def log(self, message: str):
"""记录日志"""
if self.log_callback:
self.log_callback(message)
def _request_with_retry(self, method: str, url: str, max_retries: int = 3,
retry_delay: float = 1, **kwargs) -> requests.Response:
"""带重试机制的请求方法"""
kwargs.setdefault("timeout", 10.0)
last_error = None
for attempt in range(1, max_retries + 1):
try:
if method.lower() == "get":
resp = self.session.get(url, **kwargs)
else:
resp = self.session.post(url, **kwargs)
return resp
except Exception as e:
last_error = e
if attempt < max_retries:
self.log(f"[API] 请求超时,{retry_delay}秒后重试 ({attempt}/{max_retries})...")
time.sleep(retry_delay)
else:
self.log(f"[API] 请求失败,已重试{max_retries}次: {str(e)}")
raise last_error
def _get_aspnet_fields(self, soup: BeautifulSoup) -> Dict[str, str]:
"""获取ASP.NET隐藏字段"""
fields = {}
for name in ["__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"]:
field = soup.find("input", {"name": name})
if field:
fields[name] = field.get("value", "")
return fields
def login(self, username: str, password: str) -> bool:
"""登录"""
self.log(f"[API] 登录: {username}")
self._username = username
try:
resp = self._request_with_retry("get", self.login_url)
soup = BeautifulSoup(resp.text, "html.parser")
fields = self._get_aspnet_fields(soup)
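            # ASP.NET WebForms rejects a POST unless the page's hidden
            # __VIEWSTATE / __EVENTVALIDATION fields are echoed back, so the
            # login form is fetched first and those values are carried over.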
data = fields.copy()
data["txtUserName"] = username
data["txtPassword"] = password
data["btnSubmit"] = "登 录"
resp = self._request_with_retry(
"post",
self.login_url,
data=data,
headers={
"Content-Type": "application/x-www-form-urlencoded",
"Origin": self.base_url,
"Referer": self.login_url,
},
allow_redirects=True,
)
if self.index_url_pattern in resp.url:
self.logged_in = True
self.log(f"[API] 登录成功")
return True
else:
soup = BeautifulSoup(resp.text, "html.parser")
error = soup.find(id="lblMsg")
                error_msg = error.get_text().strip() if error else "unknown error"
                self.log(f"[API] login failed: {error_msg}")
return False
except Exception as e:
self.log(f"[API] 登录异常: {str(e)}")
return False
def get_real_name(self) -> Optional[str]:
"""获取用户真实姓名"""
if not self.logged_in:
return None
try:
url = f"{self.base_url}/admin/center.aspx"
resp = self._request_with_retry("get", url)
soup = BeautifulSoup(resp.text, "html.parser")
nlist = soup.find("div", {"class": "nlist-5"})
if nlist:
first_li = nlist.find("li")
if first_li:
                    text = first_li.get_text()
                    # the label "姓名:" ("Name:") precedes the real name
                    match = re.search(r"姓名[:]\s*([^\(]+)", text)
if match:
return match.group(1).strip()
return None
except Exception:
return None
def save_cookies_for_screenshot(self, username: str) -> bool:
"""保存cookies供wkhtmltoimage使用Netscape Cookie格式"""
cookies_path = get_cookie_jar_path(username)
try:
parsed = urlsplit(self.base_url)
cookie_domain = parsed.hostname or "postoa.aidunsoft.com"
lines = [
"# Netscape HTTP Cookie File",
"# Generated by zsglpt-lite",
]
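            # Each Netscape cookie-jar line has seven tab-separated fields:
            # domain, include-subdomains flag, path, secure flag,
            # expiry (Unix time, 0 = session cookie), name, value.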
for cookie in self.session.cookies:
domain = cookie.domain or cookie_domain
include_subdomains = "TRUE" if domain.startswith(".") else "FALSE"
path = cookie.path or "/"
secure = "TRUE" if getattr(cookie, "secure", False) else "FALSE"
expires = int(getattr(cookie, "expires", 0) or 0)
lines.append("\t".join([
domain,
include_subdomains,
path,
secure,
str(expires),
cookie.name,
cookie.value,
]))
with open(cookies_path, "w", encoding="utf-8") as f:
f.write("\n".join(lines) + "\n")
self.log(f"[API] Cookies已保存供截图使用")
return True
except Exception as e:
self.log(f"[API] 保存cookies失败: {e}")
return False
def get_article_list_page(self, bz: int = 0, page: int = 1) -> tuple:
"""获取单页文章列表"""
if not self.logged_in:
return [], 0, None
if page > 1:
url = f"{self.base_url}/admin/center.aspx?bz={bz}&page={page}"
else:
url = f"{self.base_url}/admin/center.aspx?bz={bz}"
resp = self._request_with_retry("get", url)
soup = BeautifulSoup(resp.text, "html.parser")
articles = []
ltable = soup.find("table", {"class": "ltable"})
if ltable:
rows = ltable.find_all("tr")[1:]
for row in rows:
if "暂无记录" in row.get_text():
continue
link = row.find("a", href=True)
if link:
href = link.get("href", "")
title = link.get_text().strip()
match = re.search(r"id=(\d+)", href)
article_id = match.group(1) if match else None
articles.append({
"title": title,
"href": href,
"article_id": article_id,
})
        # Parse the record total from the pager text ("共N记录" = "N records in total")
        total_pages = 1
        total_records = 0
        page_content = soup.find(id="PageContent")
        if page_content:
            text = page_content.get_text()
            total_match = re.search(r"共(\d+)记录", text)
            if total_match:
                total_records = int(total_match.group(1))
                # the site lists 10 records per page
                total_pages = (total_records + 9) // 10
self.last_total_records = total_records
return articles, total_pages, None
def get_article_attachments(self, article_href: str) -> tuple:
"""获取文章的附件列表和文章信息"""
if not article_href.startswith("http"):
url = f"{self.base_url}/admin/{article_href}"
else:
url = article_href
resp = self._request_with_retry("get", url)
soup = BeautifulSoup(resp.text, "html.parser")
attachments = []
article_info = {"channel_id": None, "article_id": None}
        # Extract channel_id and article_id from the saveread(...) onclick handler
for elem in soup.find_all(["button", "input"]):
onclick = elem.get("onclick", "")
match = re.search(r"saveread\((\d+),(\d+)\)", onclick)
if match:
article_info["channel_id"] = match.group(1)
article_info["article_id"] = match.group(2)
break
attach_list = soup.find("div", {"class": "attach-list2"})
if attach_list:
items = attach_list.find_all("li")
for item in items:
download_links = item.find_all("a", onclick=re.compile(r"download2?\.ashx"))
for link in download_links:
onclick = link.get("onclick", "")
id_match = re.search(r"id=(\d+)", onclick)
channel_match = re.search(r"channel_id=(\d+)", onclick)
if id_match:
attach_id = id_match.group(1)
channel_id = channel_match.group(1) if channel_match else "1"
h3 = item.find("h3")
filename = h3.get_text().strip() if h3 else f"附件{attach_id}"
attachments.append({
"id": attach_id,
"channel_id": channel_id,
"filename": filename
})
                        break  # only the first download link per list item is used
return attachments, article_info
def mark_article_read(self, channel_id: str, article_id: str) -> bool:
"""通过saveread API标记文章已读"""
if not channel_id or not article_id:
return False
        # time=random.random() is a cache-buster; fl = channel, id = article
        saveread_url = (
            f"{self.base_url}/tools/submit_ajax.ashx?action=saveread"
            f"&time={random.random()}&fl={channel_id}&id={article_id}"
        )
try:
resp = self._request_with_retry("post", saveread_url)
if resp.status_code == 200:
                try:
                    data = resp.json()
                    return data.get("status") == 1
                except ValueError:
                    # non-JSON response body: treat HTTP 200 as success
                    return True
return False
        except Exception:
            return False
def mark_attachment_read(self, attach_id: str, channel_id: str = "1") -> bool:
"""通过访问预览通道标记附件已读"""
download_url = f"{self.base_url}/tools/download2.ashx?site=main&id={attach_id}&channel_id={channel_id}"
        try:
            # stream=True plus an immediate close hits the endpoint without
            # downloading the attachment body
            resp = self._request_with_retry("get", download_url, stream=True)
            resp.close()
            return resp.status_code == 200
        except Exception:
            return False
def browse_content(
self,
browse_type: str,
should_stop_callback: Optional[Callable] = None,
progress_callback: Optional[Callable] = None,
) -> APIBrowseResult:
"""
浏览内容并标记已读
Args:
browse_type: 浏览类型 (应读/注册前未读)
should_stop_callback: 检查是否应该停止的回调函数
progress_callback: 进度回调,用于实时上报已浏览内容数量
回调参数: {"total_items": int, "browsed_items": int}
Returns:
浏览结果
"""
result = APIBrowseResult(success=False)
if not self.logged_in:
result.error_message = "未登录"
return result
        # bz query parameter selects the list view (after the site update, bz=0 is the must-read list)
        bz = 0
        self.log(f"[API] browsing '{browse_type}' (bz={bz})...")
try:
total_items = 0
total_attachments = 0
            # Fetch the first page
articles, total_pages, _ = self.get_article_list_page(bz, 1)
if not articles:
self.log(f"[API] '{browse_type}' 没有待处理内容")
result.success = True
return result
total_records = self.last_total_records
self.log(f"[API] 共 {total_records} 条记录,开始处理...")
# 上报初始进度
if progress_callback:
progress_callback({"total_items": total_records, "browsed_items": 0})
processed_hrefs = set()
current_page = 1
            max_iterations = total_records + 20  # safety bound against endless pagination
for iteration in range(max_iterations):
if should_stop_callback and should_stop_callback():
self.log("[API] 收到停止信号")
break
if not articles:
break
new_articles_in_page = 0
for article in articles:
if should_stop_callback and should_stop_callback():
break
article_href = article["href"]
if article_href in processed_hrefs:
continue
processed_hrefs.add(article_href)
new_articles_in_page += 1
title = article["title"][:30]
                    # Fetch attachments and article metadata
try:
attachments, article_info = self.get_article_attachments(article_href)
except Exception as e:
self.log(f"[API] 获取文章失败: {title} | {str(e)}")
continue
total_items += 1
                    # Mark the article itself as read
article_marked = False
if article_info.get("channel_id") and article_info.get("article_id"):
article_marked = self.mark_article_read(
article_info["channel_id"],
article_info["article_id"]
)
                    # Process attachments
if attachments:
for attach in attachments:
if self.mark_attachment_read(attach["id"], attach["channel_id"]):
total_attachments += 1
self.log(f"[API] [{total_items}] {title} - {len(attachments)}个附件")
else:
status = "已标记" if article_marked else "标记失败"
self.log(f"[API] [{total_items}] {title} - 无附件({status})")
                    # Report progress
if progress_callback:
progress_callback({"total_items": total_records, "browsed_items": total_items})
                    # Small delay to avoid hammering the server
time.sleep(0.05)
                # Marking items read removes them from the pending list and
                # shifts pagination, so rescan page 1 whenever this pass made
                # progress; only advance when a page yielded nothing new.
                if new_articles_in_page > 0:
                    current_page = 1
                else:
                    current_page += 1
                    if current_page > total_pages:
                        break
                # Fetch the next page
try:
articles, new_total_pages, _ = self.get_article_list_page(bz, current_page)
if new_total_pages > 0:
total_pages = new_total_pages
except Exception as e:
self.log(f"[API] 获取第{current_page}页列表失败: {str(e)}")
break
self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件")
result.success = True
result.total_items = total_items
result.total_attachments = total_attachments
return result
except Exception as e:
result.error_message = str(e)
self.log(f"[API] 浏览出错: {str(e)}")
return result
def close(self):
"""关闭会话"""
if self._closed:
return
self._closed = True
try:
self.session.close()
        except Exception:
pass
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
return False