🎉 Project optimization and bug-fix release

✨ Key optimizations:
- Fixed Unicode character-encoding issues (Windows cross-platform compatibility)
- Installed wkhtmltoimage; the screenshot feature is fully fixed
- Smart delay tuning (api_browser.py)
- Thread-pool resource-leak fix (tasks.py; see the sketch below)
- HTML parse caching
- Binary-search optimization (kdocs_uploader.py; see the sketch below)
- Adaptive resource configuration (browser_pool_worker.py)

🐛 Bug fixes:
- Fixed screenshot failures
- Fixed admin password setup
- Fixed the encoding error on application startup

📚 New documentation:
- BUG_REPORT.md - complete bug analysis report
- PERFORMANCE_ANALYSIS_REPORT.md - performance optimization analysis
- LINUX_DEPLOYMENT_ANALYSIS.md - Linux deployment guide
- SCREENSHOT_FIX_SUCCESS.md - record of the screenshot fix
- INSTALL_WKHTMLTOIMAGE.md - installation guide
- OPTIMIZATION_FIXES_SUMMARY.md - optimization summary

🚀 Verified functionality:
- Flask app running normally (port 51233)
- Database, screenshot thread pool, and API warmup all working
- Admin login: admin/admin123
- Health-check API: http://127.0.0.1:51233/health

💡 Technical improvements:
- Smart delay algorithm (adaptive adjustment)
- LRU caching policy
- Improved thread-pool resource management
- Binary search (O(log n) vs O(n))
- Adaptive resource management

🎯 The project now runs stably and is ready for Linux deployment
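The binary-search change in kdocs_uploader.py is not included in the diff below, so here is a minimal illustrative sketch of the O(log n) vs O(n) idea the summary refers to. It assumes the lookup runs over an already-sorted list; the function name and parameters are hypothetical, not taken from the repository:

    # Hypothetical sketch: replace a linear O(n) scan with bisect over a sorted list.
    import bisect

    def find_index_sorted(sorted_keys: list, target: str) -> int:
        """Return the index of target in sorted_keys, or -1 if absent."""
        i = bisect.bisect_left(sorted_keys, target)  # binary search, O(log n)
        if i < len(sorted_keys) and sorted_keys[i] == target:
            return i
        return -1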
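Likewise, the tasks.py thread-pool fix is not shown in this diff. A common pattern for that kind of leak is sketched here, under the assumption that the code previously created a new executor per call; the names and worker count are illustrative:

    # Hypothetical sketch: share one ThreadPoolExecutor and shut it down at exit
    # instead of creating (and leaking) a new pool per task submission.
    import atexit
    from concurrent.futures import ThreadPoolExecutor

    _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="tasks")
    atexit.register(_executor.shutdown, wait=False)  # release worker threads on exit

    def submit_task(fn, *args, **kwargs):
        """Submit work to the shared pool."""
        return _executor.submit(fn, *args, **kwargs)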
Changed file: api_browser.py (318 lines changed)
@@ -15,14 +15,78 @@ import weakref
 from typing import Optional, Callable
 from dataclasses import dataclass
 from urllib.parse import urlsplit
 import threading

 from app_config import get_config

 import time as _time_module

 _MODULE_START_TIME = _time_module.time()
 _WARMUP_PERIOD_SECONDS = 60  # use a longer timeout for 60 seconds after startup
 _WARMUP_TIMEOUT_SECONDS = 15.0  # timeout used during the warmup period
+
+
+# HTML parse cache
+class HTMLParseCache:
+    """Cache for HTML parse results."""
+
+    def __init__(self, ttl: int = 300, maxsize: int = 1000):
+        self.cache = {}
+        self.ttl = ttl
+        self.maxsize = maxsize
+        self._access_times = {}
+        self._lock = threading.RLock()
+
+    def _make_key(self, url: str, content_hash: str) -> str:
+        return f"{url}:{content_hash}"
+
+    def get(self, key: str) -> Optional[tuple]:
+        """Return the cached value if present and not expired."""
+        with self._lock:
+            if key in self.cache:
+                value, timestamp = self.cache[key]
+                if time.time() - timestamp < self.ttl:
+                    self._access_times[key] = time.time()
+                    return value
+                else:
+                    # Expired; evict
+                    del self.cache[key]
+                    del self._access_times[key]
+            return None
+
+    def set(self, key: str, value: tuple):
+        """Store a value in the cache."""
+        with self._lock:
+            # If the cache is full, evict the least recently accessed entry
+            if len(self.cache) >= self.maxsize:
+                if self._access_times:
+                    # Simple LRU policy: drop the least recently accessed key
+                    oldest_key = None
+                    oldest_time = float("inf")
+                    for k, access_time in self._access_times.items():
+                        if access_time < oldest_time:
+                            oldest_time = access_time
+                            oldest_key = k
+                    if oldest_key:
+                        del self.cache[oldest_key]
+                        del self._access_times[oldest_key]
+
+            self.cache[key] = (value, time.time())
+            self._access_times[key] = time.time()
+
+    def clear(self):
+        """Clear the cache."""
+        with self._lock:
+            self.cache.clear()
+            self._access_times.clear()
+
+    def get_lru_key(self) -> Optional[str]:
+        """Return the least recently accessed key."""
+        if not self._access_times:
+            return None
+        return min(self._access_times.keys(), key=lambda k: self._access_times[k])
+

 config = get_config()

 BASE_URL = getattr(config, "ZSGL_BASE_URL", "https://postoa.aidunsoft.com")
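A quick usage sketch of the HTMLParseCache added above (the key and values are illustrative, not taken from the code):

    from api_browser import HTMLParseCache

    cache = HTMLParseCache(ttl=300, maxsize=500)  # 5-minute TTL, at most 500 entries
    cache.set("attachments_123", (["attachment-1"], {"article_id": "42"}))
    assert cache.get("attachments_123") == (["attachment-1"], {"article_id": "42"})
    # After ttl seconds the entry expires and get() returns None; once maxsize
    # entries exist, the least recently accessed key is evicted on the next set().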
@@ -31,7 +95,9 @@ INDEX_URL_PATTERN = getattr(config, "ZSGL_INDEX_URL_PATTERN", "index.aspx")
 COOKIES_DIR = getattr(config, "COOKIES_DIR", "data/cookies")

 try:
-    _API_REQUEST_TIMEOUT_SECONDS = float(os.environ.get("API_REQUEST_TIMEOUT_SECONDS") or os.environ.get("API_REQUEST_TIMEOUT") or "5")
+    _API_REQUEST_TIMEOUT_SECONDS = float(
+        os.environ.get("API_REQUEST_TIMEOUT_SECONDS") or os.environ.get("API_REQUEST_TIMEOUT") or "5"
+    )
 except Exception:
     _API_REQUEST_TIMEOUT_SECONDS = 5.0
 _API_REQUEST_TIMEOUT_SECONDS = max(3.0, _API_REQUEST_TIMEOUT_SECONDS)
@@ -66,6 +132,7 @@ def is_cookie_jar_fresh(cookie_path: str, max_age_seconds: int = _COOKIE_JAR_MAX
     except Exception:
         return False


 _api_browser_instances: "weakref.WeakSet[APIBrowser]" = weakref.WeakSet()


@@ -84,6 +151,7 @@ atexit.register(_cleanup_api_browser_instances)
 @dataclass
 class APIBrowseResult:
     """API browse result"""
+
     success: bool
     total_items: int = 0
     total_attachments: int = 0
@@ -95,34 +163,73 @@ class APIBrowser:

     def __init__(self, log_callback: Optional[Callable] = None, proxy_config: Optional[dict] = None):
         self.session = requests.Session()
-        self.session.headers.update({
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
-        })
+        self.session.headers.update(
+            {
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+            }
+        )
         self.logged_in = False
         self.log_callback = log_callback
         self.stop_flag = False
         self._closed = False  # guard against double close
         self.last_total_records = 0

+        # Initialize the HTML parse cache
+        self._parse_cache = HTMLParseCache(ttl=300, maxsize=500)  # 5-minute TTL, at most 500 entries
+
         # Set up the proxy
         if proxy_config and proxy_config.get("server"):
             proxy_server = proxy_config["server"]
-            self.session.proxies = {
-                "http": proxy_server,
-                "https": proxy_server
-            }
+            self.session.proxies = {"http": proxy_server, "https": proxy_server}
             self.proxy_server = proxy_server
         else:
             self.proxy_server = None

         _api_browser_instances.add(self)

+    def _calculate_adaptive_delay(self, iteration: int, consecutive_failures: int) -> float:
+        """
+        Smart delay calculation: per-article delay.
+        Adjusts the delay dynamically based on the iteration count and consecutive failures.
+        """
+        # Base delay, significantly reduced
+        base_delay = 0.03
+
+        # On consecutive failures, back off, but with an upper bound
+        if consecutive_failures > 0:
+            delay = base_delay * (1.5 ** min(consecutive_failures, 3))
+            return min(delay, 0.2)  # at most 200ms
+
+        # Scale the delay with progress: slower at the start, faster later
+        progress_factor = min(iteration / 100.0, 1.0)  # full optimization after 100 articles
+        optimized_delay = base_delay * (1.2 - 0.4 * progress_factor)  # ramps from 120% down to 80%
+        return max(optimized_delay, 0.02)  # at least 20ms
+
+    def _calculate_page_delay(self, current_page: int, new_articles_in_page: int) -> float:
+        """
+        Smart delay calculation: per-page delay.
+        Adjusts the delay based on page position and the number of new articles.
+        """
+        base_delay = 0.08  # base delay, 50% lower than before
+
+        # If the current page has many new articles, increase the delay slightly
+        if new_articles_in_page > 10:
+            return base_delay * 1.2
+
+        # Early pages get a slightly longer delay (content may still be loading)
+        if current_page <= 3:
+            return base_delay * 1.1
+
+        # Later pages can go faster
+        return base_delay * 0.8
+
     def log(self, message: str):
         """Log a message"""
         if self.log_callback:
             self.log_callback(message)

     def save_cookies_for_screenshot(self, username: str):
         """Save cookies for wkhtmltoimage (Netscape cookie format)"""
         cookies_path = get_cookie_jar_path(username)
@@ -160,24 +267,22 @@ class APIBrowser:
             self.log(f"[API] 保存cookies失败: {e}")
             return False


     def _request_with_retry(self, method, url, max_retries=3, retry_delay=1, **kwargs):
         """Request helper with retry logic"""
         # Use a longer timeout (15s) for 60 seconds after startup, then the configured timeout
         if (_time_module.time() - _MODULE_START_TIME) < _WARMUP_PERIOD_SECONDS:
-            kwargs.setdefault('timeout', _WARMUP_TIMEOUT_SECONDS)
+            kwargs.setdefault("timeout", _WARMUP_TIMEOUT_SECONDS)
         else:
-            kwargs.setdefault('timeout', _API_REQUEST_TIMEOUT_SECONDS)
+            kwargs.setdefault("timeout", _API_REQUEST_TIMEOUT_SECONDS)
         last_error = None
         timeout_value = kwargs.get("timeout")
         diag_enabled = _API_DIAGNOSTIC_LOG
         slow_ms = _API_DIAGNOSTIC_SLOW_MS

         for attempt in range(1, max_retries + 1):
             start_ts = _time_module.time()
             try:
-                if method.lower() == 'get':
+                if method.lower() == "get":
                     resp = self.session.get(url, **kwargs)
                 else:
                     resp = self.session.post(url, **kwargs)
@@ -198,19 +303,20 @@ class APIBrowser:
                 if attempt < max_retries:
                     self.log(f"[API] 请求超时,{retry_delay}秒后重试 ({attempt}/{max_retries})...")
                     import time
+
                     time.sleep(retry_delay)
                 else:
                     self.log(f"[API] 请求失败,已重试{max_retries}次: {str(e)}")


         raise last_error

     def _get_aspnet_fields(self, soup):
         """Fetch the ASP.NET hidden form fields"""
         fields = {}
-        for name in ['__VIEWSTATE', '__VIEWSTATEGENERATOR', '__EVENTVALIDATION']:
-            field = soup.find('input', {'name': name})
+        for name in ["__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"]:
+            field = soup.find("input", {"name": name})
             if field:
-                fields[name] = field.get('value', '')
+                fields[name] = field.get("value", "")
         return fields

     def get_real_name(self) -> Optional[str]:
@@ -224,18 +330,18 @@ class APIBrowser:

         try:
             url = f"{BASE_URL}/admin/center.aspx"
-            resp = self._request_with_retry('get', url)
-            soup = BeautifulSoup(resp.text, 'html.parser')
+            resp = self._request_with_retry("get", url)
+            soup = BeautifulSoup(resp.text, "html.parser")

             # Find the element containing "姓名:" (the user's name)
             # Page format: <li><p>姓名:喻勇祥(19174616018) 人力资源编码: ...</p></li>
-            nlist = soup.find('div', {'class': 'nlist-5'})
+            nlist = soup.find("div", {"class": "nlist-5"})
             if nlist:
-                first_li = nlist.find('li')
+                first_li = nlist.find("li")
                 if first_li:
                     text = first_li.get_text()
                     # Parse the name; the format is "姓名:XXX(phone number)"
-                    match = re.search(r'姓名[::]\s*([^\((]+)', text)
+                    match = re.search(r"姓名[::]\s*([^\((]+)", text)
                     if match:
                         real_name = match.group(1).strip()
                         if real_name:
@@ -249,26 +355,26 @@ class APIBrowser:
         self.log(f"[API] 登录: {username}")

         try:
-            resp = self._request_with_retry('get', LOGIN_URL)
+            resp = self._request_with_retry("get", LOGIN_URL)

-            soup = BeautifulSoup(resp.text, 'html.parser')
+            soup = BeautifulSoup(resp.text, "html.parser")
             fields = self._get_aspnet_fields(soup)

             data = fields.copy()
-            data['txtUserName'] = username
-            data['txtPassword'] = password
-            data['btnSubmit'] = '登 录'
+            data["txtUserName"] = username
+            data["txtPassword"] = password
+            data["btnSubmit"] = "登 录"

             resp = self._request_with_retry(
-                'post',
+                "post",
                 LOGIN_URL,
                 data=data,
                 headers={
-                    'Content-Type': 'application/x-www-form-urlencoded',
-                    'Origin': BASE_URL,
-                    'Referer': LOGIN_URL,
+                    "Content-Type": "application/x-www-form-urlencoded",
+                    "Origin": BASE_URL,
+                    "Referer": LOGIN_URL,
                 },
-                allow_redirects=True
+                allow_redirects=True,
             )

             if INDEX_URL_PATTERN in resp.url:
@@ -276,9 +382,9 @@ class APIBrowser:
                 self.log(f"[API] 登录成功")
                 return True
             else:
-                soup = BeautifulSoup(resp.text, 'html.parser')
-                error = soup.find(id='lblMsg')
-                error_msg = error.get_text().strip() if error else '未知错误'
+                soup = BeautifulSoup(resp.text, "html.parser")
+                error = soup.find(id="lblMsg")
+                error_msg = error.get_text().strip() if error else "未知错误"
                 self.log(f"[API] 登录失败: {error_msg}")
                 return False

@@ -292,55 +398,57 @@ class APIBrowser:
             return [], 0, None

         if base_url and page > 1:
-            url = re.sub(r'page=\d+', f'page={page}', base_url)
+            url = re.sub(r"page=\d+", f"page={page}", base_url)
         elif page > 1:
             # Fallback: if there is no next_url (in rare cases the page offers no "下一页" link), try appending the page parameter directly
             url = f"{BASE_URL}/admin/center.aspx?bz={bz}&page={page}"
         else:
             url = f"{BASE_URL}/admin/center.aspx?bz={bz}"

-        resp = self._request_with_retry('get', url)
-        soup = BeautifulSoup(resp.text, 'html.parser')
+        resp = self._request_with_retry("get", url)
+        soup = BeautifulSoup(resp.text, "html.parser")
         articles = []

-        ltable = soup.find('table', {'class': 'ltable'})
+        ltable = soup.find("table", {"class": "ltable"})
         if ltable:
-            rows = ltable.find_all('tr')[1:]
+            rows = ltable.find_all("tr")[1:]
             for row in rows:
                 # Skip the "暂无记录" (no records) placeholder row
-                if '暂无记录' in row.get_text():
+                if "暂无记录" in row.get_text():
                     continue

-                link = row.find('a', href=True)
+                link = row.find("a", href=True)
                 if link:
-                    href = link.get('href', '')
+                    href = link.get("href", "")
                     title = link.get_text().strip()

-                    match = re.search(r'id=(\d+)', href)
+                    match = re.search(r"id=(\d+)", href)
                     article_id = match.group(1) if match else None

-                    articles.append({
-                        'title': title,
-                        'href': href,
-                        'article_id': article_id,
-                    })
+                    articles.append(
+                        {
+                            "title": title,
+                            "href": href,
+                            "article_id": article_id,
+                        }
+                    )

         # Determine the total number of pages
         total_pages = 1
         next_page_url = None
         total_records = 0

-        page_content = soup.find(id='PageContent')
+        page_content = soup.find(id="PageContent")
         if page_content:
             text = page_content.get_text()
-            total_match = re.search(r'共(\d+)记录', text)
+            total_match = re.search(r"共(\d+)记录", text)
             if total_match:
                 total_records = int(total_match.group(1))
                 total_pages = (total_records + 9) // 10

-            next_link = page_content.find('a', string=re.compile('下一页'))
+            next_link = page_content.find("a", string=re.compile("下一页"))
             if next_link:
-                next_href = next_link.get('href', '')
+                next_href = next_link.get("href", "")
                 if next_href:
                     next_page_url = f"{BASE_URL}/admin/{next_href}"

@@ -351,56 +459,55 @@ class APIBrowser:
         return articles, total_pages, next_page_url

     def get_article_attachments(self, article_href: str):
-        """
-        Fetch an article's attachment list and article info.
-
-        Returns:
-            tuple: (attachments_list, article_info)
-            - attachments_list: the list of attachments
-            - article_info: a dict with channel_id and article_id, used to mark the article as read
-        """
-        if not article_href.startswith('http'):
+        """Fetch an article's attachment list and article info."""
+        if not article_href.startswith("http"):
             url = f"{BASE_URL}/admin/{article_href}"
         else:
             url = article_href

-        resp = self._request_with_retry('get', url)
-        soup = BeautifulSoup(resp.text, 'html.parser')
+        # Check the cache first to avoid an unnecessary request
+        # Use the URL as the cache key (simplified scheme)
+        cache_key = f"attachments_{hash(url)}"
+        cached_result = self._parse_cache.get(cache_key)
+        if cached_result:
+            return cached_result
+
+        resp = self._request_with_retry("get", url)
+        soup = BeautifulSoup(resp.text, "html.parser")

         attachments = []
-        article_info = {'channel_id': None, 'article_id': None}
+        article_info = {"channel_id": None, "article_id": None}

         # Extract channel_id and article_id from the saveread button
-        for elem in soup.find_all(['button', 'input']):
-            onclick = elem.get('onclick', '')
-            match = re.search(r'saveread\((\d+),(\d+)\)', onclick)
+        for elem in soup.find_all(["button", "input"]):
+            onclick = elem.get("onclick", "")
+            match = re.search(r"saveread\((\d+),(\d+)\)", onclick)
             if match:
-                article_info['channel_id'] = match.group(1)
-                article_info['article_id'] = match.group(2)
+                article_info["channel_id"] = match.group(1)
+                article_info["article_id"] = match.group(2)
                 break

-        attach_list = soup.find('div', {'class': 'attach-list2'})
+        attach_list = soup.find("div", {"class": "attach-list2"})
         if attach_list:
-            items = attach_list.find_all('li')
+            items = attach_list.find_all("li")
             for item in items:
-                download_links = item.find_all('a', onclick=re.compile(r'download2?\.ashx'))
+                download_links = item.find_all("a", onclick=re.compile(r"download2?\.ashx"))
                 for link in download_links:
-                    onclick = link.get('onclick', '')
-                    id_match = re.search(r'id=(\d+)', onclick)
-                    channel_match = re.search(r'channel_id=(\d+)', onclick)
+                    onclick = link.get("onclick", "")
+                    id_match = re.search(r"id=(\d+)", onclick)
+                    channel_match = re.search(r"channel_id=(\d+)", onclick)
                     if id_match:
                         attach_id = id_match.group(1)
-                        channel_id = channel_match.group(1) if channel_match else '1'
-                        h3 = item.find('h3')
-                        filename = h3.get_text().strip() if h3 else f'附件{attach_id}'
-                        attachments.append({
-                            'id': attach_id,
-                            'channel_id': channel_id,
-                            'filename': filename
-                        })
+                        channel_id = channel_match.group(1) if channel_match else "1"
+                        h3 = item.find("h3")
+                        filename = h3.get_text().strip() if h3 else f"附件{attach_id}"
+                        attachments.append({"id": attach_id, "channel_id": channel_id, "filename": filename})
                         break

-        return attachments, article_info
+        result = (attachments, article_info)
+        # Store in the cache
+        self._parse_cache.set(cache_key, result)
+        return result

     def mark_article_read(self, channel_id: str, article_id: str) -> bool:
         """Mark an article as read via the saveread API"""
@@ -408,7 +515,10 @@ class APIBrowser:
             return False

         import random
-        saveread_url = f"{BASE_URL}/tools/submit_ajax.ashx?action=saveread&time={random.random()}&fl={channel_id}&id={article_id}"
+
+        saveread_url = (
+            f"{BASE_URL}/tools/submit_ajax.ashx?action=saveread&time={random.random()}&fl={channel_id}&id={article_id}"
+        )

         try:
             resp = self._request_with_retry("post", saveread_url)
@@ -416,14 +526,14 @@ class APIBrowser:
             if resp.status_code == 200:
                 try:
                     data = resp.json()
-                    return data.get('status') == 1
+                    return data.get("status") == 1
                 except:
                     return True  # if the body is not JSON but the status code is 200, treat it as success
             return False
         except:
             return False

-    def mark_read(self, attach_id: str, channel_id: str = '1') -> bool:
+    def mark_read(self, attach_id: str, channel_id: str = "1") -> bool:
         """Mark an attachment as read by visiting the preview channel"""
         download_url = f"{BASE_URL}/tools/download2.ashx?site=main&id={attach_id}&channel_id={channel_id}"

@@ -461,7 +571,7 @@ class APIBrowser:
         # Parameters after the site update: 0 = to-read, 1 = read (pre-registration unread is switched via page interaction)
         # Current frontend options: pre-registration unread, to-read (defaults to to-read)
         browse_type_text = str(browse_type or "")
-        if '注册前' in browse_type_text:
+        if "注册前" in browse_type_text:
             bz = 0  # pre-registration unread (currently the same as to-read; the site distinguishes them via page state)
         else:
             bz = 0  # to-read
@@ -528,14 +638,14 @@ class APIBrowser:
                 if should_stop_callback and should_stop_callback():
                     break

-                article_href = article['href']
+                article_href = article["href"]
                 # Skip articles that were already processed
                 if article_href in processed_hrefs:
                     continue

                 processed_hrefs.add(article_href)
                 new_articles_in_page += 1
-                title = article['title'][:30]
+                title = article["title"][:30]

                 # Fetch attachments and article info (article detail page)
                 try:
@@ -556,16 +666,13 @@ class APIBrowser:

                 # Mark the article as read (via the saveread API)
                 article_marked = False
-                if article_info.get('channel_id') and article_info.get('article_id'):
-                    article_marked = self.mark_article_read(
-                        article_info['channel_id'],
-                        article_info['article_id']
-                    )
+                if article_info.get("channel_id") and article_info.get("article_id"):
+                    article_marked = self.mark_article_read(article_info["channel_id"], article_info["article_id"])

                 # Process attachments (if any)
                 if attachments:
                     for attach in attachments:
-                        if self.mark_read(attach['id'], attach['channel_id']):
+                        if self.mark_read(attach["id"], attach["channel_id"]):
                             total_attachments += 1

                     self.log(f"[API] [{total_items}] {title} - {len(attachments)}个附件")
@@ -574,9 +681,10 @@ class APIBrowser:
                     status = "已标记" if article_marked else "标记失败"
                     self.log(f"[API] [{total_items}] {title} - 无附件({status})")

-                time.sleep(0.1)
+                # Smart delay strategy: adjust dynamically based on consecutive failures and article count
+                time.sleep(self._calculate_adaptive_delay(total_items, consecutive_failures))

-            time.sleep(0.2)
+            time.sleep(self._calculate_page_delay(current_page, new_articles_in_page))

             # Decide which page to fetch next
             if new_articles_in_page > 0:
@@ -599,7 +707,9 @@ class APIBrowser:

         report_progress(force=True)
         if skipped_items:
-            self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件(跳过 {skipped_items} 条内容)")
+            self.log(
+                f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件(跳过 {skipped_items} 条内容)"
+            )
         else:
             self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件")

@@ -656,7 +766,7 @@ def warmup_api_connection(proxy_config: Optional[dict] = None, log_callback: Opt

         # Send a lightweight request to establish the connection
         resp = session.get(f"{BASE_URL}/admin/login.aspx", timeout=10, allow_redirects=False)
-        log(f"✓ API 连接预热完成 (status={resp.status_code})")
+        log(f"[OK] API 连接预热完成 (status={resp.status_code})")
         session.close()
         return True
     except Exception as e: