fix(api): 超时按单条处理避免中途结束
This commit is contained in:
@@ -233,9 +233,11 @@ class APIBrowser:
|
||||
if not self.logged_in:
|
||||
return [], 0, None
|
||||
|
||||
try:
|
||||
if base_url and page > 1:
|
||||
url = re.sub(r'page=\d+', f'page={page}', base_url)
|
||||
elif page > 1:
|
||||
# 兼容兜底:若没有 next_url(极少数情况下页面不提供“下一页”链接),尝试直接拼 page 参数
|
||||
url = f"{BASE_URL}/admin/center.aspx?bz={bz}&page={page}"
|
||||
else:
|
||||
url = f"{BASE_URL}/admin/center.aspx?bz={bz}"
|
||||
|
||||
@@ -290,13 +292,8 @@ class APIBrowser:
|
||||
self.last_total_records = 0
|
||||
return articles, total_pages, next_page_url
|
||||
|
||||
except Exception as e:
|
||||
self.log(f"[API] 获取列表失败: {str(e)}")
|
||||
return [], 0, None
|
||||
|
||||
def get_article_attachments(self, article_href: str):
|
||||
"""获取文章的附件列表"""
|
||||
try:
|
||||
if not article_href.startswith('http'):
|
||||
url = f"{BASE_URL}/admin/{article_href}"
|
||||
else:
|
||||
@@ -330,9 +327,6 @@ class APIBrowser:
|
||||
|
||||
return attachments
|
||||
|
||||
except Exception as e:
|
||||
return []
|
||||
|
||||
def mark_read(self, attach_id: str, channel_id: str = '1') -> bool:
|
||||
"""通过访问下载链接标记已读"""
|
||||
download_url = f"{BASE_URL}/tools/download.ashx?site=main&id={attach_id}&channel_id={channel_id}"
|
||||
@@ -383,9 +377,19 @@ class APIBrowser:
|
||||
total_attachments = 0
|
||||
page = 1
|
||||
base_url = None
|
||||
skipped_items = 0
|
||||
skipped_pages = 0
|
||||
consecutive_failures = 0
|
||||
max_consecutive_failures = 3
|
||||
|
||||
# 获取第一页
|
||||
try:
|
||||
articles, total_pages, next_url = self.get_article_list_page(bz, page)
|
||||
consecutive_failures = 0
|
||||
except Exception as e:
|
||||
result.error_message = str(e)
|
||||
self.log(f"[API] 获取第1页列表失败: {str(e)}")
|
||||
return result
|
||||
|
||||
if not articles:
|
||||
self.log(f"[API] '{browse_type}' 没有待处理内容")
|
||||
@@ -396,6 +400,8 @@ class APIBrowser:
|
||||
|
||||
if next_url:
|
||||
base_url = next_url
|
||||
elif total_pages > 1:
|
||||
base_url = f"{BASE_URL}/admin/center.aspx?bz={bz}&page=2"
|
||||
|
||||
total_records = int(getattr(self, "last_total_records", 0) or 0)
|
||||
last_report_ts = 0.0
|
||||
@@ -416,22 +422,51 @@ class APIBrowser:
|
||||
report_progress(force=True)
|
||||
|
||||
# 处理所有页面
|
||||
while True:
|
||||
while page <= total_pages:
|
||||
if should_stop_callback and should_stop_callback():
|
||||
self.log("[API] 收到停止信号")
|
||||
break
|
||||
|
||||
# page==1 已取过,后续页在这里获取
|
||||
if page > 1:
|
||||
try:
|
||||
articles, _, next_url = self.get_article_list_page(bz, page, base_url)
|
||||
consecutive_failures = 0
|
||||
if next_url:
|
||||
base_url = next_url
|
||||
except Exception as e:
|
||||
skipped_pages += 1
|
||||
consecutive_failures += 1
|
||||
self.log(
|
||||
f"[API] 获取第{page}页列表失败,跳过本页(连续失败{consecutive_failures}/{max_consecutive_failures}): {str(e)}"
|
||||
)
|
||||
if consecutive_failures >= max_consecutive_failures:
|
||||
raise
|
||||
page += 1
|
||||
continue
|
||||
|
||||
for article in articles:
|
||||
if should_stop_callback and should_stop_callback():
|
||||
break
|
||||
|
||||
title = article['title'][:30]
|
||||
# 获取附件(文章详情页)
|
||||
try:
|
||||
attachments = self.get_article_attachments(article['href'])
|
||||
consecutive_failures = 0
|
||||
except Exception as e:
|
||||
skipped_items += 1
|
||||
consecutive_failures += 1
|
||||
self.log(
|
||||
f"[API] 获取文章失败,跳过(连续失败{consecutive_failures}/{max_consecutive_failures}): {title} | {str(e)}"
|
||||
)
|
||||
if consecutive_failures >= max_consecutive_failures:
|
||||
raise
|
||||
continue
|
||||
|
||||
total_items += 1
|
||||
report_progress()
|
||||
|
||||
# 获取附件
|
||||
attachments = self.get_article_attachments(article['href'])
|
||||
|
||||
if attachments:
|
||||
for attach in attachments:
|
||||
if self.mark_read(attach['id'], attach['channel_id']):
|
||||
@@ -441,21 +476,15 @@ class APIBrowser:
|
||||
|
||||
time.sleep(0.1)
|
||||
|
||||
# 下一页
|
||||
page += 1
|
||||
if page > total_pages:
|
||||
break
|
||||
|
||||
articles, _, next_url = self.get_article_list_page(bz, page, base_url)
|
||||
if not articles:
|
||||
break
|
||||
|
||||
if next_url:
|
||||
base_url = next_url
|
||||
|
||||
time.sleep(0.2)
|
||||
|
||||
report_progress(force=True)
|
||||
if skipped_items or skipped_pages:
|
||||
self.log(
|
||||
f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件(跳过 {skipped_items} 条内容,{skipped_pages} 页列表)"
|
||||
)
|
||||
else:
|
||||
self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件")
|
||||
|
||||
result.success = True
|
||||
|
||||
Reference in New Issue
Block a user