fix: 修复分页错位问题,改为循环获取第1页直到清空

问题:文章被标记为已读后会从列表中消失,后续页面的内容随之上移,
因此按页码逐页遍历时会遗漏部分内容。

解决:每次处理完当前页后重新获取第1页,循环直到列表为空;为防止无限循环,循环次数上限为总记录数 + 10,重新获取列表失败时终止循环。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-14 13:08:34 +08:00
parent 2ff9e18842
commit b2b0dfd500

View File

@@ -433,15 +433,13 @@ class APIBrowser:
try: try:
total_items = 0 total_items = 0
total_attachments = 0 total_attachments = 0
page = 1
base_url = None
skipped_items = 0 skipped_items = 0
consecutive_failures = 0 consecutive_failures = 0
max_consecutive_failures = 3 max_consecutive_failures = 3
# 获取第一页 # 获取第一页,了解总记录数
try: try:
articles, total_pages, next_url = self.get_article_list_page(bz, page) articles, total_pages, _ = self.get_article_list_page(bz, 1)
consecutive_failures = 0 consecutive_failures = 0
except Exception as e: except Exception as e:
result.error_message = str(e) result.error_message = str(e)
@@ -453,14 +451,9 @@ class APIBrowser:
result.success = True result.success = True
return result return result
self.log(f"[API] 共 {total_pages} 页,开始处理...")
if next_url:
base_url = next_url
elif total_pages > 1:
base_url = f"{BASE_URL}/admin/center.aspx?bz={bz}&page=2"
total_records = int(getattr(self, "last_total_records", 0) or 0) total_records = int(getattr(self, "last_total_records", 0) or 0)
self.log(f"[API] 共 {total_records} 条记录,开始处理...")
last_report_ts = 0.0 last_report_ts = 0.0
def report_progress(force: bool = False): def report_progress(force: bool = False):
@@ -478,23 +471,18 @@ class APIBrowser:
report_progress(force=True) report_progress(force=True)
# 处理所有页面 # 循环处理每次获取第1页直到没有内容
while page <= total_pages: # 这样可以避免分页错位问题(标记已读后文章从列表消失导致后续页面上移)
max_iterations = total_records + 10 # 防止无限循环
iteration = 0
while articles and iteration < max_iterations:
iteration += 1
if should_stop_callback and should_stop_callback(): if should_stop_callback and should_stop_callback():
self.log("[API] 收到停止信号") self.log("[API] 收到停止信号")
break break
# page==1 已取过,后续页在这里获取
if page > 1:
try:
articles, _, next_url = self.get_article_list_page(bz, page, base_url)
consecutive_failures = 0
if next_url:
base_url = next_url
except Exception as e:
self.log(f"[API] 获取第{page}页列表失败,终止本次浏览: {str(e)}")
raise
for article in articles: for article in articles:
if should_stop_callback and should_stop_callback(): if should_stop_callback and should_stop_callback():
break break
@@ -526,9 +514,15 @@ class APIBrowser:
time.sleep(0.1) time.sleep(0.1)
page += 1
time.sleep(0.2) time.sleep(0.2)
# 重新获取第1页检查是否还有未处理的内容
try:
articles, _, _ = self.get_article_list_page(bz, 1)
except Exception as e:
self.log(f"[API] 重新获取列表失败: {str(e)}")
break
report_progress(force=True) report_progress(force=True)
if skipped_items: if skipped_items:
self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件(跳过 {skipped_items} 条内容)") self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件(跳过 {skipped_items} 条内容)")