fix: 修复分页错位问题，改为循环获取第1页直到清空

问题：标记已读后文章从列表消失，导致后续页面上移，造成按页码遍历时遗漏部分内容。

解决：每次处理完当前页后重新获取第1页，循环直到没有内容。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-14 13:08:34 +08:00
parent 2ff9e18842
commit b2b0dfd500
1 changed file with 19 additions and 25 deletions
--- a/api_browser.py
+++ b/api_browser.py
@@ -433,15 +433,13 @@ class APIBrowser:
        try:
            total_items = 0
            total_attachments = 0
            page = 1
            base_url = None
            skipped_items = 0
            consecutive_failures = 0
            max_consecutive_failures = 3
-            # 获取第一页
+            # 获取第一页，了解总记录数
            try:
-                articles, total_pages, next_url = self.get_article_list_page(bz, page)
+                articles, total_pages, _ = self.get_article_list_page(bz, 1)
                consecutive_failures = 0
            except Exception as e:
                result.error_message = str(e)
@@ -453,14 +451,9 @@ class APIBrowser:
                result.success = True
                return result
            self.log(f"[API] 共 {total_pages} 页，开始处理...")
            if next_url:
                base_url = next_url
            elif total_pages > 1:
                base_url = f"{BASE_URL}/admin/center.aspx?bz={bz}&page=2"
            total_records = int(getattr(self, "last_total_records", 0) or 0)
            self.log(f"[API] 共 {total_records} 条记录，开始处理...")
            last_report_ts = 0.0
            def report_progress(force: bool = False):
@@ -478,23 +471,18 @@ class APIBrowser:
            report_progress(force=True)
-            # 处理所有页面
+            # 循环处理：每次获取第1页，直到没有内容
-            while page <= total_pages:
+            # 这样可以避免分页错位问题（标记已读后文章从列表消失导致后续页面上移）
            max_iterations = total_records + 10  # 防止无限循环
            iteration = 0
            while articles and iteration < max_iterations:
                iteration += 1
                if should_stop_callback and should_stop_callback():
                    self.log("[API] 收到停止信号")
                    break
                # page==1 已取过，后续页在这里获取
                if page > 1:
                    try:
                        articles, _, next_url = self.get_article_list_page(bz, page, base_url)
                        consecutive_failures = 0
                        if next_url:
                            base_url = next_url
                    except Exception as e:
                        self.log(f"[API] 获取第{page}页列表失败，终止本次浏览: {str(e)}")
                        raise
                for article in articles:
                    if should_stop_callback and should_stop_callback():
                        break
@@ -526,9 +514,15 @@ class APIBrowser:
                    time.sleep(0.1)
                page += 1
                time.sleep(0.2)
                # 重新获取第1页，检查是否还有未处理的内容
                try:
                    articles, _, _ = self.get_article_list_page(bz, 1)
                except Exception as e:
                    self.log(f"[API] 重新获取列表失败: {str(e)}")
                    break
            report_progress(force=True)
            if skipped_items:
                self.log(f"[API] 浏览完成: {total_items} 条内容，{total_attachments} 个附件（跳过 {skipped_items} 条内容）")