From b2b0dfd5006852983e2bc199423b2e6315bb597a Mon Sep 17 00:00:00 2001
From: yuyx <237899745@qq.com>
Date: Wed, 14 Jan 2026 13:08:34 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=88=86=E9=A1=B5?=
 =?UTF-8?q?=E9=94=99=E4=BD=8D=E9=97=AE=E9=A2=98=EF=BC=8C=E6=94=B9=E4=B8=BA?=
 =?UTF-8?q?=E5=BE=AA=E7=8E=AF=E8=8E=B7=E5=8F=96=E7=AC=AC1=E9=A1=B5?=
 =?UTF-8?q?=E7=9B=B4=E5=88=B0=E6=B8=85=E7=A9=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

问题：标记已读后文章从列表消失，导致后续页面上移，
造成按页码遍历时遗漏部分内容。

解决：每次处理完当前页后重新获取第1页，循环直到列表为空；同时设置最大循环次数（总记录数 + 10），防止无限循环。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 api_browser.py | 44 +++++++++++++++++++-------------------------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/api_browser.py b/api_browser.py
index 703b01c..5eacf23 100755
--- a/api_browser.py
+++ b/api_browser.py
@@ -433,15 +433,13 @@ class APIBrowser:
         try:
             total_items = 0
             total_attachments = 0
-            page = 1
-            base_url = None
             skipped_items = 0
             consecutive_failures = 0
             max_consecutive_failures = 3
 
-            # 获取第一页
+            # 获取第一页，了解总记录数
             try:
-                articles, total_pages, next_url = self.get_article_list_page(bz, page)
+                articles, total_pages, _ = self.get_article_list_page(bz, 1)
                 consecutive_failures = 0
             except Exception as e:
                 result.error_message = str(e)
@@ -453,14 +451,9 @@ class APIBrowser:
                 result.success = True
                 return result
 
-            self.log(f"[API] 共 {total_pages} 页，开始处理...")
-
-            if next_url:
-                base_url = next_url
-            elif total_pages > 1:
-                base_url = f"{BASE_URL}/admin/center.aspx?bz={bz}&page=2"
-
             total_records = int(getattr(self, "last_total_records", 0) or 0)
+            self.log(f"[API] 共 {total_records} 条记录，开始处理...")
+
             last_report_ts = 0.0
 
             def report_progress(force: bool = False):
@@ -478,23 +471,18 @@ class APIBrowser:
 
             report_progress(force=True)
 
-            # 处理所有页面
-            while page <= total_pages:
+            # 循环处理：每次获取第1页，直到没有内容
+            # 这样可以避免分页错位问题（标记已读后文章从列表消失导致后续页面上移）
+            max_iterations = total_records + 10  # 防止无限循环
+            iteration = 0
+
+            while articles and iteration < max_iterations:
+                iteration += 1
+
                 if should_stop_callback and should_stop_callback():
                     self.log("[API] 收到停止信号")
                     break
 
-                # page==1 已取过，后续页在这里获取
-                if page > 1:
-                    try:
-                        articles, _, next_url = self.get_article_list_page(bz, page, base_url)
-                        consecutive_failures = 0
-                        if next_url:
-                            base_url = next_url
-                    except Exception as e:
-                        self.log(f"[API] 获取第{page}页列表失败，终止本次浏览: {str(e)}")
-                        raise
-
                 for article in articles:
                     if should_stop_callback and should_stop_callback():
                         break
@@ -526,9 +514,15 @@ class APIBrowser:
 
                     time.sleep(0.1)
 
-                page += 1
                 time.sleep(0.2)
 
+                # 重新获取第1页，检查是否还有未处理的内容
+                try:
+                    articles, _, _ = self.get_article_list_page(bz, 1)
+                except Exception as e:
+                    self.log(f"[API] 重新获取列表失败: {str(e)}")
+                    break
+
             report_progress(force=True)
             if skipped_items:
                 self.log(f"[API] 浏览完成: {total_items} 条内容，{total_attachments} 个附件（跳过 {skipped_items} 条内容）")