From b2b0dfd5006852983e2bc199423b2e6315bb597a Mon Sep 17 00:00:00 2001 From: yuyx <237899745@qq.com> Date: Wed, 14 Jan 2026 13:08:34 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=88=86=E9=A1=B5?= =?UTF-8?q?=E9=94=99=E4=BD=8D=E9=97=AE=E9=A2=98=EF=BC=8C=E6=94=B9=E4=B8=BA?= =?UTF-8?q?=E5=BE=AA=E7=8E=AF=E8=8E=B7=E5=8F=96=E7=AC=AC1=E9=A1=B5?= =?UTF-8?q?=E7=9B=B4=E5=88=B0=E6=B8=85=E7=A9=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 问题:标记已读后文章从列表消失,导致后续页面上移, 造成按页码遍历时遗漏部分内容。 解决:每次处理完当前页后重新获取第1页,循环直到没有内容。 Co-Authored-By: Claude Opus 4.5 --- api_browser.py | 44 +++++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/api_browser.py b/api_browser.py index 703b01c..5eacf23 100755 --- a/api_browser.py +++ b/api_browser.py @@ -433,15 +433,13 @@ class APIBrowser: try: total_items = 0 total_attachments = 0 - page = 1 - base_url = None skipped_items = 0 consecutive_failures = 0 max_consecutive_failures = 3 - # 获取第一页 + # 获取第一页,了解总记录数 try: - articles, total_pages, next_url = self.get_article_list_page(bz, page) + articles, total_pages, _ = self.get_article_list_page(bz, 1) consecutive_failures = 0 except Exception as e: result.error_message = str(e) @@ -453,14 +451,9 @@ class APIBrowser: result.success = True return result - self.log(f"[API] 共 {total_pages} 页,开始处理...") - - if next_url: - base_url = next_url - elif total_pages > 1: - base_url = f"{BASE_URL}/admin/center.aspx?bz={bz}&page=2" - total_records = int(getattr(self, "last_total_records", 0) or 0) + self.log(f"[API] 共 {total_records} 条记录,开始处理...") + last_report_ts = 0.0 def report_progress(force: bool = False): @@ -478,23 +471,18 @@ class APIBrowser: report_progress(force=True) - # 处理所有页面 - while page <= total_pages: + # 循环处理:每次获取第1页,直到没有内容 + # 这样可以避免分页错位问题(标记已读后文章从列表消失导致后续页面上移) + max_iterations = total_records + 10 # 防止无限循环 + iteration = 0 + + while articles and iteration < max_iterations: + iteration += 1 + if should_stop_callback and should_stop_callback(): self.log("[API] 收到停止信号") break - # page==1 已取过,后续页在这里获取 - if page > 1: - try: - articles, _, next_url = self.get_article_list_page(bz, page, base_url) - consecutive_failures = 0 - if next_url: - base_url = next_url - except Exception as e: - self.log(f"[API] 获取第{page}页列表失败,终止本次浏览: {str(e)}") - raise - for article in articles: if should_stop_callback and should_stop_callback(): break @@ -526,9 +514,15 @@ class APIBrowser: time.sleep(0.1) - page += 1 time.sleep(0.2) + # 重新获取第1页,检查是否还有未处理的内容 + try: + articles, _, _ = self.get_article_list_page(bz, 1) + except Exception as e: + self.log(f"[API] 重新获取列表失败: {str(e)}") + break + report_progress(force=True) if skipped_items: self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件(跳过 {skipped_items} 条内容)")