fix: 修复分页错位问题,改为循环获取第1页直到清空
问题:标记已读后文章从列表消失,导致后续页面上移,造成按页码遍历时遗漏部分内容。
解决:每次处理完当前页后重新获取第1页,循环直到没有内容。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -433,15 +433,13 @@ class APIBrowser:
|
|||||||
try:
|
try:
|
||||||
total_items = 0
|
total_items = 0
|
||||||
total_attachments = 0
|
total_attachments = 0
|
||||||
page = 1
|
|
||||||
base_url = None
|
|
||||||
skipped_items = 0
|
skipped_items = 0
|
||||||
consecutive_failures = 0
|
consecutive_failures = 0
|
||||||
max_consecutive_failures = 3
|
max_consecutive_failures = 3
|
||||||
|
|
||||||
# 获取第一页
|
# 获取第一页,了解总记录数
|
||||||
try:
|
try:
|
||||||
articles, total_pages, next_url = self.get_article_list_page(bz, page)
|
articles, total_pages, _ = self.get_article_list_page(bz, 1)
|
||||||
consecutive_failures = 0
|
consecutive_failures = 0
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
result.error_message = str(e)
|
result.error_message = str(e)
|
||||||
@@ -453,14 +451,9 @@ class APIBrowser:
|
|||||||
result.success = True
|
result.success = True
|
||||||
return result
|
return result
|
||||||
|
|
||||||
self.log(f"[API] 共 {total_pages} 页,开始处理...")
|
|
||||||
|
|
||||||
if next_url:
|
|
||||||
base_url = next_url
|
|
||||||
elif total_pages > 1:
|
|
||||||
base_url = f"{BASE_URL}/admin/center.aspx?bz={bz}&page=2"
|
|
||||||
|
|
||||||
total_records = int(getattr(self, "last_total_records", 0) or 0)
|
total_records = int(getattr(self, "last_total_records", 0) or 0)
|
||||||
|
self.log(f"[API] 共 {total_records} 条记录,开始处理...")
|
||||||
|
|
||||||
last_report_ts = 0.0
|
last_report_ts = 0.0
|
||||||
|
|
||||||
def report_progress(force: bool = False):
|
def report_progress(force: bool = False):
|
||||||
@@ -478,23 +471,18 @@ class APIBrowser:
|
|||||||
|
|
||||||
report_progress(force=True)
|
report_progress(force=True)
|
||||||
|
|
||||||
# 处理所有页面
|
# 循环处理:每次获取第1页,直到没有内容
|
||||||
while page <= total_pages:
|
# 这样可以避免分页错位问题(标记已读后文章从列表消失导致后续页面上移)
|
||||||
|
max_iterations = total_records + 10 # 防止无限循环
|
||||||
|
iteration = 0
|
||||||
|
|
||||||
|
while articles and iteration < max_iterations:
|
||||||
|
iteration += 1
|
||||||
|
|
||||||
if should_stop_callback and should_stop_callback():
|
if should_stop_callback and should_stop_callback():
|
||||||
self.log("[API] 收到停止信号")
|
self.log("[API] 收到停止信号")
|
||||||
break
|
break
|
||||||
|
|
||||||
# page==1 已取过,后续页在这里获取
|
|
||||||
if page > 1:
|
|
||||||
try:
|
|
||||||
articles, _, next_url = self.get_article_list_page(bz, page, base_url)
|
|
||||||
consecutive_failures = 0
|
|
||||||
if next_url:
|
|
||||||
base_url = next_url
|
|
||||||
except Exception as e:
|
|
||||||
self.log(f"[API] 获取第{page}页列表失败,终止本次浏览: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
for article in articles:
|
for article in articles:
|
||||||
if should_stop_callback and should_stop_callback():
|
if should_stop_callback and should_stop_callback():
|
||||||
break
|
break
|
||||||
@@ -526,9 +514,15 @@ class APIBrowser:
|
|||||||
|
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
|
||||||
page += 1
|
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
|
|
||||||
|
# 重新获取第1页,检查是否还有未处理的内容
|
||||||
|
try:
|
||||||
|
articles, _, _ = self.get_article_list_page(bz, 1)
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"[API] 重新获取列表失败: {str(e)}")
|
||||||
|
break
|
||||||
|
|
||||||
report_progress(force=True)
|
report_progress(force=True)
|
||||||
if skipped_items:
|
if skipped_items:
|
||||||
self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件(跳过 {skipped_items} 条内容)")
|
self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件(跳过 {skipped_items} 条内容)")
|
||||||
|
|||||||
Reference in New Issue
Block a user