diff --git a/api_browser.py b/api_browser.py index 5eacf23..de96912 100755 --- a/api_browser.py +++ b/api_browser.py @@ -475,6 +475,7 @@ class APIBrowser: # 这样可以避免分页错位问题(标记已读后文章从列表消失导致后续页面上移) max_iterations = total_records + 10 # 防止无限循环 iteration = 0 + processed_hrefs = set() # 跟踪已处理的文章,防止重复处理 while articles and iteration < max_iterations: iteration += 1 @@ -483,14 +484,24 @@ class APIBrowser: self.log("[API] 收到停止信号") break + new_articles_in_page = 0 # 本次迭代中新处理的文章数 + for article in articles: if should_stop_callback and should_stop_callback(): break + article_href = article['href'] + # 跳过已处理的文章 + if article_href in processed_hrefs: + continue + + processed_hrefs.add(article_href) + new_articles_in_page += 1 title = article['title'][:30] + # 获取附件(文章详情页) try: - attachments = self.get_article_attachments(article['href']) + attachments = self.get_article_attachments(article_href) consecutive_failures = 0 except Exception as e: skipped_items += 1 @@ -514,6 +525,11 @@ class APIBrowser: time.sleep(0.1) + # 如果当前页没有新文章被处理,说明所有文章都已处理过,退出循环 + if new_articles_in_page == 0: + self.log(f"[API] 当前页所有文章均已处理,结束循环") + break + time.sleep(0.2) # 重新获取第1页,检查是否还有未处理的内容