fix(api): 超时按单条处理避免中途结束

2025-12-17 15:49:05 +08:00
parent 6827d11f40
commit 3f667dd21b
1 changed files with 137 additions and 108 deletions
--- a/api_browser.py
+++ b/api_browser.py
@@ -233,9 +233,11 @@ class APIBrowser:
        if not self.logged_in:
            return [], 0, None
        try:
        if base_url and page > 1:
            url = re.sub(r'page=\d+', f'page={page}', base_url)
        elif page > 1:
            # 兼容兜底：若没有 next_url（极少数情况下页面不提供“下一页”链接），尝试直接拼 page 参数
            url = f"{BASE_URL}/admin/center.aspx?bz={bz}&page={page}"
        else:
            url = f"{BASE_URL}/admin/center.aspx?bz={bz}"
@@ -290,13 +292,8 @@ class APIBrowser:
            self.last_total_records = 0
        return articles, total_pages, next_page_url
        except Exception as e:
            self.log(f"[API] 获取列表失败: {str(e)}")
            return [], 0, None
    def get_article_attachments(self, article_href: str):
        """获取文章的附件列表"""
        try:
        if not article_href.startswith('http'):
            url = f"{BASE_URL}/admin/{article_href}"
        else:
@@ -330,9 +327,6 @@ class APIBrowser:
        return attachments
        except Exception as e:
            return []
    def mark_read(self, attach_id: str, channel_id: str = '1') -> bool:
        """通过访问下载链接标记已读"""
        download_url = f"{BASE_URL}/tools/download.ashx?site=main&id={attach_id}&channel_id={channel_id}"
@@ -383,9 +377,19 @@ class APIBrowser:
            total_attachments = 0
            page = 1
            base_url = None
            skipped_items = 0
            skipped_pages = 0
            consecutive_failures = 0
            max_consecutive_failures = 3
            # 获取第一页
            try:
                articles, total_pages, next_url = self.get_article_list_page(bz, page)
                consecutive_failures = 0
            except Exception as e:
                result.error_message = str(e)
                self.log(f"[API] 获取第1页列表失败: {str(e)}")
                return result
            if not articles:
                self.log(f"[API] '{browse_type}' 没有待处理内容")
@@ -396,6 +400,8 @@ class APIBrowser:
            if next_url:
                base_url = next_url
            elif total_pages > 1:
                base_url = f"{BASE_URL}/admin/center.aspx?bz={bz}&page=2"
            total_records = int(getattr(self, "last_total_records", 0) or 0)
            last_report_ts = 0.0
@@ -416,22 +422,51 @@ class APIBrowser:
            report_progress(force=True)
            # 处理所有页面
-            while True:
+            while page <= total_pages:
                if should_stop_callback and should_stop_callback():
                    self.log("[API] 收到停止信号")
                    break
                # page==1 已取过，后续页在这里获取
                if page > 1:
                    try:
                        articles, _, next_url = self.get_article_list_page(bz, page, base_url)
                        consecutive_failures = 0
                        if next_url:
                            base_url = next_url
                    except Exception as e:
                        skipped_pages += 1
                        consecutive_failures += 1
                        self.log(
                            f"[API] 获取第{page}页列表失败，跳过本页（连续失败{consecutive_failures}/{max_consecutive_failures}）: {str(e)}"
                        )
                        if consecutive_failures >= max_consecutive_failures:
                            raise
                        page += 1
                        continue
                for article in articles:
                    if should_stop_callback and should_stop_callback():
                        break
                    title = article['title'][:30]
                    # 获取附件（文章详情页）
                    try:
                        attachments = self.get_article_attachments(article['href'])
                        consecutive_failures = 0
                    except Exception as e:
                        skipped_items += 1
                        consecutive_failures += 1
                        self.log(
                            f"[API] 获取文章失败，跳过（连续失败{consecutive_failures}/{max_consecutive_failures}）: {title} | {str(e)}"
                        )
                        if consecutive_failures >= max_consecutive_failures:
                            raise
                        continue
                    total_items += 1
                    report_progress()
                    # 获取附件
                    attachments = self.get_article_attachments(article['href'])
                    if attachments:
                        for attach in attachments:
                            if self.mark_read(attach['id'], attach['channel_id']):
@@ -441,21 +476,15 @@ class APIBrowser:
                    time.sleep(0.1)
                # 下一页
                page += 1
                if page > total_pages:
                    break
                articles, _, next_url = self.get_article_list_page(bz, page, base_url)
                if not articles:
                    break
                if next_url:
                    base_url = next_url
                time.sleep(0.2)
            report_progress(force=True)
            if skipped_items or skipped_pages:
                self.log(
                    f"[API] 浏览完成: {total_items} 条内容，{total_attachments} 个附件（跳过 {skipped_items} 条内容，{skipped_pages} 页列表）"
                )
            else:
                self.log(f"[API] 浏览完成: {total_items} 条内容，{total_attachments} 个附件")
            result.success = True