From 71cc518fe8afd4034fd0605680e748ad73c0ddc3 Mon Sep 17 00:00:00 2001 From: yuyx <237899745@qq.com> Date: Tue, 16 Dec 2025 19:51:27 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E6=B5=8F=E8=A7=88=E5=86=85=E5=AE=B9/?= =?UTF-8?q?=E9=99=84=E4=BB=B6=E8=BF=9B=E5=BA=A6=E5=AE=9E=E6=97=B6=E6=9B=B4?= =?UTF-8?q?=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api_browser.py | 70 ++++++++++++++++++++++++++++++++++++----------- services/tasks.py | 36 +++++++++++++++++++++++- 2 files changed, 89 insertions(+), 17 deletions(-) diff --git a/api_browser.py b/api_browser.py index 7bef2d3..b3b418c 100755 --- a/api_browser.py +++ b/api_browser.py @@ -230,7 +230,7 @@ class APIBrowser: def get_article_list_page(self, bz: int = 2, page: int = 1, base_url: str = None): """获取单页文章列表""" if not self.logged_in: - return [], 0, None + return [], 0, None, 0 try: if base_url and page > 1: @@ -264,9 +264,10 @@ class APIBrowser: 'article_id': article_id, }) - # 获取总页数 + # 获取总页数 / 总记录数 total_pages = 1 next_page_url = None + total_records = 0 page_content = soup.find(id='PageContent') if page_content: @@ -282,11 +283,11 @@ class APIBrowser: if next_href: next_page_url = f"{BASE_URL}/admin/{next_href}" - return articles, total_pages, next_page_url + return articles, total_pages, next_page_url, total_records except Exception as e: self.log(f"[API] 获取列表失败: {str(e)}") - return [], 0, None + return [], 0, None, 0 def get_article_attachments(self, article_href: str): """获取文章的附件列表""" @@ -338,14 +339,19 @@ class APIBrowser: except: return False - def browse_content(self, browse_type: str, - should_stop_callback: Optional[Callable] = None) -> APIBrowseResult: + def browse_content( + self, + browse_type: str, + should_stop_callback: Optional[Callable] = None, + progress_callback: Optional[Callable] = None, + ) -> APIBrowseResult: """ 浏览内容并标记已读 Args: browse_type: 浏览类型 (应读/注册前未读) should_stop_callback: 检查是否应该停止的回调函数 + progress_callback: 进度回调(可选),用于实时上报已处理/总数 Returns: 浏览结果 @@ -368,13 +374,14 @@ class APIBrowser: self.log(f"[API] 开始浏览 '{browse_type}' (bz={bz})...") try: - total_items = 0 - total_attachments = 0 + browsed_items = 0 + viewed_attachments = 0 + discovered_attachments = 0 page = 1 base_url = None # 获取第一页 - articles, total_pages, next_url = self.get_article_list_page(bz, page) + articles, total_pages, next_url, total_records = self.get_article_list_page(bz, page) if not articles: self.log(f"[API] '{browse_type}' 没有待处理内容") @@ -386,6 +393,32 @@ class APIBrowser: if next_url: base_url = next_url + last_report_ts = 0.0 + + def report_progress(force: bool = False): + nonlocal last_report_ts + if not progress_callback: + return + now_ts = time.time() + if not force and now_ts - last_report_ts < 1.0: + return + last_report_ts = now_ts + try: + progress_callback( + { + "total_items": int(total_records or 0), + "browsed_items": int(browsed_items or 0), + "total_attachments": int(discovered_attachments or 0), + "viewed_attachments": int(viewed_attachments or 0), + "page": int(page or 0), + "total_pages": int(total_pages or 0), + } + ) + except Exception: + pass + + report_progress(force=True) + # 处理所有页面 while True: if should_stop_callback and should_stop_callback(): @@ -397,17 +430,20 @@ class APIBrowser: break title = article['title'][:30] - total_items += 1 + browsed_items += 1 # 获取附件 attachments = self.get_article_attachments(article['href']) if attachments: + discovered_attachments += len(attachments) for attach in attachments: if self.mark_read(attach['id'], attach['channel_id']): - total_attachments += 1 + viewed_attachments += 1 - self.log(f"[API] [{total_items}] {title} - {len(attachments)}个附件") + self.log(f"[API] [{browsed_items}] {title} - {len(attachments)}个附件") + + report_progress() time.sleep(0.1) @@ -416,20 +452,22 @@ class APIBrowser: if page > total_pages: break - articles, _, next_url = self.get_article_list_page(bz, page, base_url) + articles, _, next_url, _ = self.get_article_list_page(bz, page, base_url) if not articles: break if next_url: base_url = next_url + report_progress(force=True) time.sleep(0.2) - self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件") + report_progress(force=True) + self.log(f"[API] 浏览完成: {browsed_items} 条内容,{viewed_attachments} 个附件") result.success = True - result.total_items = total_items - result.total_attachments = total_attachments + result.total_items = browsed_items + result.total_attachments = viewed_attachments return result except Exception as e: diff --git a/services/tasks.py b/services/tasks.py index 73aa9b3..a757d1d 100644 --- a/services/tasks.py +++ b/services/tasks.py @@ -590,12 +590,46 @@ def run_task(user_id, account_id, browse_type, enable_screenshot=True, source="m safe_update_task_status(account_id, {"detail_status": "正在浏览"}) log_to_client(f"开始浏览 '{browse_type}' 内容...", user_id, account_id) + account.total_items = 0 + account.total_attachments = 0 + safe_update_task_status(account_id, {"progress": {"items": 0, "attachments": 0}}) def should_stop(): return account.should_stop + last_progress_ts = 0.0 + + def on_browse_progress(progress: dict): + nonlocal last_progress_ts + try: + now_ts = time_module.time() + if now_ts - last_progress_ts < 0.5: + return + last_progress_ts = now_ts + + total_items = int(progress.get("total_items") or 0) + browsed_items = int(progress.get("browsed_items") or 0) + total_attachments = int(progress.get("total_attachments") or 0) + viewed_attachments = int(progress.get("viewed_attachments") or 0) + + if total_items > 0: + account.total_items = total_items + else: + account.total_items = max(int(getattr(account, "total_items", 0) or 0), browsed_items) + + account.total_attachments = max(total_attachments, viewed_attachments) + safe_update_task_status( + account_id, {"progress": {"items": browsed_items, "attachments": viewed_attachments}} + ) + except Exception: + pass + checkpoint_mgr.update_stage(task_id, TaskStage.BROWSING, progress_percent=50) - result = api_browser.browse_content(browse_type=browse_type, should_stop_callback=should_stop) + result = api_browser.browse_content( + browse_type=browse_type, + should_stop_callback=should_stop, + progress_callback=on_browse_progress, + ) else: error_message = "登录失败" log_to_client(f"❌ {error_message}", user_id, account_id)