fix: 浏览内容/附件进度实时更新

This commit is contained in:
2025-12-16 19:51:27 +08:00
parent 2abb9ab494
commit 71cc518fe8
2 changed files with 89 additions and 17 deletions

View File

@@ -230,7 +230,7 @@ class APIBrowser:
def get_article_list_page(self, bz: int = 2, page: int = 1, base_url: str = None):
"""获取单页文章列表"""
if not self.logged_in:
return [], 0, None
return [], 0, None, 0
try:
if base_url and page > 1:
@@ -264,9 +264,10 @@ class APIBrowser:
'article_id': article_id,
})
# 获取总页数
# 获取总页数 / 总记录数
total_pages = 1
next_page_url = None
total_records = 0
page_content = soup.find(id='PageContent')
if page_content:
@@ -282,11 +283,11 @@ class APIBrowser:
if next_href:
next_page_url = f"{BASE_URL}/admin/{next_href}"
return articles, total_pages, next_page_url
return articles, total_pages, next_page_url, total_records
except Exception as e:
self.log(f"[API] 获取列表失败: {str(e)}")
return [], 0, None
return [], 0, None, 0
def get_article_attachments(self, article_href: str):
"""获取文章的附件列表"""
@@ -338,14 +339,19 @@ class APIBrowser:
except:
return False
def browse_content(self, browse_type: str,
should_stop_callback: Optional[Callable] = None) -> APIBrowseResult:
def browse_content(
self,
browse_type: str,
should_stop_callback: Optional[Callable] = None,
progress_callback: Optional[Callable] = None,
) -> APIBrowseResult:
"""
浏览内容并标记已读
Args:
browse_type: 浏览类型 (应读/注册前未读)
should_stop_callback: 检查是否应该停止的回调函数
progress_callback: 进度回调(可选),用于实时上报已处理/总数
Returns:
浏览结果
@@ -368,13 +374,14 @@ class APIBrowser:
self.log(f"[API] 开始浏览 '{browse_type}' (bz={bz})...")
try:
total_items = 0
total_attachments = 0
browsed_items = 0
viewed_attachments = 0
discovered_attachments = 0
page = 1
base_url = None
# 获取第一页
articles, total_pages, next_url = self.get_article_list_page(bz, page)
articles, total_pages, next_url, total_records = self.get_article_list_page(bz, page)
if not articles:
self.log(f"[API] '{browse_type}' 没有待处理内容")
@@ -386,6 +393,32 @@ class APIBrowser:
if next_url:
base_url = next_url
last_report_ts = 0.0
def report_progress(force: bool = False):
nonlocal last_report_ts
if not progress_callback:
return
now_ts = time.time()
if not force and now_ts - last_report_ts < 1.0:
return
last_report_ts = now_ts
try:
progress_callback(
{
"total_items": int(total_records or 0),
"browsed_items": int(browsed_items or 0),
"total_attachments": int(discovered_attachments or 0),
"viewed_attachments": int(viewed_attachments or 0),
"page": int(page or 0),
"total_pages": int(total_pages or 0),
}
)
except Exception:
pass
report_progress(force=True)
# 处理所有页面
while True:
if should_stop_callback and should_stop_callback():
@@ -397,17 +430,20 @@ class APIBrowser:
break
title = article['title'][:30]
total_items += 1
browsed_items += 1
# 获取附件
attachments = self.get_article_attachments(article['href'])
if attachments:
discovered_attachments += len(attachments)
for attach in attachments:
if self.mark_read(attach['id'], attach['channel_id']):
total_attachments += 1
viewed_attachments += 1
self.log(f"[API] [{total_items}] {title} - {len(attachments)}个附件")
self.log(f"[API] [{browsed_items}] {title} - {len(attachments)}个附件")
report_progress()
time.sleep(0.1)
@@ -416,20 +452,22 @@ class APIBrowser:
if page > total_pages:
break
articles, _, next_url = self.get_article_list_page(bz, page, base_url)
articles, _, next_url, _ = self.get_article_list_page(bz, page, base_url)
if not articles:
break
if next_url:
base_url = next_url
report_progress(force=True)
time.sleep(0.2)
self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件")
report_progress(force=True)
self.log(f"[API] 浏览完成: {browsed_items} 条内容,{viewed_attachments} 个附件")
result.success = True
result.total_items = total_items
result.total_attachments = total_attachments
result.total_items = browsed_items
result.total_attachments = viewed_attachments
return result
except Exception as e:

View File

@@ -590,12 +590,46 @@ def run_task(user_id, account_id, browse_type, enable_screenshot=True, source="m
safe_update_task_status(account_id, {"detail_status": "正在浏览"})
log_to_client(f"开始浏览 '{browse_type}' 内容...", user_id, account_id)
account.total_items = 0
account.total_attachments = 0
safe_update_task_status(account_id, {"progress": {"items": 0, "attachments": 0}})
def should_stop():
return account.should_stop
last_progress_ts = 0.0
def on_browse_progress(progress: dict):
nonlocal last_progress_ts
try:
now_ts = time_module.time()
if now_ts - last_progress_ts < 0.5:
return
last_progress_ts = now_ts
total_items = int(progress.get("total_items") or 0)
browsed_items = int(progress.get("browsed_items") or 0)
total_attachments = int(progress.get("total_attachments") or 0)
viewed_attachments = int(progress.get("viewed_attachments") or 0)
if total_items > 0:
account.total_items = total_items
else:
account.total_items = max(int(getattr(account, "total_items", 0) or 0), browsed_items)
account.total_attachments = max(total_attachments, viewed_attachments)
safe_update_task_status(
account_id, {"progress": {"items": browsed_items, "attachments": viewed_attachments}}
)
except Exception:
pass
checkpoint_mgr.update_stage(task_id, TaskStage.BROWSING, progress_percent=50)
result = api_browser.browse_content(browse_type=browse_type, should_stop_callback=should_stop)
result = api_browser.browse_content(
browse_type=browse_type,
should_stop_callback=should_stop,
progress_callback=on_browse_progress,
)
else:
error_message = "登录失败"
log_to_client(f"{error_message}", user_id, account_id)