fix: 浏览内容/附件进度实时更新
This commit is contained in:
@@ -230,7 +230,7 @@ class APIBrowser:
|
|||||||
def get_article_list_page(self, bz: int = 2, page: int = 1, base_url: str = None):
|
def get_article_list_page(self, bz: int = 2, page: int = 1, base_url: str = None):
|
||||||
"""获取单页文章列表"""
|
"""获取单页文章列表"""
|
||||||
if not self.logged_in:
|
if not self.logged_in:
|
||||||
return [], 0, None
|
return [], 0, None, 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if base_url and page > 1:
|
if base_url and page > 1:
|
||||||
@@ -264,9 +264,10 @@ class APIBrowser:
|
|||||||
'article_id': article_id,
|
'article_id': article_id,
|
||||||
})
|
})
|
||||||
|
|
||||||
# 获取总页数
|
# 获取总页数 / 总记录数
|
||||||
total_pages = 1
|
total_pages = 1
|
||||||
next_page_url = None
|
next_page_url = None
|
||||||
|
total_records = 0
|
||||||
|
|
||||||
page_content = soup.find(id='PageContent')
|
page_content = soup.find(id='PageContent')
|
||||||
if page_content:
|
if page_content:
|
||||||
@@ -282,11 +283,11 @@ class APIBrowser:
|
|||||||
if next_href:
|
if next_href:
|
||||||
next_page_url = f"{BASE_URL}/admin/{next_href}"
|
next_page_url = f"{BASE_URL}/admin/{next_href}"
|
||||||
|
|
||||||
return articles, total_pages, next_page_url
|
return articles, total_pages, next_page_url, total_records
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.log(f"[API] 获取列表失败: {str(e)}")
|
self.log(f"[API] 获取列表失败: {str(e)}")
|
||||||
return [], 0, None
|
return [], 0, None, 0
|
||||||
|
|
||||||
def get_article_attachments(self, article_href: str):
|
def get_article_attachments(self, article_href: str):
|
||||||
"""获取文章的附件列表"""
|
"""获取文章的附件列表"""
|
||||||
@@ -338,14 +339,19 @@ class APIBrowser:
|
|||||||
except:
|
except:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def browse_content(self, browse_type: str,
|
def browse_content(
|
||||||
should_stop_callback: Optional[Callable] = None) -> APIBrowseResult:
|
self,
|
||||||
|
browse_type: str,
|
||||||
|
should_stop_callback: Optional[Callable] = None,
|
||||||
|
progress_callback: Optional[Callable] = None,
|
||||||
|
) -> APIBrowseResult:
|
||||||
"""
|
"""
|
||||||
浏览内容并标记已读
|
浏览内容并标记已读
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
browse_type: 浏览类型 (应读/注册前未读)
|
browse_type: 浏览类型 (应读/注册前未读)
|
||||||
should_stop_callback: 检查是否应该停止的回调函数
|
should_stop_callback: 检查是否应该停止的回调函数
|
||||||
|
progress_callback: 进度回调(可选),用于实时上报已处理/总数
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
浏览结果
|
浏览结果
|
||||||
@@ -368,13 +374,14 @@ class APIBrowser:
|
|||||||
self.log(f"[API] 开始浏览 '{browse_type}' (bz={bz})...")
|
self.log(f"[API] 开始浏览 '{browse_type}' (bz={bz})...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
total_items = 0
|
browsed_items = 0
|
||||||
total_attachments = 0
|
viewed_attachments = 0
|
||||||
|
discovered_attachments = 0
|
||||||
page = 1
|
page = 1
|
||||||
base_url = None
|
base_url = None
|
||||||
|
|
||||||
# 获取第一页
|
# 获取第一页
|
||||||
articles, total_pages, next_url = self.get_article_list_page(bz, page)
|
articles, total_pages, next_url, total_records = self.get_article_list_page(bz, page)
|
||||||
|
|
||||||
if not articles:
|
if not articles:
|
||||||
self.log(f"[API] '{browse_type}' 没有待处理内容")
|
self.log(f"[API] '{browse_type}' 没有待处理内容")
|
||||||
@@ -386,6 +393,32 @@ class APIBrowser:
|
|||||||
if next_url:
|
if next_url:
|
||||||
base_url = next_url
|
base_url = next_url
|
||||||
|
|
||||||
|
last_report_ts = 0.0
|
||||||
|
|
||||||
|
def report_progress(force: bool = False):
|
||||||
|
nonlocal last_report_ts
|
||||||
|
if not progress_callback:
|
||||||
|
return
|
||||||
|
now_ts = time.time()
|
||||||
|
if not force and now_ts - last_report_ts < 1.0:
|
||||||
|
return
|
||||||
|
last_report_ts = now_ts
|
||||||
|
try:
|
||||||
|
progress_callback(
|
||||||
|
{
|
||||||
|
"total_items": int(total_records or 0),
|
||||||
|
"browsed_items": int(browsed_items or 0),
|
||||||
|
"total_attachments": int(discovered_attachments or 0),
|
||||||
|
"viewed_attachments": int(viewed_attachments or 0),
|
||||||
|
"page": int(page or 0),
|
||||||
|
"total_pages": int(total_pages or 0),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
report_progress(force=True)
|
||||||
|
|
||||||
# 处理所有页面
|
# 处理所有页面
|
||||||
while True:
|
while True:
|
||||||
if should_stop_callback and should_stop_callback():
|
if should_stop_callback and should_stop_callback():
|
||||||
@@ -397,17 +430,20 @@ class APIBrowser:
|
|||||||
break
|
break
|
||||||
|
|
||||||
title = article['title'][:30]
|
title = article['title'][:30]
|
||||||
total_items += 1
|
browsed_items += 1
|
||||||
|
|
||||||
# 获取附件
|
# 获取附件
|
||||||
attachments = self.get_article_attachments(article['href'])
|
attachments = self.get_article_attachments(article['href'])
|
||||||
|
|
||||||
if attachments:
|
if attachments:
|
||||||
|
discovered_attachments += len(attachments)
|
||||||
for attach in attachments:
|
for attach in attachments:
|
||||||
if self.mark_read(attach['id'], attach['channel_id']):
|
if self.mark_read(attach['id'], attach['channel_id']):
|
||||||
total_attachments += 1
|
viewed_attachments += 1
|
||||||
|
|
||||||
self.log(f"[API] [{total_items}] {title} - {len(attachments)}个附件")
|
self.log(f"[API] [{browsed_items}] {title} - {len(attachments)}个附件")
|
||||||
|
|
||||||
|
report_progress()
|
||||||
|
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
|
||||||
@@ -416,20 +452,22 @@ class APIBrowser:
|
|||||||
if page > total_pages:
|
if page > total_pages:
|
||||||
break
|
break
|
||||||
|
|
||||||
articles, _, next_url = self.get_article_list_page(bz, page, base_url)
|
articles, _, next_url, _ = self.get_article_list_page(bz, page, base_url)
|
||||||
if not articles:
|
if not articles:
|
||||||
break
|
break
|
||||||
|
|
||||||
if next_url:
|
if next_url:
|
||||||
base_url = next_url
|
base_url = next_url
|
||||||
|
|
||||||
|
report_progress(force=True)
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
|
|
||||||
self.log(f"[API] 浏览完成: {total_items} 条内容,{total_attachments} 个附件")
|
report_progress(force=True)
|
||||||
|
self.log(f"[API] 浏览完成: {browsed_items} 条内容,{viewed_attachments} 个附件")
|
||||||
|
|
||||||
result.success = True
|
result.success = True
|
||||||
result.total_items = total_items
|
result.total_items = browsed_items
|
||||||
result.total_attachments = total_attachments
|
result.total_attachments = viewed_attachments
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -590,12 +590,46 @@ def run_task(user_id, account_id, browse_type, enable_screenshot=True, source="m
|
|||||||
|
|
||||||
safe_update_task_status(account_id, {"detail_status": "正在浏览"})
|
safe_update_task_status(account_id, {"detail_status": "正在浏览"})
|
||||||
log_to_client(f"开始浏览 '{browse_type}' 内容...", user_id, account_id)
|
log_to_client(f"开始浏览 '{browse_type}' 内容...", user_id, account_id)
|
||||||
|
account.total_items = 0
|
||||||
|
account.total_attachments = 0
|
||||||
|
safe_update_task_status(account_id, {"progress": {"items": 0, "attachments": 0}})
|
||||||
|
|
||||||
def should_stop():
|
def should_stop():
|
||||||
return account.should_stop
|
return account.should_stop
|
||||||
|
|
||||||
|
last_progress_ts = 0.0
|
||||||
|
|
||||||
|
def on_browse_progress(progress: dict):
|
||||||
|
nonlocal last_progress_ts
|
||||||
|
try:
|
||||||
|
now_ts = time_module.time()
|
||||||
|
if now_ts - last_progress_ts < 0.5:
|
||||||
|
return
|
||||||
|
last_progress_ts = now_ts
|
||||||
|
|
||||||
|
total_items = int(progress.get("total_items") or 0)
|
||||||
|
browsed_items = int(progress.get("browsed_items") or 0)
|
||||||
|
total_attachments = int(progress.get("total_attachments") or 0)
|
||||||
|
viewed_attachments = int(progress.get("viewed_attachments") or 0)
|
||||||
|
|
||||||
|
if total_items > 0:
|
||||||
|
account.total_items = total_items
|
||||||
|
else:
|
||||||
|
account.total_items = max(int(getattr(account, "total_items", 0) or 0), browsed_items)
|
||||||
|
|
||||||
|
account.total_attachments = max(total_attachments, viewed_attachments)
|
||||||
|
safe_update_task_status(
|
||||||
|
account_id, {"progress": {"items": browsed_items, "attachments": viewed_attachments}}
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
checkpoint_mgr.update_stage(task_id, TaskStage.BROWSING, progress_percent=50)
|
checkpoint_mgr.update_stage(task_id, TaskStage.BROWSING, progress_percent=50)
|
||||||
result = api_browser.browse_content(browse_type=browse_type, should_stop_callback=should_stop)
|
result = api_browser.browse_content(
|
||||||
|
browse_type=browse_type,
|
||||||
|
should_stop_callback=should_stop,
|
||||||
|
progress_callback=on_browse_progress,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
error_message = "登录失败"
|
error_message = "登录失败"
|
||||||
log_to_client(f"❌ {error_message}", user_id, account_id)
|
log_to_client(f"❌ {error_message}", user_id, account_id)
|
||||||
|
|||||||
Reference in New Issue
Block a user