fix: use process uptime and host-service stats fallback

This commit is contained in:
2026-02-07 09:13:20 +08:00
parent d097571f62
commit f7832c3c15
2 changed files with 111 additions and 76 deletions

View File

@@ -3,6 +3,7 @@
from __future__ import annotations
import os
import socket
import time
from datetime import datetime
@@ -36,8 +37,8 @@ def get_browser_pool_stats():
stats = pool.get_stats() or {}
worker_details = []
for w in stats.get("workers") or []:
last_ts = float(w.get("last_active_ts") or 0)
for worker in stats.get("workers") or []:
last_ts = float(worker.get("last_active_ts") or 0)
last_active_at = None
if last_ts > 0:
try:
@@ -45,7 +46,7 @@ def get_browser_pool_stats():
except Exception:
last_active_at = None
created_ts = w.get("browser_created_at")
created_ts = worker.get("browser_created_at")
created_at = None
if created_ts:
try:
@@ -55,17 +56,17 @@ def get_browser_pool_stats():
worker_details.append(
{
"worker_id": w.get("worker_id"),
"idle": bool(w.get("idle")),
"has_browser": bool(w.get("has_browser")),
"total_tasks": int(w.get("total_tasks") or 0),
"failed_tasks": int(w.get("failed_tasks") or 0),
"browser_use_count": int(w.get("browser_use_count") or 0),
"worker_id": worker.get("worker_id"),
"idle": bool(worker.get("idle")),
"has_browser": bool(worker.get("has_browser")),
"total_tasks": int(worker.get("total_tasks") or 0),
"failed_tasks": int(worker.get("failed_tasks") or 0),
"browser_use_count": int(worker.get("browser_use_count") or 0),
"browser_created_at": created_at,
"browser_created_ts": created_ts,
"last_active_at": last_active_at,
"last_active_ts": last_ts,
"thread_alive": bool(w.get("thread_alive")),
"thread_alive": bool(worker.get("thread_alive")),
}
)
@@ -90,12 +91,47 @@ def get_browser_pool_stats():
return jsonify({"error": "获取截图线程池状态失败"}), 500
def _format_duration(seconds: int) -> str:
total = max(0, int(seconds or 0))
days = total // 86400
hours = (total % 86400) // 3600
minutes = (total % 3600) // 60
if days > 0:
return f"{days}{hours}小时{minutes}分钟"
if hours > 0:
return f"{hours}小时{minutes}分钟"
return f"{minutes}分钟"
def _fill_host_service_stats(docker_status: dict) -> None:
    """Fill ``docker_status`` in place with stats of the current service process.

    Used as the fallback when the service is not running inside a container.
    Memory usage is the process RSS, the "limit" is the machine's total
    memory, uptime is measured from the process creation time, and CPU usage
    is sampled over a 0.1 second interval.

    Args:
        docker_status: Status dict to update; existing keys are overwritten.

    Nothing is caught here: any error raised while collecting the values
    propagates to the caller, and ``docker_status`` is only modified once
    every value has been computed.
    """
    import psutil

    proc = psutil.Process(os.getpid())
    rss = float(proc.memory_info().rss or 0)
    system_total = float(psutil.virtual_memory().total or 0)

    if system_total > 0:
        share = rss / system_total * 100.0
        limit_text = f"{system_total / 1024 / 1024 / 1024:.2f} GB"
    else:
        # Total memory unknown: avoid dividing by zero and report no limit.
        share = 0.0
        limit_text = "N/A"

    host_label = f"host:{socket.gethostname()}"

    # Fall back to "now" if the creation time is unavailable, i.e. zero uptime.
    elapsed = time.time() - float(proc.create_time() or time.time())
    uptime_text = _format_duration(int(elapsed))

    usage_text = f"{rss / 1024 / 1024:.2f} MB"
    share_text = f"{share:.2f}%"

    # interval=0.1 makes psutil block briefly to measure a real CPU delta.
    cpu_sample = max(0.0, float(proc.cpu_percent(interval=0.1)))
    cpu_text = f"{cpu_sample:.2f}%"

    host_stats = {
        "running": True,
        "status": "Host Service",
        "container_name": host_label,
        "uptime": uptime_text,
        "memory_usage": usage_text,
        "memory_limit": limit_text,
        "memory_percent": share_text,
        "cpu_percent": cpu_text,
    }
    docker_status.update(host_stats)
@admin_api_bp.route("/docker_stats", methods=["GET"])
@admin_required
def get_docker_stats():
"""获取Docker容器运行状态"""
import subprocess
"""获取容器运行状态(非容器部署时返回当前服务进程状态)"""
docker_status = {
"running": False,
"container_name": "N/A",
@@ -112,115 +148,101 @@ def get_docker_stats():
docker_status["running"] = True
try:
with open("/etc/hostname", "r") as f:
docker_status["container_name"] = f.read().strip()
with open("/etc/hostname", "r", encoding="utf-8") as f:
docker_status["container_name"] = f.read().strip() or "N/A"
except Exception as e:
logger.debug(f"读取容器名称失败: {e}")
try:
if os.path.exists("/sys/fs/cgroup/memory.current"):
with open("/sys/fs/cgroup/memory.current", "r") as f:
with open("/sys/fs/cgroup/memory.current", "r", encoding="utf-8") as f:
mem_total = int(f.read().strip())
cache = 0
if os.path.exists("/sys/fs/cgroup/memory.stat"):
with open("/sys/fs/cgroup/memory.stat", "r") as f:
with open("/sys/fs/cgroup/memory.stat", "r", encoding="utf-8") as f:
for line in f:
if line.startswith("inactive_file "):
cache = int(line.split()[1])
break
mem_bytes = mem_total - cache
docker_status["memory_usage"] = "{:.2f} MB".format(mem_bytes / 1024 / 1024)
mem_bytes = max(0, mem_total - cache)
docker_status["memory_usage"] = f"{mem_bytes / 1024 / 1024:.2f} MB"
if os.path.exists("/sys/fs/cgroup/memory.max"):
with open("/sys/fs/cgroup/memory.max", "r") as f:
with open("/sys/fs/cgroup/memory.max", "r", encoding="utf-8") as f:
limit_str = f.read().strip()
if limit_str != "max":
limit_bytes = int(limit_str)
docker_status["memory_limit"] = "{:.2f} GB".format(limit_bytes / 1024 / 1024 / 1024)
docker_status["memory_percent"] = "{:.2f}%".format(mem_bytes / limit_bytes * 100)
if limit_bytes > 0:
docker_status["memory_limit"] = f"{limit_bytes / 1024 / 1024 / 1024:.2f} GB"
docker_status["memory_percent"] = f"{mem_bytes / limit_bytes * 100:.2f}%"
elif os.path.exists("/sys/fs/cgroup/memory/memory.usage_in_bytes"):
with open("/sys/fs/cgroup/memory/memory.usage_in_bytes", "r") as f:
with open("/sys/fs/cgroup/memory/memory.usage_in_bytes", "r", encoding="utf-8") as f:
mem_bytes = int(f.read().strip())
docker_status["memory_usage"] = "{:.2f} MB".format(mem_bytes / 1024 / 1024)
docker_status["memory_usage"] = f"{mem_bytes / 1024 / 1024:.2f} MB"
with open("/sys/fs/cgroup/memory/memory.limit_in_bytes", "r") as f:
with open("/sys/fs/cgroup/memory/memory.limit_in_bytes", "r", encoding="utf-8") as f:
limit_bytes = int(f.read().strip())
if limit_bytes < 1e18:
docker_status["memory_limit"] = "{:.2f} GB".format(limit_bytes / 1024 / 1024 / 1024)
docker_status["memory_percent"] = "{:.2f}%".format(mem_bytes / limit_bytes * 100)
if 0 < limit_bytes < 1e18:
docker_status["memory_limit"] = f"{limit_bytes / 1024 / 1024 / 1024:.2f} GB"
docker_status["memory_percent"] = f"{mem_bytes / limit_bytes * 100:.2f}%"
except Exception as e:
logger.debug(f"读取内存信息失败: {e}")
logger.debug(f"读取容器内存信息失败: {e}")
try:
if os.path.exists("/sys/fs/cgroup/cpu.stat"):
cpu_usage = 0
with open("/sys/fs/cgroup/cpu.stat", "r") as f:
usage1 = 0
with open("/sys/fs/cgroup/cpu.stat", "r", encoding="utf-8") as f:
for line in f:
if line.startswith("usage_usec"):
cpu_usage = int(line.split()[1])
usage1 = int(line.split()[1])
break
time.sleep(0.1)
cpu_usage2 = 0
with open("/sys/fs/cgroup/cpu.stat", "r") as f:
usage2 = 0
with open("/sys/fs/cgroup/cpu.stat", "r", encoding="utf-8") as f:
for line in f:
if line.startswith("usage_usec"):
cpu_usage2 = int(line.split()[1])
usage2 = int(line.split()[1])
break
cpu_percent = (cpu_usage2 - cpu_usage) / 0.1 / 1e6 * 100
docker_status["cpu_percent"] = "{:.2f}%".format(cpu_percent)
cpu_percent = (usage2 - usage1) / 0.1 / 1e6 * 100
docker_status["cpu_percent"] = f"{max(0.0, cpu_percent):.2f}%"
elif os.path.exists("/sys/fs/cgroup/cpu/cpuacct.usage"):
with open("/sys/fs/cgroup/cpu/cpuacct.usage", "r") as f:
cpu_usage = int(f.read().strip())
with open("/sys/fs/cgroup/cpu/cpuacct.usage", "r", encoding="utf-8") as f:
usage1 = int(f.read().strip())
time.sleep(0.1)
with open("/sys/fs/cgroup/cpu/cpuacct.usage", "r") as f:
cpu_usage2 = int(f.read().strip())
cpu_percent = (cpu_usage2 - cpu_usage) / 0.1 / 1e9 * 100
docker_status["cpu_percent"] = "{:.2f}%".format(cpu_percent)
with open("/sys/fs/cgroup/cpu/cpuacct.usage", "r", encoding="utf-8") as f:
usage2 = int(f.read().strip())
cpu_percent = (usage2 - usage1) / 0.1 / 1e9 * 100
docker_status["cpu_percent"] = f"{max(0.0, cpu_percent):.2f}%"
except Exception as e:
logger.debug(f"读取CPU信息失败: {e}")
logger.debug(f"读取容器CPU信息失败: {e}")
try:
# 读取系统运行时间
with open('/proc/uptime', 'r') as f:
with open("/proc/uptime", "r", encoding="utf-8") as f:
system_uptime = float(f.read().split()[0])
# 读取 PID 1 的启动时间 (jiffies)
with open('/proc/1/stat', 'r') as f:
with open("/proc/1/stat", "r", encoding="utf-8") as f:
stat = f.read().split()
starttime_jiffies = int(stat[21])
# 获取 CLK_TCK (通常是 100)
clk_tck = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
# 计算容器运行时长(秒)
container_uptime_seconds = system_uptime - (starttime_jiffies / clk_tck)
# 格式化为可读字符串
days = int(container_uptime_seconds // 86400)
hours = int((container_uptime_seconds % 86400) // 3600)
minutes = int((container_uptime_seconds % 3600) // 60)
if days > 0:
docker_status["uptime"] = f"{days}{hours}小时{minutes}分钟"
elif hours > 0:
docker_status["uptime"] = f"{hours}小时{minutes}分钟"
else:
docker_status["uptime"] = f"{minutes}分钟"
clk_tck = os.sysconf(os.sysconf_names["SC_CLK_TCK"])
uptime_seconds = int(system_uptime - (starttime_jiffies / clk_tck))
docker_status["uptime"] = _format_duration(uptime_seconds)
except Exception as e:
logger.debug(f"获取容器运行时间失败: {e}")
docker_status["status"] = "Running"
else:
docker_status["status"] = "Not in Docker"
_fill_host_service_stats(docker_status)
except Exception as e:
docker_status["status"] = f"Error: {str(e)}"
logger.exception(f"获取容器/服务状态失败: {e}")
docker_status["status"] = f"Error: {e}"
return jsonify(docker_status)

View File

@@ -211,11 +211,24 @@ def get_server_info_api():
disk_used = f"{disk.used / (1024**3):.1f}GB"
disk_percent = disk.percent
boot_time = datetime.fromtimestamp(psutil.boot_time(), tz=BEIJING_TZ)
uptime_delta = get_beijing_now() - boot_time
days = uptime_delta.days
hours = uptime_delta.seconds // 3600
uptime = f"{days}{hours}小时"
try:
process = psutil.Process()
process_start_at = datetime.fromtimestamp(process.create_time(), tz=BEIJING_TZ)
uptime_delta = get_beijing_now() - process_start_at
except Exception:
boot_time = datetime.fromtimestamp(psutil.boot_time(), tz=BEIJING_TZ)
uptime_delta = get_beijing_now() - boot_time
uptime_seconds = max(0, int(uptime_delta.total_seconds()))
days = uptime_seconds // 86400
hours = (uptime_seconds % 86400) // 3600
minutes = (uptime_seconds % 3600) // 60
if days > 0:
uptime = f"{days}{hours}小时"
elif hours > 0:
uptime = f"{hours}小时{minutes}分钟"
else:
uptime = f"{minutes}分钟"
return jsonify(
{