Files
S905L3A/watchdog/print-watchdog.sh
2026-06-09 14:58:13 +08:00

163 lines
4.0 KiB
Bash

#!/bin/bash
#
# Print service watchdog for the CUPS print server.
# It checks systemd service state plus quick CUPS command/HTTP probes.
# When checks fail repeatedly, it restarts CUPS and related services.
#
# Treat any reference to an unset variable as an error.
set -u

# Location of the optional config file; the environment may point elsewhere.
CONFIG_FILE="${CUPS_PRINT_WATCHDOG_CONFIG:-/etc/cups-watchdog/print-watchdog.conf}"

# ---- Defaults (the config file, sourced below, may override any of them) ----
# Where the failure counter and last-restart timestamp are kept.
STATE_DIR="/run/cups-watchdog"
# Log file that log_msg appends to.
LOG_FILE="/var/log/cups-watchdog/print.log"
# Space-separated unit names, without the ".service" suffix, to monitor.
SERVICES="cups avahi-daemon cups-driver-manager"
# URL fetched by the HTTP probe.
CUPS_URL="http://127.0.0.1:631/"
# Probe switches: the value "1" enables the probe.
CHECK_LPSTAT=1
CHECK_CUPS_HTTP=1
# Upper bound, in seconds, for each lpstat/curl probe.
COMMAND_TIMEOUT=8
# Number of consecutive failed checks that triggers a restart.
FAIL_THRESHOLD=2
# Minimum number of seconds between two restarts.
RESTART_COOLDOWN=60

# Local overrides, if a config file is present.
if [[ -f "$CONFIG_FILE" ]]; then
  # shellcheck disable=SC1090
  source "$CONFIG_FILE"
fi

# Prepare the state and log directories; the log directory is best-effort.
mkdir -p "$STATE_DIR"
mkdir -p "$(dirname "$LOG_FILE")" 2>/dev/null || true
# Print a timestamped message on stdout and append the same line to LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
# Outputs:   "YYYY-MM-DD HH:MM:SS [print-watchdog] <message>" on stdout
# Returns:   0; failing to append to the log file is deliberately ignored
log_msg() {
  local msg="$1"
  local line
  line="$(date '+%Y-%m-%d %H:%M:%S') [print-watchdog] $msg"
  printf '%s\n' "$line"
  # Best-effort append. The brace group is required: redirections are applied
  # left to right, so when '>> "$LOG_FILE"' cannot be opened the shell reports
  # it before a trailing '2>/dev/null' on the same command would take effect.
  { printf '%s\n' "$line" >> "$LOG_FILE"; } 2>/dev/null || true
}
# Tell whether systemd knows a unit called "<name>.service".
# Arguments: $1 - service name without the ".service" suffix
# Returns:   0 if the unit is listed by list-unit-files or list-units,
#            non-zero otherwise
unit_exists() {
  local unit="$1.service"
  # -F matches the unit name literally (the "." is not a regex wildcard) and
  # "--" protects against names that start with a dash.
  if systemctl list-unit-files --no-legend "$unit" 2>/dev/null \
    | awk '{print $1}' | grep -qxF -- "$unit"; then
    return 0
  fi
  # Fallback for units that are loaded but have no unit file entry.
  # --plain drops the status bullet that list-units prints in front of
  # failed/not-found units; without it the first column is the bullet, not
  # the unit name.
  systemctl list-units --all --plain --no-legend "$unit" 2>/dev/null \
    | awk '{print $1}' | grep -qxF -- "$unit"
}
# Decide whether a service is relevant on this host.
# Arguments: $1 - service name without the ".service" suffix
# Returns:   0 when the unit exists and is either enabled or currently
#            active; 1 otherwise
service_should_check() {
  local unit="$1.service"
  local probe

  # Units systemd does not know about are never monitored.
  unit_exists "$1" || return 1

  # Either "enabled" or "active" is enough to qualify.
  for probe in is-enabled is-active; do
    if systemctl "$probe" --quiet "$unit" 2>/dev/null; then
      return 0
    fi
  done
  return 1
}
# Verify that every relevant service listed in SERVICES is active.
# Globals:   SERVICES (read; space-separated service names)
# Outputs:   one log line per inactive service, via log_msg
# Returns:   0 when all checked services are active, 1 if any is not
check_systemd_services() {
  local svc
  local status=0

  # SERVICES is intentionally unquoted: it is a whitespace-separated list.
  for svc in $SERVICES; do
    # Services that are neither enabled nor running are not monitored.
    if service_should_check "$svc" \
      && ! systemctl is-active --quiet "$svc.service" 2>/dev/null; then
      log_msg "service not active: $svc"
      status=1
    fi
  done

  return "$status"
}
# Probe the CUPS scheduler with "lpstat -r", bounded by COMMAND_TIMEOUT.
# Globals:   CHECK_LPSTAT, COMMAND_TIMEOUT (read)
# Returns:   0 when the probe is disabled, lpstat is not installed, or the
#            probe succeeds; otherwise the non-zero status of timeout/lpstat
check_lpstat() {
  # Nothing to test when the probe is switched off or lpstat is unavailable.
  if [[ "$CHECK_LPSTAT" != "1" ]] || ! command -v lpstat >/dev/null 2>&1; then
    return 0
  fi
  timeout "$COMMAND_TIMEOUT" lpstat -r >/dev/null 2>&1
}
# Probe the CUPS web interface by fetching CUPS_URL with curl.
# Globals:   CHECK_CUPS_HTTP, COMMAND_TIMEOUT, CUPS_URL (read)
# Returns:   0 when the probe is disabled, curl is not installed, or the
#            request succeeds; otherwise curl's non-zero status
check_http() {
  # Nothing to test when the probe is switched off or curl is unavailable.
  if [[ "$CHECK_CUPS_HTTP" != "1" ]] || ! command -v curl >/dev/null 2>&1; then
    return 0
  fi
  # -f turns HTTP error responses into a failing exit status.
  curl -fsS --max-time "$COMMAND_TIMEOUT" "$CUPS_URL" >/dev/null 2>&1
}
# Restart CUPS and its companion services, rate-limited by a cooldown.
# Globals:   STATE_DIR, RESTART_COOLDOWN (read)
# Outputs:   progress messages via log_msg
# Side effects: writes the restart time (epoch seconds) to
#            $STATE_DIR/print.last_restart
# Returns:   0 when the restart was skipped (cooldown) or cups.service was
#            restarted; non-zero when restarting cups.service failed or the
#            timestamp could not be written
restart_print_stack() {
  local now
  local last_file="$STATE_DIR/print.last_restart"
  local last=0
  local service
  local rc=0

  now="$(date +%s)"
  if [ -f "$last_file" ]; then
    last="$(cat "$last_file" 2>/dev/null || echo 0)"
  fi
  # Only a plain decimal number is a usable timestamp. Anything else would be
  # evaluated as an arithmetic expression below; a corrupt file would then
  # abort this function and block every future restart.
  if ! [[ "$last" =~ ^[0-9]+$ ]]; then
    last=0
  fi

  # 10# forces base 10 so a value with leading zeros is not read as octal.
  if [ $((now - 10#$last)) -lt "$RESTART_COOLDOWN" ]; then
    log_msg "restart skipped: cooldown active"
    return 0
  fi

  log_msg "restarting print stack"
  systemctl reset-failed cups.service >/dev/null 2>&1 || true
  systemctl restart cups.service || rc=$?
  if [ "$rc" -ne 0 ]; then
    log_msg "restart of cups.service failed (exit status $rc)"
  fi

  # Companion services are restarted best-effort and only when relevant here.
  for service in avahi-daemon cups-driver-manager; do
    service_should_check "$service" || continue
    systemctl reset-failed "$service.service" >/dev/null 2>&1 || true
    systemctl restart "$service.service" >/dev/null 2>&1 || true
  done

  # Record the attempt even when it failed, so the cooldown still throttles
  # repeated restart attempts.
  printf '%s\n' "$now" > "$last_file" || rc=1
  return "$rc"
}
# Run one watchdog cycle: probe the print stack, track consecutive failures
# and trigger a restart once FAIL_THRESHOLD is reached.
# Globals:   STATE_DIR, FAIL_THRESHOLD (read)
# Outputs:   status messages via log_msg
# Side effects: maintains the failure counter in $STATE_DIR/print.fail_count
# Returns:   0 when all probes pass; otherwise the status of the last
#            command executed
check_print_stack() {
  local count_file="$STATE_DIR/print.fail_count"
  local fail_count=0
  local failed=0

  # Run every probe, even after one has failed, so each problem gets logged.
  check_systemd_services || failed=1
  if ! check_lpstat; then
    log_msg "lpstat check failed or timed out"
    failed=1
  fi
  if ! check_http; then
    log_msg "CUPS HTTP check failed or timed out"
    failed=1
  fi

  # Healthy: clear the consecutive-failure counter.
  if [ "$failed" = "0" ]; then
    echo 0 > "$count_file"
    log_msg "print stack ok"
    return 0
  fi

  if [ -f "$count_file" ]; then
    fail_count="$(cat "$count_file" 2>/dev/null || echo 0)"
  fi
  # Only a plain decimal number is a usable counter. Anything else would be
  # evaluated as an arithmetic expression below and abort the check.
  if ! [[ "$fail_count" =~ ^[0-9]+$ ]]; then
    fail_count=0
  fi
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  fail_count=$((10#$fail_count + 1))
  echo "$fail_count" > "$count_file"
  log_msg "print stack failed ($fail_count/$FAIL_THRESHOLD)"

  if [ "$fail_count" -ge "$FAIL_THRESHOLD" ]; then
    restart_print_stack
    # Start counting afresh after a restart attempt.
    echo 0 > "$count_file"
  fi
}
# Command-line entry point. With no argument, "check" is assumed.
#   check   - run one probe cycle (may trigger a restart)
#   restart - request a restart of the print stack right away
case "${1:-check}" in
  check)
    check_print_stack
    ;;
  restart)
    restart_print_stack
    ;;
  *)
    # A usage error is a diagnostic: send it to stderr, not stdout.
    printf 'Usage: %s [check|restart]\n' "$0" >&2
    exit 2
    ;;
esac