#!/bin/bash
#
# Print service watchdog for the CUPS print server.
# It checks systemd service state plus quick CUPS command/HTTP probes.
# When checks fail repeatedly, it restarts CUPS and related services.
#
# Usage: print-watchdog [check|restart]
#   check    (default) run every probe once; after FAIL_THRESHOLD failing
#            runs in a row, restart the print stack
#   restart  restart the print stack now, subject to RESTART_COOLDOWN
#
# The settings below may be overridden by the file named in
# CUPS_PRINT_WATCHDOG_CONFIG (default:
# /etc/cups-watchdog/print-watchdog.conf), which is sourced as shell code.
#
# State kept in STATE_DIR:
#   print.fail_count    number of failing check runs in a row
#   print.last_restart  epoch seconds of the last restart attempt
#

# Only -u is enabled: the probes below signal "unhealthy" through non-zero
# exit statuses, which -e would turn into an abort.
set -u

CONFIG_FILE="${CUPS_PRINT_WATCHDOG_CONFIG:-/etc/cups-watchdog/print-watchdog.conf}"
STATE_DIR="/run/cups-watchdog"
SERVICES="cups avahi-daemon cups-driver-manager"
CUPS_URL="http://127.0.0.1:631/"
CHECK_CUPS_HTTP=1
CHECK_LPSTAT=1
COMMAND_TIMEOUT=8
FAIL_THRESHOLD=2
RESTART_COOLDOWN=60
LOG_FILE="/var/log/cups-watchdog/print.log"

if [ -f "$CONFIG_FILE" ]; then
  # shellcheck disable=SC1090
  . "$CONFIG_FILE"
fi

#######################################
# Print a timestamped message to stdout and append it to LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
# Returns:   0, even when the log file cannot be written
#######################################
log_msg() {
  local msg="$1"
  local line
  line="$(date '+%Y-%m-%d %H:%M:%S') [print-watchdog] $msg"
  printf '%s\n' "$line"
  # Redirections apply left to right, so the append is grouped to make
  # 2>/dev/null also cover a failure to open LOG_FILE.
  { printf '%s\n' "$line" >> "$LOG_FILE"; } 2>/dev/null || true
}

#######################################
# Replace a non-integer setting with a default and log that it happened.
# Arguments: $1 - name of the global variable to validate
#            $2 - fallback value
#######################################
ensure_uint() {
  local name="$1"
  local fallback="$2"
  local value="${!name:-}"
  if [[ ! "$value" =~ ^[0-9]+$ ]]; then
    log_msg "invalid $name='$value'; using default $fallback"
    printf -v "$name" '%s' "$fallback"
  fi
}

#######################################
# Print the non-negative integer stored on the first line of a state file.
# Prints 0 when the file is missing, unreadable, empty or not all digits,
# so the content is never evaluated as an arithmetic expression.
# Arguments: $1 - path of the state file
# Outputs:   the integer (base 10, leading zeros removed) on stdout
#######################################
read_state_int() {
  local file="$1"
  local value=""
  if [ -r "$file" ]; then
    # read returns non-zero on an empty file or missing final newline;
    # whatever it captured is validated below.
    { read -r value < "$file"; } 2>/dev/null || true
  fi
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    # 10# keeps values such as "08" from being parsed as octal.
    printf '%s\n' "$((10#$value))"
  else
    printf '0\n'
  fi
}

#######################################
# Tell whether systemd lists a unit file or a unit for a service.
# Arguments: $1 - service name without the ".service" suffix
# Returns:   0 if listed by list-unit-files or list-units, else non-zero
#######################################
unit_exists() {
  local unit="$1.service"
  # -F: match the unit name literally ("." must not act as a wildcard).
  systemctl list-unit-files --no-legend "$unit" 2>/dev/null \
    | awk '{print $1}' \
    | grep -qxF -- "$unit" && return 0
  # --plain drops the leading status bullet so the name stays in column 1.
  systemctl list-units --all --plain --no-legend "$unit" 2>/dev/null \
    | awk '{print $1}' \
    | grep -qxF -- "$unit"
}

#######################################
# Decide whether a service takes part in monitoring: it must be known to
# systemd and be either enabled or currently active.
# Arguments: $1 - service name without the ".service" suffix
# Returns:   0 if the service should be checked, 1 otherwise
#######################################
service_should_check() {
  local service="$1"
  unit_exists "$service" || return 1
  systemctl is-enabled --quiet "$service.service" 2>/dev/null && return 0
  systemctl is-active --quiet "$service.service" 2>/dev/null && return 0
  return 1
}

#######################################
# Check that every monitored service in SERVICES is active.
# Globals:   SERVICES (read; whitespace-separated service names)
# Outputs:   one log line per inactive service
# Returns:   0 if all monitored services are active, 1 otherwise
#######################################
check_systemd_services() {
  local service
  local failed=0
  # Word splitting of SERVICES is intentional: it is a space-separated list.
  for service in $SERVICES; do
    service_should_check "$service" || continue
    if ! systemctl is-active --quiet "$service.service" 2>/dev/null; then
      log_msg "service not active: $service"
      failed=1
    fi
  done
  return "$failed"
}

#######################################
# Probe CUPS with "lpstat -r", bounded by COMMAND_TIMEOUT.
# Globals:   CHECK_LPSTAT, COMMAND_TIMEOUT (read)
# Returns:   0 if the probe is disabled, lpstat is not installed, or the
#            command succeeds; non-zero otherwise
#######################################
check_lpstat() {
  [ "$CHECK_LPSTAT" = "1" ] || return 0
  command -v lpstat >/dev/null 2>&1 || return 0
  if command -v timeout >/dev/null 2>&1; then
    timeout "$COMMAND_TIMEOUT" lpstat -r >/dev/null 2>&1
  else
    # Without timeout(1) the probe would always report 127 and trigger
    # needless restarts, so run lpstat unbounded instead.
    lpstat -r >/dev/null 2>&1
  fi
}

#######################################
# Probe CUPS_URL with curl, bounded by COMMAND_TIMEOUT.
# Globals:   CHECK_CUPS_HTTP, COMMAND_TIMEOUT, CUPS_URL (read)
# Returns:   0 if the probe is disabled, curl is not installed, or the
#            request succeeds; curl's non-zero status otherwise
#######################################
check_http() {
  [ "$CHECK_CUPS_HTTP" = "1" ] || return 0
  command -v curl >/dev/null 2>&1 || return 0
  curl -fsS --max-time "$COMMAND_TIMEOUT" "$CUPS_URL" >/dev/null 2>&1
}

#######################################
# Restart cups.service and, when monitored, avahi-daemon and
# cups-driver-manager, unless a restart was attempted less than
# RESTART_COOLDOWN seconds ago.
# Globals:   STATE_DIR, RESTART_COOLDOWN (read)
# Outputs:   log lines; updates print.last_restart in STATE_DIR
# Returns:   0 when skipped by the cooldown or when cups.service
#            restarted; otherwise the exit status of the failed restart
#######################################
restart_print_stack() {
  local last_file="$STATE_DIR/print.last_restart"
  local now
  local last
  local service
  local rc=0

  now="$(date +%s)"
  last="$(read_state_int "$last_file")"

  if [ "$((now - last))" -lt "$RESTART_COOLDOWN" ]; then
    log_msg "restart skipped: cooldown active"
    return 0
  fi

  log_msg "restarting print stack"
  systemctl reset-failed cups.service >/dev/null 2>&1 || true
  systemctl restart cups.service || rc=$?
  if [ "$rc" -ne 0 ]; then
    log_msg "restart of cups.service failed (exit $rc)"
  fi

  # The companion services are restarted on a best-effort basis.
  for service in avahi-daemon cups-driver-manager; do
    service_should_check "$service" || continue
    systemctl reset-failed "$service.service" >/dev/null 2>&1 || true
    systemctl restart "$service.service" >/dev/null 2>&1 || true
  done

  # Recorded even after a failed attempt so the cooldown also throttles
  # retries of a restart that keeps failing.
  printf '%s\n' "$now" > "$last_file"
  return "$rc"
}

#######################################
# Run all probes once and maintain the consecutive-failure counter.
# A healthy run resets the counter. A failing run increments it and, once
# it reaches FAIL_THRESHOLD, calls restart_print_stack and resets it.
# Globals:   STATE_DIR, FAIL_THRESHOLD (read)
# Outputs:   log lines; updates print.fail_count in STATE_DIR
# Returns:   0 (problems are reported through log_msg)
#######################################
check_print_stack() {
  local count_file="$STATE_DIR/print.fail_count"
  local fail_count
  local failed=0

  check_systemd_services || failed=1
  if ! check_lpstat; then
    log_msg "lpstat check failed or timed out"
    failed=1
  fi
  if ! check_http; then
    log_msg "CUPS HTTP check failed or timed out"
    failed=1
  fi

  if [ "$failed" = "0" ]; then
    printf '0\n' > "$count_file"
    log_msg "print stack ok"
    return 0
  fi

  fail_count="$(read_state_int "$count_file")"
  fail_count=$((fail_count + 1))
  printf '%s\n' "$fail_count" > "$count_file"
  log_msg "print stack failed ($fail_count/$FAIL_THRESHOLD)"

  if [ "$fail_count" -ge "$FAIL_THRESHOLD" ]; then
    restart_print_stack
    printf '0\n' > "$count_file"
  fi
  return 0
}

#######################################
# Dispatch the command-line action.
# Arguments: $1 - "check" (default) or "restart"
# Returns:   status of the selected action; exits 2 on an unknown action
#######################################
main() {
  case "${1:-check}" in
    check)
      check_print_stack
      ;;
    restart)
      restart_print_stack
      ;;
    *)
      printf 'Usage: %s [check|restart]\n' "$0" >&2
      exit 2
      ;;
  esac
}

# Directory creation is best effort; a failure is reported and the run
# continues.
mkdir -p "$(dirname "$LOG_FILE")" 2>/dev/null || true
if ! mkdir -p "$STATE_DIR"; then
  log_msg "cannot create state directory: $STATE_DIR"
fi

# These two are compared with integer tests, so reject anything else.
ensure_uint FAIL_THRESHOLD 2
ensure_uint RESTART_COOLDOWN 60

main "$@"