feat: add CUPS watchdog timers

This commit is contained in:
2026-06-09 14:58:13 +08:00
parent 2a688d2514
commit 00d512f9d9
11 changed files with 668 additions and 0 deletions

View File

@@ -0,0 +1,8 @@
# Oneshot service that runs the network watchdog script once in "check" mode.
[Unit]
Description=CUPS network watchdog
# Order this unit after network-online.target and (weakly) pull that target in.
After=network-online.target
Wants=network-online.target
[Service]
# oneshot: systemd waits for ExecStart to exit before treating the unit as started.
Type=oneshot
ExecStart=/opt/cups-watchdog/network-watchdog.sh check

View File

@@ -0,0 +1,11 @@
# Timer that periodically triggers the CUPS network watchdog.
# No Unit= is given, so systemd activates the service that shares this timer's
# base name.
[Unit]
Description=Run CUPS network watchdog every minute
[Timer]
# First run 2 minutes after boot, then 1 minute after each activation of the
# triggered unit.
OnBootSec=2min
OnUnitActiveSec=1min
# Let systemd shift a trigger by up to 15 seconds.
AccuracySec=15s
# NOTE(review): systemd.timer(5) documents Persistent= as only affecting
# OnCalendar= timers; none is configured here, so this line looks like a
# no-op - confirm before relying on it.
Persistent=true
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,8 @@
# Oneshot service that runs the print watchdog script once in "check" mode.
[Unit]
Description=CUPS print service watchdog
# Order this unit after cups.service and (weakly) pull cups.service in.
After=cups.service
Wants=cups.service
[Service]
# oneshot: systemd waits for ExecStart to exit before treating the unit as started.
Type=oneshot
ExecStart=/opt/cups-watchdog/print-watchdog.sh check

View File

@@ -0,0 +1,11 @@
# Timer that periodically triggers the CUPS print service watchdog.
# No Unit= is given, so systemd activates the service that shares this timer's
# base name.
[Unit]
Description=Run CUPS print service watchdog every minute
[Timer]
# First run 2 minutes after boot, then 1 minute after each activation of the
# triggered unit.
OnBootSec=2min
OnUnitActiveSec=1min
# Let systemd shift a trigger by up to 15 seconds.
AccuracySec=15s
# NOTE(review): systemd.timer(5) documents Persistent= as only affecting
# OnCalendar= timers; none is configured here, so this line looks like a
# no-op - confirm before relying on it.
Persistent=true
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,275 @@
#!/bin/bash
#
# Network watchdog for the CUPS print server.
# Default behavior: when connectivity checks fail repeatedly, switch the
# configured interface back to DHCP so the box can regain network access.
#
# Edit /etc/cups-watchdog/network-watchdog.conf to set STATIC_IP,
# STATIC_PREFIX, STATIC_GATEWAY and STATIC_DNS. You can also run:
# /opt/cups-watchdog/network-watchdog.sh static
# /opt/cups-watchdog/network-watchdog.sh dhcp
#
# Abort on any reference to an unset variable.
set -o nounset

# Location of the optional settings file; the CUPS_NETWORK_WATCHDOG_CONFIG
# environment variable overrides the default path.
CONFIG_FILE=${CUPS_NETWORK_WATCHDOG_CONFIG:-/etc/cups-watchdog/network-watchdog.conf}

# Built-in defaults. Every one of them is assigned before CONFIG_FILE is
# sourced, so the settings file may reassign any of them.
STATE_DIR=/run/cups-watchdog
LOG_FILE=/var/log/cups-watchdog/network.log
INTERFACE=
STATIC_IP=
STATIC_PREFIX=24
STATIC_GATEWAY=
STATIC_DNS='114.114.114.114 223.5.5.5'
PING_TARGETS='223.5.5.5 114.114.114.114'
FAIL_THRESHOLD=3
DHCP_AFTER_FAILURE=1

# Pull in site-specific overrides when the settings file is present.
if [[ -f $CONFIG_FILE ]]; then
  # shellcheck disable=SC1090
  source "$CONFIG_FILE"
fi

# Make sure the state directory exists; creating the log directory is
# best-effort and its failure is deliberately ignored.
mkdir -p "$STATE_DIR"
mkdir -p "$(dirname "$LOG_FILE")" 2>/dev/null || :
#######################################
# Print a timestamped watchdog message and append it to the log file.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
# Outputs:   the formatted line on stdout
# Returns:   0; writing to LOG_FILE is best-effort
#######################################
log_msg() {
  local msg="$1"
  local line
  line="$(date '+%Y-%m-%d %H:%M:%S') [network-watchdog] $msg"
  printf '%s\n' "$line"
  # Redirections are applied left to right, so a plain
  # `>> "$LOG_FILE" 2>/dev/null` still prints the shell's "cannot open" error
  # when the log is unwritable. Silencing stderr on the enclosing group makes
  # the suppression take effect before the log file is opened.
  { printf '%s\n' "$line" >> "$LOG_FILE"; } 2>/dev/null || true
}
#######################################
# Work out which network interface the watchdog should manage.
# Globals:   INTERFACE (read)
# Arguments: none
# Outputs:   the interface name on stdout; nothing when none can be found
# Returns:   0 when INTERFACE is configured and exists, otherwise the status
#            of the route-parsing pipeline (callers test for empty output)
#######################################
detect_interface() {
  if [ -n "${INTERFACE:-}" ] && ip link show "$INTERFACE" >/dev/null 2>&1; then
    printf '%s\n' "$INTERFACE"
    return 0
  fi
  # Take the token that follows "dev" instead of a fixed column: a default
  # route without a gateway ("default dev ppp0 scope link") has the interface
  # in a different position than "default via GW dev IFACE ...".
  ip route show default 2>/dev/null | awk '
    {
      for (i = 1; i < NF; i++) {
        if ($i == "dev") {
          print $(i + 1)
          exit
        }
      }
    }'
}
#######################################
# Look up the active NetworkManager connection bound to an interface.
# Arguments: $1 - interface (device) name
# Outputs:   the connection name on stdout; nothing when no active connection
#            uses the device
# Returns:   status of the parsing pipeline (callers test for empty output)
#######################################
nm_connection_for_interface() {
  local iface="$1"
  # Terse output separates fields with ":" and, by default, backslash-escapes
  # any ":" inside a value. Asking for DEVICE first with escaping disabled
  # means the first ":" is always the field separator, so connection names
  # that contain a colon come back intact.
  nmcli -t -e no -f DEVICE,NAME con show --active 2>/dev/null \
    | awk -v iface="$iface" '
        {
          sep = index($0, ":")
          if (sep > 0 && substr($0, 1, sep - 1) == iface) {
            print substr($0, sep + 1)
            exit
          }
        }'
}
#######################################
# Switch an interface to DHCP through NetworkManager.
# Arguments: $1 - interface name
# Returns:   1 when the connection profile could not be modified, otherwise
#            the status of `nmcli con up`
#######################################
configure_dhcp_nmcli() {
  local iface="$1"
  local conn
  conn="$(nm_connection_for_interface "$iface")"
  # No active connection found: fall back to a profile named after the device.
  [ -n "$conn" ] || conn="$iface"
  # Stop here when the profile cannot be changed; bouncing the connection with
  # its old settings would only interrupt the network without fixing anything.
  if ! nmcli con mod "$conn" ipv4.method auto ipv4.addresses "" ipv4.gateway "" ipv4.dns "" >/dev/null; then
    log_msg "nmcli could not update connection '$conn'; leaving it untouched"
    return 1
  fi
  # The connection may already be down, so a failing "down" is not an error.
  nmcli con down "$conn" >/dev/null 2>&1 || true
  nmcli con up "$conn" >/dev/null
}
#######################################
# Switch an interface to DHCP by installing a netplan override and applying it.
# Globals:   NETPLAN_DIR (read, optional) - netplan config directory,
#            defaults to /etc/netplan
# Arguments: $1 - interface name
# Returns:   1 when the override could not be written, otherwise the status
#            of `netplan apply`
#######################################
configure_dhcp_netplan() {
  local iface="$1"
  local netplan_dir="${NETPLAN_DIR:-/etc/netplan}"
  local target_file="$netplan_dir/99-cups-watchdog-dhcp.yaml"
  local tmp_file

  # Build the file under a temporary name and rename it into place so that a
  # failed write never leaves a truncated config behind. mktemp creates the
  # file with owner-only permissions; the name has no .yaml suffix.
  if ! tmp_file="$(mktemp "$netplan_dir/.cups-watchdog.XXXXXX" 2>/dev/null)"; then
    log_msg "cannot create a temporary file in $netplan_dir"
    return 1
  fi
  if ! cat > "$tmp_file" << EOF
# DHCP recovery config generated by cups network watchdog.
network:
  version: 2
  renderer: networkd
  ethernets:
    $iface:
      dhcp4: yes
EOF
  then
    rm -f "$tmp_file"
    log_msg "cannot write $target_file"
    return 1
  fi
  chmod 600 "$tmp_file"
  if ! mv -f "$tmp_file" "$target_file"; then
    rm -f "$tmp_file"
    log_msg "cannot install $target_file"
    return 1
  fi

  # Only drop the static configs once the DHCP override is safely in place.
  rm -f "$netplan_dir/01-static-ip.yaml" \
    "$netplan_dir/99-cups-static-ip.yaml" \
    "$netplan_dir/99-cups-watchdog-static.yaml" 2>/dev/null || true
  netplan apply
}
#######################################
# Switch an interface to DHCP by rewriting the ifupdown interfaces file.
# Globals:   INTERFACES_FILE (read, optional) - file to rewrite, defaults to
#            /etc/network/interfaces
# Arguments: $1 - interface name
# Returns:   1 when the file could not be written, 0 otherwise (restarting
#            the networking service is best-effort)
#######################################
configure_dhcp_interfaces() {
  local iface="$1"
  local target_file="${INTERFACES_FILE:-/etc/network/interfaces}"
  local backup_file="$target_file.cups-watchdog.bak"

  # Back up only files the watchdog did not write itself. Otherwise the second
  # recovery attempt would replace the administrator's original configuration
  # in the backup with a watchdog-generated one.
  if [ -f "$target_file" ] \
    && ! grep -q 'generated by cups network watchdog' "$target_file" 2>/dev/null; then
    cp "$target_file" "$backup_file"
  fi

  if ! cat > "$target_file" << EOF
# DHCP recovery config generated by cups network watchdog.
auto lo
iface lo inet loopback
auto $iface
iface $iface inet dhcp
EOF
  then
    log_msg "cannot write $target_file"
    return 1
  fi
  # Try systemd first, then the SysV init script; neither is required to work.
  systemctl restart networking 2>/dev/null || /etc/init.d/networking restart 2>/dev/null || true
}
#######################################
# Switch the managed interface to DHCP using whichever network backend the
# host provides: NetworkManager, then netplan, then the interfaces file.
# Arguments: none
# Returns:   1 when no interface can be determined, otherwise the status of
#            the selected backend function
#######################################
configure_dhcp() {
  local dev
  local backend="interfaces"

  dev="$(detect_interface)"
  if [ -z "$dev" ]; then
    log_msg "cannot switch to DHCP: network interface not found"
    return 1
  fi
  log_msg "switching $dev to DHCP"

  # Pick the backend first, then dispatch to the matching helper.
  if command -v nmcli >/dev/null 2>&1 \
    && systemctl is-active --quiet NetworkManager 2>/dev/null; then
    backend="nmcli"
  elif [ -d /etc/netplan ] && command -v netplan >/dev/null 2>&1; then
    backend="netplan"
  fi
  "configure_dhcp_${backend}" "$dev"
}
#######################################
# Apply the configured static IPv4 settings through NetworkManager.
# Globals:   STATIC_IP, STATIC_PREFIX, STATIC_GATEWAY, STATIC_DNS (read)
# Arguments: $1 - interface name
# Returns:   1 when the connection profile could not be modified, otherwise
#            the status of `nmcli con up`
#######################################
configure_static_nmcli() {
  local iface="$1"
  local conn
  conn="$(nm_connection_for_interface "$iface")"
  # No active connection found: fall back to a profile named after the device.
  [ -n "$conn" ] || conn="$iface"
  # Stop here when the profile cannot be changed (for example an invalid
  # address); bouncing the connection with its old settings would only
  # interrupt the network.
  if ! nmcli con mod "$conn" ipv4.method manual ipv4.addresses "$STATIC_IP/$STATIC_PREFIX" ipv4.gateway "$STATIC_GATEWAY" ipv4.dns "$STATIC_DNS" >/dev/null; then
    log_msg "nmcli could not update connection '$conn'; leaving it untouched"
    return 1
  fi
  # The connection may already be down, so a failing "down" is not an error.
  nmcli con down "$conn" >/dev/null 2>&1 || true
  nmcli con up "$conn" >/dev/null
}
#######################################
# Apply the configured static IPv4 settings by installing a netplan override.
# Globals:   STATIC_IP, STATIC_PREFIX, STATIC_GATEWAY, STATIC_DNS (read)
#            NETPLAN_DIR (read, optional) - netplan config directory,
#            defaults to /etc/netplan
# Arguments: $1 - interface name
# Returns:   1 when the override could not be written, otherwise the status
#            of `netplan apply`
#######################################
configure_static_netplan() {
  local iface="$1"
  local netplan_dir="${NETPLAN_DIR:-/etc/netplan}"
  local target_file="$netplan_dir/99-cups-watchdog-static.yaml"
  local tmp_file
  local dns_list=""
  local d

  # Turn the space-separated DNS list into a YAML flow sequence body.
  for d in $STATIC_DNS; do
    dns_list="${dns_list:+$dns_list, }$d"
  done

  # Build the file under a temporary name and rename it into place so that a
  # failed write never leaves a truncated config behind. mktemp creates the
  # file with owner-only permissions; the name has no .yaml suffix.
  if ! tmp_file="$(mktemp "$netplan_dir/.cups-watchdog.XXXXXX" 2>/dev/null)"; then
    log_msg "cannot create a temporary file in $netplan_dir"
    return 1
  fi
  if ! cat > "$tmp_file" << EOF
# Static network config generated by cups network watchdog.
network:
  version: 2
  renderer: networkd
  ethernets:
    $iface:
      dhcp4: no
      addresses:
        - $STATIC_IP/$STATIC_PREFIX
      routes:
        - to: default
          via: $STATIC_GATEWAY
      nameservers:
        addresses: [$dns_list]
EOF
  then
    rm -f "$tmp_file"
    log_msg "cannot write $target_file"
    return 1
  fi
  chmod 600 "$tmp_file"
  if ! mv -f "$tmp_file" "$target_file"; then
    rm -f "$tmp_file"
    log_msg "cannot install $target_file"
    return 1
  fi

  # Only drop the DHCP configs once the static override is safely in place.
  rm -f "$netplan_dir/01-dhcp.yaml" \
    "$netplan_dir/99-cups-dhcp.yaml" \
    "$netplan_dir/99-cups-watchdog-dhcp.yaml" 2>/dev/null || true
  netplan apply
}
#######################################
# Convert an IPv4 prefix length into a dotted-quad netmask.
# Arguments: $1 - prefix length, 0-32
# Outputs:   the netmask on stdout
# Returns:   1 when the prefix is not an integer in 0-32
#######################################
_prefix_to_netmask() {
  local prefix="$1"
  local mask=""
  local bits octet i
  case "$prefix" in
    '' | *[!0-9]*) return 1 ;;
  esac
  # Force base 10 so a value such as "08" is not parsed as invalid octal.
  prefix=$((10#$prefix))
  [ "$prefix" -le 32 ] || return 1
  for i in 1 2 3 4; do
    # Each octet takes up to 8 of the remaining prefix bits.
    bits=$((prefix >= 8 ? 8 : prefix))
    prefix=$((prefix - bits))
    octet=$(((255 << (8 - bits)) & 255))
    mask="${mask:+$mask.}$octet"
  done
  printf '%s\n' "$mask"
}

#######################################
# Apply the configured static IPv4 settings by rewriting the ifupdown
# interfaces file.
# Globals:   STATIC_IP, STATIC_PREFIX, STATIC_GATEWAY, STATIC_DNS (read)
#            INTERFACES_FILE (read, optional) - file to rewrite, defaults to
#            /etc/network/interfaces
# Arguments: $1 - interface name
# Returns:   1 when the file could not be written, 0 otherwise (restarting
#            the networking service is best-effort)
#######################################
configure_static_interfaces() {
  local iface="$1"
  local target_file="${INTERFACES_FILE:-/etc/network/interfaces}"
  local backup_file="$target_file.cups-watchdog.bak"
  local netmask

  # Derive the mask for any prefix length rather than a short lookup table,
  # which silently turned e.g. /23 or /12 into 255.255.255.0.
  if ! netmask="$(_prefix_to_netmask "$STATIC_PREFIX")"; then
    log_msg "unsupported STATIC_PREFIX '$STATIC_PREFIX'; using netmask 255.255.255.0"
    netmask="255.255.255.0"
  fi

  # Back up only files the watchdog did not write itself, so the
  # administrator's original configuration survives repeated runs.
  if [ -f "$target_file" ] \
    && ! grep -q 'generated by cups network watchdog' "$target_file" 2>/dev/null; then
    cp "$target_file" "$backup_file"
  fi

  if ! cat > "$target_file" << EOF
# Static network config generated by cups network watchdog.
auto lo
iface lo inet loopback
auto $iface
iface $iface inet static
    address $STATIC_IP
    netmask $netmask
    gateway $STATIC_GATEWAY
    dns-nameservers $STATIC_DNS
EOF
  then
    log_msg "cannot write $target_file"
    return 1
  fi
  # Try systemd first, then the SysV init script; neither is required to work.
  systemctl restart networking 2>/dev/null || /etc/init.d/networking restart 2>/dev/null || true
}
#######################################
# Apply the configured static address using whichever network backend the
# host provides: NetworkManager, then netplan, then the interfaces file.
# Globals:   STATIC_IP, STATIC_PREFIX, STATIC_GATEWAY, CONFIG_FILE (read)
# Arguments: none
# Returns:   1 when no interface can be determined or the static settings
#            are incomplete, otherwise the status of the selected backend
#######################################
configure_static() {
  local dev
  local backend="interfaces"

  dev="$(detect_interface)"
  if [ -z "$dev" ]; then
    log_msg "cannot switch to static: network interface not found"
    return 1
  fi
  # Both the address and the gateway are mandatory.
  if ! { [ -n "${STATIC_IP:-}" ] && [ -n "${STATIC_GATEWAY:-}" ]; }; then
    log_msg "cannot switch to static: STATIC_IP or STATIC_GATEWAY is empty in $CONFIG_FILE"
    return 1
  fi
  log_msg "switching $dev to static $STATIC_IP/$STATIC_PREFIX"

  # Pick the backend first, then dispatch to the matching helper.
  if command -v nmcli >/dev/null 2>&1 \
    && systemctl is-active --quiet NetworkManager 2>/dev/null; then
    backend="nmcli"
  elif [ -d /etc/netplan ] && command -v netplan >/dev/null 2>&1; then
    backend="netplan"
  fi
  "configure_static_${backend}" "$dev"
}
#######################################
# Probe the configured ping targets until one answers.
# Globals:   PING_TARGETS (read) - whitespace-separated hosts
# Arguments: none
# Returns:   0 as soon as one target replies to a single ping, 1 when none do
#######################################
connectivity_ok() {
  local host
  # PING_TARGETS is intentionally unquoted: it is a whitespace-separated list.
  for host in $PING_TARGETS; do
    if [ -z "$host" ]; then
      continue
    fi
    # One echo request with a 2-second wait per target.
    ping -c 1 -W 2 "$host" >/dev/null 2>&1 && return 0
  done
  return 1
}
#######################################
# Run one watchdog cycle: test connectivity, track consecutive failures and
# fall back to DHCP once the failure threshold is reached.
# Globals:   STATE_DIR, FAIL_THRESHOLD, DHCP_AFTER_FAILURE (read)
# Arguments: none
# Returns:   0 unless writing the counter file fails
#######################################
check_network() {
  local count_file="$STATE_DIR/network.fail_count"
  local fail_count=0
  local threshold="$FAIL_THRESHOLD"

  if connectivity_ok; then
    echo 0 > "$count_file"
    log_msg "connectivity ok"
    return 0
  fi

  if [ -f "$count_file" ]; then
    fail_count="$(cat "$count_file" 2>/dev/null || echo 0)"
  fi
  # A truncated or corrupted counter must not abort the script through an
  # arithmetic or unbound-variable error; start counting again instead.
  case "$fail_count" in
    '' | *[!0-9]*) fail_count=0 ;;
  esac
  # A non-numeric threshold from the config would make the comparison below
  # fail every time and silently disable recovery; use the built-in default.
  case "$threshold" in
    '' | *[!0-9]*)
      log_msg "invalid FAIL_THRESHOLD '$threshold'; using 3"
      threshold=3
      ;;
  esac

  # 10# forces base 10 so a value with leading zeros is not read as octal.
  fail_count=$((10#$fail_count + 1))
  echo "$fail_count" > "$count_file"
  log_msg "connectivity failed ($fail_count/$threshold)"
  if [ "$fail_count" -ge "$threshold" ] && [ "$DHCP_AFTER_FAILURE" = "1" ]; then
    configure_dhcp
    # Reset even when the switch failed so the next attempt waits a full
    # threshold window again.
    echo 0 > "$count_file"
  fi
}
# Command-line entry point: the first argument selects the action and
# defaults to "check" when omitted.
if [ "${1:-check}" = "check" ]; then
  # Run one connectivity check cycle.
  check_network
elif [ "${1:-check}" = "dhcp" ]; then
  # Force the managed interface to DHCP.
  configure_dhcp
elif [ "${1:-check}" = "static" ]; then
  # Apply the static settings from the configuration.
  configure_static
else
  echo "Usage: $0 [check|dhcp|static]"
  exit 2
fi

162
watchdog/print-watchdog.sh Normal file
View File

@@ -0,0 +1,162 @@
#!/bin/bash
#
# Print service watchdog for the CUPS print server.
# It checks systemd service state plus quick CUPS command/HTTP probes.
# When checks fail repeatedly, it restarts CUPS and related services.
#
# Abort on any reference to an unset variable.
set -o nounset

# Location of the optional settings file; the CUPS_PRINT_WATCHDOG_CONFIG
# environment variable overrides the default path.
CONFIG_FILE=${CUPS_PRINT_WATCHDOG_CONFIG:-/etc/cups-watchdog/print-watchdog.conf}

# Built-in defaults. Every one of them is assigned before CONFIG_FILE is
# sourced, so the settings file may reassign any of them.
STATE_DIR=/run/cups-watchdog
LOG_FILE=/var/log/cups-watchdog/print.log
SERVICES='cups avahi-daemon cups-driver-manager'
CUPS_URL=http://127.0.0.1:631/
CHECK_CUPS_HTTP=1
CHECK_LPSTAT=1
COMMAND_TIMEOUT=8
FAIL_THRESHOLD=2
RESTART_COOLDOWN=60

# Pull in site-specific overrides when the settings file is present.
if [[ -f $CONFIG_FILE ]]; then
  # shellcheck disable=SC1090
  source "$CONFIG_FILE"
fi

# Make sure the state directory exists; creating the log directory is
# best-effort and its failure is deliberately ignored.
mkdir -p "$STATE_DIR"
mkdir -p "$(dirname "$LOG_FILE")" 2>/dev/null || :
#######################################
# Print a timestamped watchdog message and append it to the log file.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
# Outputs:   the formatted line on stdout
# Returns:   0; writing to LOG_FILE is best-effort
#######################################
log_msg() {
  local msg="$1"
  local line
  line="$(date '+%Y-%m-%d %H:%M:%S') [print-watchdog] $msg"
  printf '%s\n' "$line"
  # Redirections are applied left to right, so a plain
  # `>> "$LOG_FILE" 2>/dev/null` still prints the shell's "cannot open" error
  # when the log is unwritable. Silencing stderr on the enclosing group makes
  # the suppression take effect before the log file is opened.
  { printf '%s\n' "$line" >> "$LOG_FILE"; } 2>/dev/null || true
}
#######################################
# Tell whether systemd knows a service unit, either as an installed unit file
# or as a unit currently held in memory.
# Arguments: $1 - service name without the ".service" suffix
# Returns:   0 when the unit is known, non-zero otherwise
#######################################
unit_exists() {
  local service="$1"
  local unit="$service.service"
  # grep -F compares the name literally, so "." and "\" in unit names are not
  # treated as regex syntax.
  systemctl list-unit-files --no-legend "$unit" 2>/dev/null \
    | awk '{print $1}' | grep -qxF -- "$unit" && return 0
  # --plain drops the leading status bullet that list-units prints for some
  # units; without it the first column would be the bullet, not the name.
  systemctl list-units --all --plain --no-legend "$unit" 2>/dev/null \
    | awk '{print $1}' | grep -qxF -- "$unit"
}
#######################################
# Decide whether a service is relevant for monitoring: its unit must exist
# and be either enabled or currently active.
# Arguments: $1 - service name without the ".service" suffix
# Returns:   0 when the service should be monitored, 1 otherwise
#######################################
service_should_check() {
  local svc="$1"
  local probe
  unit_exists "$svc" || return 1
  # Either state is sufficient; stop at the first probe that succeeds.
  for probe in is-enabled is-active; do
    systemctl "$probe" --quiet "$svc.service" 2>/dev/null && return 0
  done
  return 1
}
#######################################
# Verify that every monitored service that is relevant on this host is active.
# Globals:   SERVICES (read) - whitespace-separated service names
# Arguments: none
# Outputs:   one log line per inactive service
# Returns:   0 when all checked services are active, 1 otherwise
#######################################
check_systemd_services() {
  local svc
  local rc=0
  # SERVICES is intentionally unquoted: it is a whitespace-separated list.
  for svc in $SERVICES; do
    if ! service_should_check "$svc"; then
      continue
    fi
    systemctl is-active --quiet "$svc.service" 2>/dev/null && continue
    log_msg "service not active: $svc"
    rc=1
  done
  return "$rc"
}
#######################################
# Probe the CUPS scheduler with `lpstat -r`, bounded by COMMAND_TIMEOUT.
# Globals:   CHECK_LPSTAT, COMMAND_TIMEOUT (read)
# Arguments: none
# Returns:   0 when the probe is disabled, lpstat is unavailable, or the probe
#            succeeds; otherwise the failing status from timeout/lpstat
#######################################
check_lpstat() {
  # The probe is opt-in: anything other than "1" disables it.
  if [ "$CHECK_LPSTAT" != "1" ]; then
    return 0
  fi
  # A missing lpstat is treated as "nothing to check".
  if ! command -v lpstat >/dev/null 2>&1; then
    return 0
  fi
  timeout "$COMMAND_TIMEOUT" lpstat -r >/dev/null 2>&1
}
#######################################
# Probe the CUPS web interface with curl, bounded by COMMAND_TIMEOUT.
# Globals:   CHECK_CUPS_HTTP, COMMAND_TIMEOUT, CUPS_URL (read)
# Arguments: none
# Returns:   0 when the probe is disabled, curl is unavailable, or the request
#            succeeds; otherwise curl's failing status
#######################################
check_http() {
  # The probe is opt-in: anything other than "1" disables it.
  if [ "$CHECK_CUPS_HTTP" != "1" ]; then
    return 0
  fi
  # A missing curl is treated as "nothing to check".
  if ! command -v curl >/dev/null 2>&1; then
    return 0
  fi
  # -f turns HTTP error responses into a non-zero exit status.
  curl -fsS --max-time "$COMMAND_TIMEOUT" "$CUPS_URL" >/dev/null 2>&1
}
#######################################
# Restart CUPS and its companion services, at most once per cooldown window.
# Globals:   STATE_DIR, RESTART_COOLDOWN (read)
# Arguments: none
# Returns:   0 when the restart was skipped by the cooldown or cups.service
#            restarted; 1 when restarting cups.service failed
#######################################
restart_print_stack() {
  local now
  local last_file="$STATE_DIR/print.last_restart"
  local last=0
  local cooldown="$RESTART_COOLDOWN"
  local service
  local rc=0

  now="$(date +%s)"
  if [ -f "$last_file" ]; then
    last="$(cat "$last_file" 2>/dev/null || echo 0)"
  fi
  # An empty or corrupted timestamp would make the subtraction below a syntax
  # error and block every future restart; treat it as "never restarted".
  case "$last" in
    '' | *[!0-9]*) last=0 ;;
  esac
  case "$cooldown" in
    '' | *[!0-9]*)
      log_msg "invalid RESTART_COOLDOWN '$cooldown'; using 60"
      cooldown=60
      ;;
  esac

  # 10# forces base 10 so a value with leading zeros is not read as octal.
  if [ $((now - 10#$last)) -lt "$cooldown" ]; then
    log_msg "restart skipped: cooldown active"
    return 0
  fi

  log_msg "restarting print stack"
  systemctl reset-failed cups.service >/dev/null 2>&1 || true
  if ! systemctl restart cups.service; then
    log_msg "failed to restart cups.service"
    rc=1
  fi
  # Companion services are restarted on a best-effort basis and only when
  # they are relevant on this host.
  for service in avahi-daemon cups-driver-manager; do
    service_should_check "$service" || continue
    systemctl reset-failed "$service.service" >/dev/null 2>&1 || true
    systemctl restart "$service.service" >/dev/null 2>&1 || true
  done
  # Record the attempt even on failure so failing restarts stay rate-limited.
  echo "$now" > "$last_file"
  return "$rc"
}
#######################################
# Run one watchdog cycle: probe the print stack, track consecutive failures
# and restart the stack once the failure threshold is reached.
# Globals:   STATE_DIR, FAIL_THRESHOLD (read)
# Arguments: none
# Returns:   0 unless writing the counter file fails
#######################################
check_print_stack() {
  local count_file="$STATE_DIR/print.fail_count"
  local fail_count=0
  local threshold="$FAIL_THRESHOLD"
  local failed=0

  # Run every probe so that each failing one is logged, not just the first.
  check_systemd_services || failed=1
  if ! check_lpstat; then
    log_msg "lpstat check failed or timed out"
    failed=1
  fi
  if ! check_http; then
    log_msg "CUPS HTTP check failed or timed out"
    failed=1
  fi

  if [ "$failed" = "0" ]; then
    echo 0 > "$count_file"
    log_msg "print stack ok"
    return 0
  fi

  if [ -f "$count_file" ]; then
    fail_count="$(cat "$count_file" 2>/dev/null || echo 0)"
  fi
  # A truncated or corrupted counter must not abort the script through an
  # arithmetic or unbound-variable error; start counting again instead.
  case "$fail_count" in
    '' | *[!0-9]*) fail_count=0 ;;
  esac
  # A non-numeric threshold from the config would make the comparison below
  # fail every time and silently disable restarts; use the built-in default.
  case "$threshold" in
    '' | *[!0-9]*)
      log_msg "invalid FAIL_THRESHOLD '$threshold'; using 2"
      threshold=2
      ;;
  esac

  # 10# forces base 10 so a value with leading zeros is not read as octal.
  fail_count=$((10#$fail_count + 1))
  echo "$fail_count" > "$count_file"
  log_msg "print stack failed ($fail_count/$threshold)"
  if [ "$fail_count" -ge "$threshold" ]; then
    restart_print_stack
    echo 0 > "$count_file"
  fi
}
# Command-line entry point: the first argument selects the action and
# defaults to "check" when omitted.
if [ "${1:-check}" = "check" ]; then
  # Run one print stack check cycle.
  check_print_stack
elif [ "${1:-check}" = "restart" ]; then
  # Restart the print stack (subject to the cooldown).
  restart_print_stack
else
  echo "Usage: $0 [check|restart]"
  exit 2
fi