File: //lib64/nagios/plugins/check_salt_running.sh
#!/usr/bin/env bash
# Abort on expansion of an unset variable instead of silently using "".
set -o nounset

# Built-in defaults. Each one can be replaced on the command line
# (-s, -f, -w and -c respectively).
SERVICE_NAME='salt-minion'      # unit queried through systemctl
STATE_FILE='/opt/.saltrunning'  # file whose mtime is compared to the thresholds
WARN_AGE=$(( 90 * 60 ))         # WARNING threshold in seconds (5400)
CRIT_AGE=$(( 120 * 60 ))        # CRITICAL threshold in seconds (7200)
#######################################
# Print the command synopsis and a short description of the check.
# Arguments: none
# Outputs:   four lines of help text on stdout
# Returns:   exit status of printf (0 on success)
#######################################
usage() {
  printf '%s\n' \
    'Usage: check_salt_running.sh [-s service] [-f state_file] [-w warn_age] [-c crit_age]' \
    'Checks that the salt-minion service is active and that the state file has been' \
    'updated recently. If Salt is not installed or the unit is unavailable, the' \
    'check reports WARNING instead of CRITICAL.'
}
#######################################
# Parse the command-line options into the global check settings.
# Globals:
#   SERVICE_NAME  written by -s
#   STATE_FILE    written by -f
#   WARN_AGE      written by -w (validated, stored as a decimal integer)
#   CRIT_AGE      written by -c (validated, stored as a decimal integer)
# Arguments:
#   The script's command-line arguments ("$@").
# Outputs:
#   -h prints the usage text. Invalid usage prints an "UNKNOWN - ..." line
#   followed by the usage text.
# Returns:
#   0 when all options were accepted. Exits the script with 0 for -h and
#   with 3 for an unknown option, a missing argument or a non-numeric
#   threshold.
#######################################
parse_args() {
  local opt OPTARG
  local OPTIND=1

  while getopts ":s:f:w:c:h" opt; do
    case "$opt" in
      s) SERVICE_NAME="$OPTARG" ;;
      f) STATE_FILE="$OPTARG" ;;
      w | c)
        # The thresholds are used in arithmetic expressions later on. Bash
        # evaluates non-numeric text there as variable names / expressions,
        # so anything but plain digits must be rejected here.
        if [[ ! "$OPTARG" =~ ^[0-9]+$ ]]; then
          echo "UNKNOWN - option -$opt requires a non-negative integer, got: $OPTARG"
          usage
          exit 3
        fi
        # Force base 10 so a leading zero is not interpreted as octal.
        if [[ "$opt" == "w" ]]; then
          WARN_AGE=$(( 10#$OPTARG ))
        else
          CRIT_AGE=$(( 10#$OPTARG ))
        fi
        ;;
      h)
        usage
        exit 0
        ;;
      \?)
        echo "UNKNOWN - invalid option: -$OPTARG"
        usage
        exit 3
        ;;
      :)
        echo "UNKNOWN - option -$OPTARG requires an argument"
        usage
        exit 3
        ;;
    esac
  done
}

parse_args "$@"
# Thresholds expressed in whole minutes (integer division), used in the
# status text and in the performance data.
WARN_AGE_MINUTES=$(( WARN_AGE / 60 ))
CRIT_AGE_MINUTES=$(( CRIT_AGE / 60 ))

# Without systemctl none of the service checks below can be performed.
if ! command -v systemctl >/dev/null 2>&1; then
  echo "UNKNOWN - systemctl not available"
  exit 3
fi

# Query the unit's load state. The --value switch is deliberately not used:
# it is only understood by newer systemd releases (v230 and later); on older
# ones the call fails, stderr is discarded, and every host would be reported
# as "not installed". The plain "LoadState=<state>" form is available in both
# cases and the prefix is stripped below.
# Errors are intentionally swallowed: an empty result is handled as
# "unit unavailable".
service_load_state="$(systemctl show -p LoadState -- "$SERVICE_NAME" 2>/dev/null || true)"
service_load_state="${service_load_state#LoadState=}"
if [[ -z "$service_load_state" || "$service_load_state" == "not-found" ]]; then
  echo "WARNING - $SERVICE_NAME is not installed or the unit is unavailable"
  exit 1
fi

# is-active exits non-zero for anything but "active"; only its printed state
# matters here, so the exit status is ignored.
service_state="$(systemctl is-active -- "$SERVICE_NAME" 2>/dev/null || true)"
if [[ "$service_state" != "active" ]]; then
  echo "CRITICAL - $SERVICE_NAME is not active (state: ${service_state:-unknown})"
  exit 2
fi
# A missing state file is reported as CRITICAL.
if [[ ! -e "$STATE_FILE" ]]; then
  echo "CRITICAL - $STATE_FILE does not exist"
  exit 2
fi

# Modification time of the state file in seconds since the epoch.
last_run="$(stat -c '%Y' "$STATE_FILE" 2>/dev/null)" || {
  echo "UNKNOWN - unable to read mtime for $STATE_FILE"
  exit 3
}

now="$(date +%s)"
age=$(( now - last_run ))

# An mtime ahead of the current clock cannot be rated against the thresholds.
if (( age < 0 )); then
  echo "UNKNOWN - $STATE_FILE has a future timestamp"
  exit 3
fi

age_minutes=$(( age / 60 ))

# Performance data is the same for every result, so build it once.
# NOTE(review): "m" does not appear to be one of the units listed in the
# Nagios plugin guidelines - confirm the perfdata consumers accept it.
perfdata="saltrunning_age=${age_minutes}m;${WARN_AGE_MINUTES};${CRIT_AGE_MINUTES};0;"

# Thresholds are compared in seconds; the critical threshold is tested first.
if (( age >= CRIT_AGE )); then
  status=2
  summary="CRITICAL - Salt has not updated for ${age_minutes} minutes (threshold ${CRIT_AGE_MINUTES} minutes)"
elif (( age >= WARN_AGE )); then
  status=1
  summary="WARNING - Salt has not updated for ${age_minutes} minutes (threshold ${WARN_AGE_MINUTES} minutes)"
else
  status=0
  summary="OK - $SERVICE_NAME active, last update ${age_minutes} minutes ago"
fi

echo "$summary | $perfdata"
exit "$status"