#!/bin/bash
# Check_MK Agent for Linux
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation in version 2. check_mk is distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more de-
# tails. You should have received a copy of the GNU General Public
# License along with GNU Make; see the file COPYING. If not, write
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.

# Remove locale settings to eliminate localized outputs where possible
export LC_ALL=C
unset LANG

# Base directories of the agent. Each one can be overridden through the
# environment; the defaults apply only when the variable is unset or empty.
export MK_LIBDIR=${MK_LIBDIR:-/usr/lib/check_mk_agent}
export MK_CONFDIR=${MK_CONFDIR:-/etc/check_mk}
export MK_VARDIR=${MK_VARDIR:-/var/lib/check_mk_agent}

# Optionally set a tempdir for all subsequent calls
#export TMPDIR=

# Provide information about the remote host. That helps when data
# is being sent only once to each remote host.
if [ -n "$REMOTE_HOST" ]; then
    export REMOTE=$REMOTE_HOST
elif [ -n "$SSH_CLIENT" ]; then
    # Keep only the first space separated field of SSH_CLIENT
    export REMOTE=${SSH_CLIENT%% *}
fi

# The package name gets patched for baked agents to either
# "check-mk-agent" or the name set by the "name of agent packages" rule
XINETD_SERVICE_NAME=check_mk

# Make sure, locally installed binaries are found
PATH=$PATH:/usr/local/bin

# All executables in PLUGINSDIR will simply be executed and their
# output appended to the output of the agent. Plugins define their own
# sections and must output headers with '<<<' and '>>>'
PLUGINSDIR=$MK_LIBDIR/plugins

# All executables in LOCALDIR will be executed and their
# output inserted into the section <<<local>>>. Please
# refer to online documentation for details about local checks.
LOCALDIR=$MK_LIBDIR/local

# All files in SPOOLDIR will simply be appended to the agent
# output if they are not outdated (see below)
SPOOLDIR=$MK_VARDIR/spool
# Close standard input (for security reasons) and stderr when not
# explicitly in debug mode (first argument "-d", which enables shell tracing).
# When the node's agent is executed by e.g. a docker node in a container
# (MK_FROM_NODE is set), then don't close stdin, because the agent is piped
# through it in this case.
if [ "$1" = -d ]; then
    set -xv
elif [ -z "$MK_FROM_NODE" ]; then
    exec </dev/null 2>/dev/null
fi
# Detect whether or not the agent is being executed in a container
# environment. Exactly one of IS_DOCKERIZED / IS_LXC_CONTAINER is set to 1
# when a container is detected, none otherwise.
#
# Both flags are cleared first so that a value inherited from the
# environment cannot survive next to the flag that is detected here.
unset IS_DOCKERIZED
unset IS_LXC_CONTAINER

if [ -f /.dockerenv ]; then
    IS_DOCKERIZED=1
elif grep -q container=lxc /proc/1/environ 2>/dev/null; then
    # Works in lxc environment e.g. on Ubuntu bionic, but does not
    # seem to work in proxmox (see CMK-1561)
    IS_LXC_CONTAINER=1
elif grep -q 'lxcfs /proc/cpuinfo fuse.lxcfs' /proc/mounts 2>/dev/null; then
    # Seems to work in proxmox
    IS_LXC_CONTAINER=1
fi
# Function to replace "if type [somecmd]" idiom
# 'command -v' tends to be more robust vs 'which' and 'type' based tests
#
# Arguments: $1 - name of the command to look up (mandatory; the shell
#                 aborts with "No command to test" when it is missing)
# Outputs:   nothing
# Returns:   0 if the shell can resolve $1 as a command, non-zero otherwise
inpath() {
    command -v "${1:?No command to test}" >/dev/null 2>&1
}
# Prefer (relatively) new /usr/bin/timeout from coreutils against
# our shipped waitmax. waitmax is statically linked and crashes on
# some Ubuntu versions recently.
if inpath timeout; then
    # Shadow the waitmax binary: all arguments are handed to timeout
    # unchanged. Exported so that child bash processes see it as well.
    waitmax() {
        timeout "$@"
    }
    export -f waitmax
fi
# Optional encryption settings. The code below reads ENCRYPTED and
# PASSPHRASE, which this file is expected to define.
if [ -f "$MK_CONFDIR/encryption.cfg" ]; then
    # shellcheck source=/dev/null
    . "$MK_CONFDIR/encryption.cfg"
fi

# When encryption is enabled, all further stdout of the agent is piped
# through openssl. A two character marker is written in clear text first:
# "02" when sha256 is used as digest, "00" when md5 is used.
if [ "$ENCRYPTED" = "yes" ]; then
    OPENSSL_VERSION=$(openssl version | awk '{print $2}' | awk -F . '{print (($1 * 100) + $2) * 100+ $3}')
    # An empty version (e.g. openssl missing) is treated as 0 instead of
    # making the numeric test fail with a syntax error.
    # The passphrase is handed over via the environment rather than on the
    # command line, so it does not show up in the process list.
    if [ "${OPENSSL_VERSION:-0}" -ge 10000 ]; then
        printf '02'
        exec > >(PASSPHRASE="$PASSPHRASE" openssl enc -aes-256-cbc -md sha256 -pass env:PASSPHRASE -nosalt)
    else
        printf '00'
        exec > >(PASSPHRASE="$PASSPHRASE" openssl enc -aes-256-cbc -md md5 -pass env:PASSPHRASE -nosalt)
    fi
fi

# Optional configuration of the real-time checks. RTC_PLUGINS defaults to
# empty and may be overwritten by the sourced file.
RTC_PLUGINS=""
if [ -e "$MK_CONFDIR/real_time_checks.cfg" ]; then
    # shellcheck source=/dev/null
    . "$MK_CONFDIR/real_time_checks.cfg"
fi
#
# CHECK SECTIONS
#

# Memory usage.
# Globals:   IS_DOCKERIZED (read)
# Outputs:   section <<<mem>>> built from /proc/meminfo or, when
#            IS_DOCKERIZED is non-empty, section <<<docker_container_mem>>>
#            built from the memory cgroup files plus the MemTotal line
section_mem() {
    if [ -z "$IS_DOCKERIZED" ]; then
        echo '<<<mem>>>'
        grep -E -v '^Swap:|^Mem:|total:' </proc/meminfo
    else
        echo '<<<docker_container_mem>>>'
        cat /sys/fs/cgroup/memory/memory.stat
        echo "usage_in_bytes $(</sys/fs/cgroup/memory/memory.usage_in_bytes)"
        echo "limit_in_bytes $(</sys/fs/cgroup/memory/memory.limit_in_bytes)"
        grep -F 'MemTotal:' /proc/meminfo
    fi
}
# Load and number of CPUs.
# Globals:   IS_DOCKERIZED, IS_LXC_CONTAINER (read)
#            CPU_REGEX, NUM_CPUS (written; kept global as before, since
#            code outside of this function may read them)
# Outputs:   section <<<cpu>>> on plain hosts, otherwise
#            <<<docker_container_cpu>>> or <<<lxc_container_cpu>>>
section_cpu() {
    # On armv7l only lines starting with "processor" are counted
    # (presumably because other lines there start with "CPU" as well).
    if [ "$(uname -m)" = "armv7l" ]; then
        CPU_REGEX='^processor'
    else
        CPU_REGEX='^CPU|^processor'
    fi
    NUM_CPUS=$(grep -c -E "$CPU_REGEX" </proc/cpuinfo)

    if [ -z "$IS_DOCKERIZED" ] && [ -z "$IS_LXC_CONTAINER" ]; then
        echo '<<<cpu>>>'
        echo "$(</proc/loadavg) $NUM_CPUS"
        if [ -f "/proc/sys/kernel/threads-max" ]; then
            cat /proc/sys/kernel/threads-max
        fi
    else
        if [ -n "$IS_DOCKERIZED" ]; then
            echo '<<<docker_container_cpu>>>'
        else
            echo '<<<lxc_container_cpu>>>'
        fi
        grep "^cpu " /proc/stat
        echo "num_cpus $NUM_CPUS"
        cat /sys/fs/cgroup/cpuacct/cpuacct.stat
    fi
}
# Uptime.
# Globals:   IS_DOCKERIZED (read)
# Outputs:   section <<<uptime>>>; the content of /proc/uptime on hosts,
#            or (dockerized) the seconds elapsed since the last status
#            change of /dev/pts
section_uptime() {
    echo '<<<uptime>>>'
    if [ -z "$IS_DOCKERIZED" ]; then
        cat /proc/uptime
    else
        echo "$(($(date +%s) - $(stat -c %Z /dev/pts)))"
    fi
}
# Print out Partitions / Filesystems. (-P gives non-wrapped POSIXed output)
# Heads up: NFS-mounts are generally suppressed to avoid agent hangs.
# If hard NFS mounts are configured or you have too large nfs retry/timeout
# settings, accessing those mounts from the agent would leave you with
# thousands of agent processes and, ultimately, a dead monitored system.
# These should generally be monitored on the NFS server, not on the clients.
#
# Globals:   IS_DOCKERIZED, IS_LXC_CONTAINER (read)
# Outputs:   nothing when dockerized; otherwise section <<<df>>> with block
#            usage followed by a second <<<df>>> section with inode usage
section_df() {
    if [ -n "$IS_DOCKERIZED" ]; then
        return
    fi

    # The exclusion list is getting a bit of a problem.
    # -l should hide any remote FS but seems to be all but working.
    # The options are kept in an array, so no word splitting is needed.
    local -a excludefs=(
        -x smbfs -x cifs -x iso9660 -x udf -x nfsv4 -x nfs
        -x mvfs -x prl_fs -x squashfs -x devtmpfs
    )
    # zfs is only excluded outside of LXC containers
    if [ -z "$IS_LXC_CONTAINER" ]; then
        excludefs+=(-x zfs)
    fi

    echo '<<<df>>>'
    df -PTlk "${excludefs[@]}" | sed 1d

    # df inodes information
    echo '<<<df>>>'
    echo '[df_inodes_start]'
    df -PTli "${excludefs[@]}" | sed 1d
    echo '[df_inodes_end]'
}
# State of systemd units.
# Outputs:   nothing when systemctl is not available; otherwise section
#            <<<systemd_units>>> with the unit file list and the list of
#            all units (cut off after the first empty line)
sections_systemd() {
    if inpath systemctl; then
        echo '<<<systemd_units>>>'
        echo "[list-unit-files]"
        systemctl list-unit-files --no-pager
        echo "[all]"
        # Stop at the first empty line of the unit listing
        systemctl --all --no-pager | sed '/^$/q'
    fi
}
# Runs one MRPE check and prints it as a <<<mrpe>>> section.
# Arguments: $1  - service description
#            $2… - command line of the check (evaluated by the shell)
# Outputs:   the section header followed by one line of the form
#            "(<plugin basename>) <description> <exit code> <output>";
#            newlines inside the check output are replaced by \1
run_mrpe() {
    local descr=$1
    shift
    local cmdline=$*
    # Local on purpose: a global PLUGIN/OUTPUT of the caller must not be
    # overwritten by running a check.
    local plugin output rc

    echo '<<<mrpe>>>'

    # First word of the command line is the plugin executable
    plugin=${cmdline%% *}
    output=$(eval "$cmdline")
    rc=$?

    printf '%s' "(${plugin##*/}) $descr $rc $output" | tr \\n \\1
    echo
}

export -f run_mrpe
# Runs a command asynchronously by use of a cache file. Usage:
# run_cached [-s] [-m|-ma] NAME MAXAGE COMMAND...
#   -s      creates the section header <<<$NAME>>>
#   -m      mrpe-mode: stores exit code with the cache
#   -ma     mrpe-mode with age: stores exit code with the cache and adds the cache age
#   NAME    is the name of the section (also used as cache file name)
#   MAXAGE  is the maximum cache lifetime in seconds
#   COMMAND is the shell command line whose output is cached
#
# An existing non-empty cache file is always printed, even when outdated.
# If it is outdated (or missing) and no update job is running, a detached
# bash process is started that regenerates it.
run_cached() {
    local NOW
    NOW=$(date +%s)
    local section=
    local mrpe=0
    local append_age=0
    # Start out empty, so that a variable of the same name inherited from
    # the caller or the environment cannot suppress the cache refresh.
    local USE_CACHEFILE=
    local CACHEFILE CACHE_INFO
    # TODO: this function is unable to handle multiple args at once
    # for example: -s -m won't work, it is read as single token "-s -m"
    if [ "$1" = -s ]; then
        section="echo '<<<$2:cached($NOW,$3)>>>' ; "
        shift
    fi
    if [ "$1" = -m ]; then
        mrpe=1
        shift
    fi
    if [ "$1" = "-ma" ]; then
        mrpe=1
        append_age=1
        shift
    fi
    local NAME=$1
    local MAXAGE=$2
    shift 2
    local CMDLINE=$section$*

    if [ ! -d "$MK_VARDIR/cache" ]; then mkdir -p "$MK_VARDIR/cache"; fi
    if [ "$mrpe" = 1 ]; then
        CACHEFILE="$MK_VARDIR/cache/mrpe_$NAME.cache"
    else
        CACHEFILE="$MK_VARDIR/cache/$NAME.cache"
    fi

    # Check if the creation of the cache takes suspiciously long and kill the
    # process if the age (access time) of $CACHEFILE.new is twice the MAXAGE.
    # Output the possibly already cached section anyway and start the cache
    # update again.
    if [ -e "$CACHEFILE.new" ]; then
        local CF_ATIME
        CF_ATIME=$(stat -c %X "$CACHEFILE.new")
        if [ $((NOW - CF_ATIME)) -ge $((MAXAGE * 2)) ]; then
            # Kill the process still accessing that file in case
            # it is still running. This avoids overlapping processes!
            fuser -k -9 "$CACHEFILE.new" >/dev/null 2>&1
            rm -f "$CACHEFILE.new"
        fi
    fi

    # Check if cache file exists and is recent enough
    if [ -s "$CACHEFILE" ]; then
        local MTIME
        MTIME=$(stat -c %Y "$CACHEFILE")
        local AGE
        AGE=$((NOW - MTIME))
        if [ "$AGE" -le "$MAXAGE" ]; then USE_CACHEFILE=1; fi
        # Output the file in any case, even if it is
        # outdated. The new file will not yet be available
        if [ "$append_age" -eq 1 ]; then
            # insert the cached-string before the pipe (first -e)
            # or, if no pipe found (-e t) append it (third -e),
            # but only on the second line (first line is section header,
            # all further lines are long output)
            sed -e "2s/|/ (Cached: ${AGE}\/${MAXAGE}s)|/" -e t -e "2s/$/ (Cached: ${AGE}\/${MAXAGE}s)/" <"$CACHEFILE"
        else
            CACHE_INFO="cached($MTIME,$MAXAGE)"
            if [[ $NAME == local_* ]]; then
                # Cached local checks get the cache info prepended to each line
                sed -e "s/^/$CACHE_INFO /" "$CACHEFILE"
            else
                # insert the cache info in the section header (^= after '!'),
                # if none is present (^= before '!')
                sed -e '/^<<<.*\(:cached(\).*>>>/!s/^<<<\([^>]*\)>>>$/<<<\1:'"${CACHE_INFO}"'>>>/' "$CACHEFILE"
            fi
        fi
    fi

    # Cache file outdated and new job not yet running? Start it
    if [ -z "$USE_CACHEFILE" ] && [ ! -e "$CACHEFILE.new" ]; then
        # When the command fails, the output is thrown away
        if [ "$mrpe" -eq 1 ]; then
            echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; run_mrpe $NAME \"$CMDLINE\" && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup /bin/bash >/dev/null 2>&1 &
        else
            echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; $CMDLINE && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup /bin/bash >/dev/null 2>&1 &
        fi
    fi
}

# Make run_cached available for subshells (plugins, local checks, etc.)
export -f run_cached
# Implements Real-Time Check feature of the Check_MK agent which can send
# some section data in 1 second resolution. Useful for fast notifications and
# detailed graphing (if you configure your RRDs to this resolution).
#
# Globals:   MK_VARDIR, PLUGINSDIR, PASSPHRASE, ENCRYPTED_RT, RTC_SECTIONS,
#            RTC_PLUGINS, REMOTE, RTC_PORT (read)
#            RTC_TIMEOUT (read, decremented once per round)
#            PIDFILE, PROTOCOL, RTC_SECRET (written)
# Outputs:   one UDP packet per section / plugin and round, sent to
#            REMOTE:RTC_PORT
# Returns:   never returns; exits the current shell with status 1 once the
#            pid file is gone, names another process, or RTC_TIMEOUT hits 0
run_real_time_checks() {
    PIDFILE=$MK_VARDIR/real_time_checks.pid
    echo $$ >"$PIDFILE"

    if [ -n "$PASSPHRASE" ]; then
        # new mechanism to set the passphrase has priority
        RTC_SECRET=$PASSPHRASE
    fi

    # Marker sent in clear text in front of each packet:
    # 00 for encrypted payload, 99 for unencrypted payload
    if [ "$ENCRYPTED_RT" != "no" ]; then
        PROTOCOL=00
    else
        PROTOCOL=99
    fi

    while true; do
        # terminate when pidfile is gone or other Real-Time Check process started or configured timeout
        if [ ! -e "$PIDFILE" ] || [ "$(<"$PIDFILE")" -ne $$ ] || [ "$RTC_TIMEOUT" -eq 0 ]; then
            exit 1
        fi

        for SECTION in $RTC_SECTIONS; do
            # Be aware of maximum packet size. Maybe we need to check the size of the section
            # output and do some kind of nicer error handling.
            # 2 bytes: protocol version, 10 bytes: timestamp, rest: encrypted data
            # dd is used to concatenate the output of all commands to a single write/block => udp packet
            {
                printf '%s' "$PROTOCOL"
                date +%s | tr -d '\n'
                if [ "$ENCRYPTED_RT" != "no" ]; then
                    export RTC_SECRET=$RTC_SECRET
                    section_"$SECTION" | openssl enc -aes-256-cbc -md md5 -pass env:RTC_SECRET -nosalt
                else
                    section_"$SECTION"
                fi
            } | dd bs=9999 iflag=fullblock 2>/dev/null >"/dev/udp/${REMOTE}/${RTC_PORT}"
        done

        # Plugins
        if cd "$PLUGINSDIR"; then
            for PLUGIN in $RTC_PLUGINS; do
                if [ ! -f "$PLUGIN" ]; then
                    continue
                fi

                # Same packet layout as for the sections above
                {
                    printf '%s' "$PROTOCOL"
                    date +%s | tr -d '\n'
                    if [ "$ENCRYPTED_RT" != "no" ]; then
                        export RTC_SECRET=$RTC_SECRET
                        ./"$PLUGIN" | openssl enc -aes-256-cbc -md md5 -pass env:RTC_SECRET -nosalt
                    else
                        ./"$PLUGIN"
                    fi
                } | dd bs=9999 iflag=fullblock 2>/dev/null >"/dev/udp/${REMOTE}/${RTC_PORT}"
            done
        fi

        sleep 1
        RTC_TIMEOUT=$((RTC_TIMEOUT - 1))
    done
}
# General information about the agent itself
echo "<<<check_mk>>>"
echo "Version: 1.6.0p14"
echo "AgentOS: linux"
echo "Hostname: $(hostname)"
echo "AgentDirectory: $MK_CONFDIR"
echo "DataDirectory: $MK_VARDIR"
echo "SpoolDirectory: $SPOOLDIR"
echo "PluginsDirectory: $PLUGINSDIR"
echo "LocalDirectory: $LOCALDIR"

# If we are called via xinetd, try to find only_from configuration
if [ -n "$REMOTE_HOST" ]; then
    printf 'OnlyFrom: '
    # Print the value of the first only_from line found inside the
    # service block of this agent
    sed -n '/^service[[:space:]]*'"$XINETD_SERVICE_NAME"'/,/}/s/^[[:space:]]*only_from[[:space:]]*=[[:space:]]*\(.*\)/\1/p' /etc/xinetd.d/* | head -n1
    echo
fi
section_df

sections_systemd

# Filesystem usage for ZFS
if inpath zfs; then
    echo '<<<zfsget:sep(9)>>>'
    zfs get -t filesystem,volume -Hp name,quota,used,avail,mountpoint,type 2>/dev/null
    echo '<<<zfsget>>>'
    echo '[df]'
    df -PTlk -t zfs | sed 1d
fi
# Check NFS mounts by accessing them with stat -f (System
# call statfs()). If this lasts more than 5 seconds (2 seconds for
# CIFS mounts) we consider it as hanging. We need waitmax.
if inpath waitmax; then
    STAT_VERSION=$(stat --version | head -n 1 | cut -d" " -f4)
    # With this stat version a newline is printed after a successful stat
    # call (presumably because that version does not terminate the output
    # of -c with one).
    STAT_BROKE="5.3.0"

    echo '<<<nfsmounts>>>'
    # Second field of /proc/mounts is the mount point; \040 encodes a space
    sed -n '/ nfs4\? /s/[^ ]* \([^ ]*\) .*/\1/p' </proc/mounts |
        sed 's/\\040/ /g' |
        while read -r MP; do
            if [ "$STAT_VERSION" != "$STAT_BROKE" ]; then
                waitmax -s 9 5 stat -f -c "$MP ok %b %f %a %s" "$MP" ||
                    echo "$MP hanging 0 0 0 0"
            elif waitmax -s 9 5 stat -f -c "$MP ok %b %f %a %s" "$MP"; then
                printf '\n'
            else
                echo "$MP hanging 0 0 0 0"
            fi
        done

    echo '<<<cifsmounts>>>'
    sed -n '/ cifs\? /s/[^ ]* \([^ ]*\) .*/\1/p' </proc/mounts |
        sed 's/\\040/ /g' |
        while read -r MP; do
            if [ ! -r "$MP" ]; then
                echo "$MP Permission denied"
            elif [ "$STAT_VERSION" != "$STAT_BROKE" ]; then
                waitmax -s 9 2 stat -f -c "$MP ok %b %f %a %s" "$MP" ||
                    echo "$MP hanging 0 0 0 0"
            elif waitmax -s 9 2 stat -f -c "$MP ok %b %f %a %s" "$MP"; then
                printf '\n'
            else
                echo "$MP hanging 0 0 0 0"
            fi
        done
fi
# Check mount options. Filesystems may switch to 'ro' in case
# of a read error.
echo '<<<mounts>>>'
grep ^/dev </proc/mounts | grep -v " squashfs "

if inpath ps; then
    # processes including username, without kernel processes
    echo '<<<ps>>>'
    echo 'dummy section -- refer to section ps_lnx'
    echo '<<<ps_lnx>>>'
    # Add the cgroup column only when cgroups are available
    CGROUP=""
    if [ -e /sys/fs/cgroup ]; then
        CGROUP="cgroup:512,"
    fi
    echo "[header] $(ps ax -o "$CGROUP"user:32,vsz,rss,cputime,etime,pid,command --columns 10000)"
fi

# Memory usage
section_mem

# Load and number of processes
section_cpu

# Uptime
section_uptime
# New variant: Information about speed and state in one section
if inpath ip; then
    echo '<<<lnx_if>>>'
    echo "[start_iplink]"
    ip address
    echo "[end_iplink]"
fi

# Interface counters: /proc/net/dev without its two header lines
echo '<<<lnx_if:sep(58)>>>'
sed 1,2d /proc/net/dev
if inpath ethtool; then
    # One [interface] block per device with the link settings reported by
    # ethtool and the hardware address, terminated by an empty line
    sed -e 1,2d /proc/net/dev | cut -d':' -f1 | sort | while read -r eth; do
        echo "[$eth]"
        ethtool "$eth" | grep -E '(Speed|Duplex|Link detected|Auto-negotiation):'
        printf '\tAddress: %s\n\n' "$(cat "/sys/class/net/$eth/address")"
    done
fi
# Current state of bonding interfaces
if [ -e /proc/net/bonding ]; then
    echo '<<<lnx_bonding:sep(58)>>>'
    # Run in a subshell so the working directory of the agent is untouched,
    # and only dump the files if changing the directory actually worked.
    (cd /proc/net/bonding && head -v -n 1000 ./*)
fi

# Same for Open vSwitch bonding
if inpath ovs-appctl; then
    BONDS=$(ovs-appctl bond/list)
    # Number of the column titled "bond" in the header line
    COL=$(printf '%s\n' "$BONDS" | awk '{for(i=1;i<=NF;i++) {if($i == "bond") printf("%d", i)} exit 0}')
    echo '<<<ovs_bonding:sep(58)>>>'
    for bond in $(printf '%s\n' "$BONDS" | sed -e 1d | cut -f"${COL}"); do
        echo "[$bond]"
        ovs-appctl bond/show "$bond"
    done
fi
# Number of TCP connections in the various states
if inpath waitmax; then
    echo '<<<tcp_conn_stats>>>'
    # NOTE(review): the status tested here is the one of the last pipeline
    # stage (awk), not the one of waitmax/cat - so the ss fallback is only
    # reached when awk itself fails. Behaviour kept as is.
    if THIS=$(waitmax 5 cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | awk ' /:/ { c[$4]++; } END { for (x in c) { print x, c[x]; } }'); then
        echo "$THIS"
    elif inpath ss; then
        # Translate the state names of ss to the hex codes of /proc/net/tcp
        ss -ant | grep -v ^State | awk ' /:/ { c[$1]++; } END { for (x in c) { print x, c[x]; } }' |
            sed -e 's/^ESTAB/01/g;s/^SYN-SENT/02/g;s/^SYN-RECV/03/g;s/^FIN-WAIT-1/04/g;s/^FIN-WAIT-2/05/g;s/^TIME-WAIT/06/g;s/^CLOSED/07/g;s/^CLOSE-WAIT/08/g;s/^LAST-ACK/09/g;s/^LISTEN/0A/g;s/^CLOSING/0B/g;'
    fi
fi
# Linux Multipathing (only if the tool is installed and configured)
if inpath multipath && [ -f /etc/multipath.conf ]; then
    echo '<<<multipath>>>'
    multipath -l
fi
# Performance counters of disks
if [ -z "$IS_DOCKERIZED" ]; then
    echo '<<<diskstat>>>'
    date +%s
    grep -E ' (x?[shv]d[a-z]*[0-9]*|cciss/c[0-9]+d[0-9]+|emcpower[a-z]+|dm-[0-9]+|VxVM.*|mmcblk.*|dasd[a-z]*|bcache[0-9]+|nvme[0-9]+n[0-9]+) ' </proc/diskstats
    if inpath dmsetup; then
        echo '[dmsetup_info]'
        dmsetup info -c --noheadings --separator ' ' -o name,devno,vg_name,lv_name
    fi
    if [ -d /dev/vx/dsk ]; then
        echo '[vx_dsk]'
        stat -c "%t %T %n" /dev/vx/dsk/*/*
    fi
else
    # Inside a docker container the counters come from the blkio cgroup
    echo '<<<docker_container_diskstat>>>'
    echo "[time]"
    date +%s
    for F in io_service_bytes io_serviced; do
        echo "[$F]"
        cat "/sys/fs/cgroup/blkio/blkio.throttle.$F"
    done
    # Map device names to the content of their "dev" file
    echo "[names]"
    for F in /sys/block/*; do
        echo "${F##*/} $(cat "$F/dev")"
    done
fi

# Performance counters of the kernel
if [ -z "$IS_DOCKERIZED" ] && [ -z "$IS_LXC_CONTAINER" ]; then
    echo '<<<kernel>>>'
    date +%s
    cat /proc/vmstat /proc/stat
fi
# Hardware sensors via IPMI (need ipmitool)
if inpath ipmitool; then
    run_cached -s "ipmi:sep(124)" 300 "waitmax 300 ipmitool sensor list | grep -v 'command failed' | grep -E -v '^[^ ]+ na ' | grep -v ' discrete '"
    # readable discrete sensor states
    run_cached -s "ipmi_discrete:sep(124)" 300 "waitmax 300 ipmitool sdr elist compact"
fi

# IPMI data via ipmi-sensors (of freeipmi). Please make sure, that if you
# have installed freeipmi that IPMI is really supported by your hardware.
if inpath ipmi-sensors && ls /dev/ipmi* >/dev/null 2>&1; then
    echo '<<<ipmi_sensors>>>'
    # Newer ipmi-sensors version have new output format; Legacy format can be used
    if ipmi-sensors --help | grep -q legacy-output; then
        IPMI_FORMAT="--legacy-output"
    else
        IPMI_FORMAT=""
    fi
    if ipmi-sensors --help | grep -q " \-\-groups"; then
        IPMI_GROUP_OPT="-g"
    else
        IPMI_GROUP_OPT="-t"
    fi

    # At least with ipmi-sensors 0.7.16 this group is Power_Unit instead of "Power Unit"
    #
    # The string below is the script of the cached job. $IPMI_FORMAT and
    # $IPMI_GROUP_OPT are expanded right here; everything escaped with a
    # backslash is evaluated later, when the job runs. The exit code check
    # has to be escaped as well, and it must look at the first pipeline
    # stage (ipmi-sensors), not at sed.
    run_cached -s ipmi_sensors 300 "for class in Temperature Power_Unit Fan; do
        ipmi-sensors $IPMI_FORMAT --sdr-cache-directory /var/cache $IPMI_GROUP_OPT \"\$class\" | sed -e 's/ /_/g' -e 's/:_\?/ /g' -e 's@ \([^(]*\)_(\([^)]*\))@ \2_\1@'
        # In case of a timeout immediately leave loop.
        if [ \"\${PIPESTATUS[0]}\" = 255 ]; then break ; fi
    done"
fi
# RAID status of Linux software RAID
echo '<<<md>>>'
cat /proc/mdstat

# RAID status of Linux RAID via device mapper
if inpath dmraid && DMSTATUS=$(waitmax 3 dmraid -r); then
    echo '<<<dmraid>>>'

    # Output name and status
    waitmax 20 dmraid -s | grep -e '^name' -e '^status'

    # Output disk names of the RAID disks
    DISKS=$(printf '%s\n' "$DMSTATUS" | cut -f1 -d":")

    for disk in $DISKS; do
        # Model of the disk, looked up by the last path component of the device
        device=$(cat "/sys/block/${disk##*/}/device/model")
        status=$(printf '%s\n' "$DMSTATUS" | grep "^${disk}")
        echo "${status} Model: ${device}"
    done
fi
# RAID status of LSI controllers via cfggen
if inpath cfggen; then
    echo '<<<lsi>>>'
    # Keep the id/state lines, strip all blanks and turn the first colon
    # into the field separator
    cfggen 0 DISPLAY |
        grep -E '(Target ID|State|Volume ID|Status of volume)[[:space:]]*:' |
        sed -e 's/ *//g' -e 's/:/ /'
fi
# RAID status of LSI MegaRAID controller via MegaCli. You can download that tool from:
# http://www.lsi.com/downloads/Public/MegaRAID%20Common%20Files/8.02.16_MegaCLI.zip
#
# The first of the known binary names found in the PATH is used.
MegaCli_bin="unknown"
for megacli_candidate in MegaCli MegaCli64 megacli storcli storcli64; do
    if inpath "$megacli_candidate"; then
        MegaCli_bin=$megacli_candidate
        break
    fi
done

if [ "$MegaCli_bin" != "unknown" ]; then
    echo '<<<megaraid_pdisks>>>'
    # The enclosure info is reduced to the words "adapter N", "Enclosure N"
    # and "Device ID N"; each adapter and each "dev2enc" entry starts a new line.
    for part in $("$MegaCli_bin" -EncInfo -aALL -NoLog </dev/null |
        sed -rn 's/:/ /g; s/[[:space:]]+/ /g; s/^ //; s/ $//; s/Number of enclosures on adapter ([0-9]+).*/adapter \1/g; /^(Enclosure|Device ID|adapter) [0-9]+$/ p'); do
        [ "$part" = adapter ] && echo ""
        [ "$part" = 'Enclosure' ] && printf '\ndev2enc'
        printf ' %s' "$part"
    done
    echo
    "$MegaCli_bin" -PDList -aALL -NoLog </dev/null |
        grep -E 'Enclosure|Raw Size|Slot Number|Device Id|Firmware state|Inquiry|Adapter'
    echo '<<<megaraid_ldisks>>>'
    "$MegaCli_bin" -LDInfo -Lall -aALL -NoLog </dev/null | grep -E 'Size|State|Number|Adapter|Virtual'
    echo '<<<megaraid_bbu>>>'
    "$MegaCli_bin" -AdpBbuCmd -GetBbuStatus -aALL -NoLog </dev/null | grep -v Exit
fi
# RAID status of 3WARE disk controller (by Radoslaw Bak)
if inpath tw_cli; then
    # First column of "tw_cli show" from the fourth line on names the controllers
    for C in $(tw_cli show | awk 'NR < 4 { next } { print $1 }'); do
        echo '<<<3ware_info>>>'
        tw_cli "/$C" show all | grep -E 'Model =|Firmware|Serial'
        # Disk and unit lines get prefixed with "<controller>/"
        echo '<<<3ware_disks>>>'
        tw_cli "/$C" show drivestatus | grep -E 'p[0-9]' | sed "s/^/$C\//"
        echo '<<<3ware_units>>>'
        tw_cli "/$C" show unitstatus | grep -E 'u[0-9]' | sed "s/^/$C\//"
    done
fi
# RAID controllers from areca (Taiwan)
# cli64 can be found at ftp://ftp.areca.com.tw/RaidCards/AP_Drivers/Linux/CLI/
if inpath cli64; then
    # Cached for 300 seconds; the first two and the last two lines of the
    # listing are dropped
    run_cached -s arc_raid_status 300 "cli64 rsf info | tail -n +3 | head -n -2"
fi
# VirtualBox Guests. Section must always be output. Otherwise the
# check would not be executed in case no guest additions are installed.
# And that is something the check wants to detect
echo '<<<vbox_guest>>>'
if inpath VBoxControl && lsmod | grep vboxguest >/dev/null 2>&1; then
    VBoxControl -nologo guestproperty enumerate | cut -d, -f1,2
    # Report a failure of VBoxControl itself (first stage of the pipeline)
    [ "${PIPESTATUS[0]}" = 0 ] || echo "ERROR"
fi
# OpenVPN Clients. Currently we assume that the configuration is in
# /etc/openvpn. We might find a safer way to find the configuration later.
if [ -e /etc/openvpn/openvpn-status.log ]; then
    echo '<<<openvpn_clients:sep(44)>>>'
    # Take the part from "CLIENT LIST" to "ROUTING TABLE" and drop its
    # first three lines and its last line
    sed -n -e '/CLIENT LIST/,/ROUTING TABLE/p' </etc/openvpn/openvpn-status.log |
        sed -e 1,3d -e '$d'
fi
# Time synchronization with NTP
if inpath ntpq; then
    # remove heading, make first column space separated
    run_cached -s ntp 30 "waitmax 5 ntpq -np | sed -e 1,2d -e 's/^\(.\)/\1 /' -e 's/^ /%/' || true"
fi

# Time synchronization with Chrony
if inpath chronyc; then
    # Force successful exit code. Otherwise section will be missing if daemon not running
    #
    # The "| cat" has been added for some kind of regression in RedHat 7.5. The
    # SELinux rules shipped with that release were denying the chronyc call
    # without cat.
    run_cached -s chrony 30 "waitmax 5 chronyc -n tracking | cat || true"
fi
# Nvidia GPU state; needs nvidia-settings and the socket of X display :0
if inpath nvidia-settings && [ -S /tmp/.X11-unix/X0 ]; then
    echo '<<<nvidia>>>'
    for var in GPUErrors GPUCoreTemp; do
        # Each output line is prefixed with the name of the queried attribute
        DISPLAY=:0 waitmax 2 nvidia-settings -t -q "$var" | sed "s/^/$var: /"
    done
fi

# DRBD state (not inside of containers)
if [ -z "$IS_DOCKERIZED" ] && [ -z "$IS_LXC_CONTAINER" ] && [ -e /proc/drbd ]; then
    echo '<<<drbd>>>'
    cat /proc/drbd
fi
# Heartbeat monitoring
# Different handling for heartbeat clusters with and without CRM
# for the resource state
if [ -S /var/run/heartbeat/crm/cib_ro ] || [ -S /var/run/crm/cib_ro ] || pgrep crmd >/dev/null 2>&1 || pgrep -f pacemaker-controld >/dev/null 2>&1; then
    echo '<<<heartbeat_crm>>>'
    TZ=UTC crm_mon -1 -r | grep -v '^$' | sed 's/^ //; /^\sResource Group:/,$ s/^\s//; s/^\s/_/g'
fi
if inpath cl_status; then
    echo '<<<heartbeat_rscstatus>>>'
    cl_status rscstatus

    echo '<<<heartbeat_nodes>>>'
    # Lower case name of this host, computed once; the local node is skipped
    HB_LOCAL_NODE=$(echo "$HOSTNAME" | tr '[:upper:]' '[:lower:]')
    for NODE in $(cl_status listnodes); do
        if [ "$NODE" != "$HB_LOCAL_NODE" ]; then
            STATUS=$(cl_status nodestatus "$NODE")
            # One line per node: name, status and all links with their status
            printf '%s' "$NODE $STATUS"
            for LINK in $(cl_status listhblinks "$NODE" 2>/dev/null); do
                printf '%s' " $LINK $(cl_status hblinkstatus "$NODE" "$LINK")"
            done
            echo
        fi
    done
fi
# Postfix mailqueue monitoring
# Determine the number of mails and their size in several postfix mail queues
#
# Arguments: $1 - queue directory of a postfix instance; nothing is
#                 printed when it is empty
#            $2 - name printed in the [[[...]]] sub header (may be empty)
# Outputs:   section <<<postfix_mailq>>> with one line
#            "QUEUE_<name> <size as reported by du -s> <number of files>"
#            for each of the queues "deferred" and "active"
read_postfix_queue_dirs() {
    # All working variables are local, the caller's variables stay untouched
    local postfix_queue_dir=$1
    local queue count size

    if [ -n "$postfix_queue_dir" ]; then
        echo '<<<postfix_mailq>>>'
        echo "[[[${2}]]]"
        for queue in deferred active; do
            count=$(find "${postfix_queue_dir}/$queue" -type f | wc -l)
            size=$(du -s "${postfix_queue_dir}/$queue" | awk '{print $1 }')
            if [ -z "$size" ]; then
                size=0
            fi
            if [ -z "$count" ]; then
                echo "Mail queue is empty"
            else
                echo "QUEUE_${queue} $size $count"
            fi
        done
    fi
}
# Postfix mailqueue monitoring
# Determine the number of mails and their size in several postfix mail queues
if inpath postconf; then
    # Check if multi_instance_directories exists in main.cf and is not empty
    # always takes the last entry, multiple entries possible
    multi_instances_dirs=$(postconf -c /etc/postfix 2>/dev/null | grep ^multi_instance_directories | sed 's/.*=[[:space:]]*//g')
    # Word splitting is intended: one instance directory per word. The loop
    # simply does not run when no instance directories are configured.
    for queue_dir in $multi_instances_dirs; do
        postfix_queue_dir=$(postconf -c "$queue_dir" 2>/dev/null | grep ^queue_directory | sed 's/.*=[[:space:]]*//g')
        read_postfix_queue_dirs "$postfix_queue_dir" "$queue_dir"
    done
    # Always check for the default queue. It can exist even if multiple instances are configured
    read_postfix_queue_dirs "$(postconf -h queue_directory 2>/dev/null)"

elif [ -x /usr/sbin/ssmtp ]; then
    echo '<<<postfix_mailq>>>'
    mailq 2>&1 | sed 's/^[^:]*: \(.*\)/\1/' | tail -n 6
fi
# Postfix status monitoring. Can handle multiple instances.
if inpath postfix; then
    echo "<<<postfix_mailq_status:sep(58)>>>"
    for i in /var/spool/postfix*/; do
        if [ ! -e "$i/pid/master.pid" ]; then
            echo "$i:the Postfix mail system is not running" | sed 's/\/var\/spool\///g'
        elif [ ! -r "$i/pid/master.pid" ]; then
            echo "$i:PID file exists but is not readable"
        else
            postfix_pid=$(sed 's/ //g' <"$i/pid/master.pid") # handle possible spaces in output
            # The instance counts as running when the executable of that
            # PID looks like the postfix master binary
            if readlink -- "/proc/${postfix_pid}/exe" | grep -q ".*postfix/\(s\?bin/\)\?master.*"; then
                echo "$i:the Postfix mail system is running:PID:$postfix_pid" | sed 's/\/var\/spool\///g'
            else
                echo "$i:PID file exists but instance is not running!" | sed 's/\/var\/spool\///g'
            fi
        fi
    done
fi
# Check status of qmail mailqueue
if inpath qmail-qstat; then
    echo "<<<qmail_stats>>>"
    qmail-qstat
fi

# Nullmailer queue monitoring: size (as reported by du -s) and number of
# files of the queue directory
if inpath nullmailer-send && [ -d /var/spool/nullmailer/queue ]; then
    echo '<<<nullmailer_mailq>>>'
    COUNT=$(find /var/spool/nullmailer/queue -type f | wc -l)
    SIZE=$(du -s /var/spool/nullmailer/queue | awk '{print $1 }')
    echo "$SIZE $COUNT"
fi
# Check status of OMD sites and Check_MK Notification spooler
if inpath omd; then
    echo '<<<labels:sep(0)>>>'
    echo '{"cmk/check_mk_server": "yes"}'

    run_cached -s omd_status 60 "omd status --bare --auto || true"
    echo '<<<mknotifyd:sep(0)>>>'
    date +%s
    for statefile in /omd/sites/*/var/log/mknotifyd.state; do
        if [ -e "$statefile" ]; then
            # Site name is the path component right after /omd/sites/
            site=${statefile%/var/log*}
            site=${site#/omd/sites/}
            echo "[$site]"
            grep -v '^#' <"$statefile"
        fi
    done

    echo '<<<omd_apache:sep(124)>>>'
    for statsfile in /omd/sites/*/var/log/apache/stats; do
        if [ -e "$statsfile" ]; then
            site=${statsfile%/var/log*}
            site=${site#/omd/sites/}
            echo "[$site]"
            cat "$statsfile"
            # The stats file is emptied after it has been read
            : >"$statsfile"
            # prevent next section to fail caused by a missing newline at the end of the statsfile
            echo
        fi
    done
fi
# Welcome the ZFS check on Linux
# We do not endorse running ZFS on linux if your vendor doesn't support it ;)
# check zpool status
if inpath zpool; then
    echo "<<<zpool_status>>>"
    zpool status -x
    echo "<<<zpool>>>"
    zpool list
fi
# Veritas Cluster Server
# Software is always installed in /opt/VRTSvcs.
# Secure mode must be off to allow root to execute commands
if [ -x /opt/VRTSvcs/bin/haclus ]; then
    echo "<<<veritas_vcs>>>"
    # Host name up to the first dot
    vcshost=$(hostname | cut -d. -f1)
    waitmax -s 9 2 /opt/VRTSvcs/bin/haclus -display -localclus | grep -e ClusterName -e ClusState
    waitmax -s 9 2 /opt/VRTSvcs/bin/hasys -display -attribute SysState
    waitmax -s 9 2 /opt/VRTSvcs/bin/hagrp -display -sys "$vcshost" -attribute State -localclus
    waitmax -s 9 2 /opt/VRTSvcs/bin/hares -display -sys "$vcshost" -attribute State -localclus
    waitmax -s 9 2 /opt/VRTSvcs/bin/hagrp -display -attribute TFrozen -attribute Frozen
fi
# Fileinfo-Check: put patterns for files into /etc/check_mk/fileinfo.cfg
#
# The perl script gets the configuration directory as its only argument. It
# reads the glob patterns from fileinfo.cfg and fileinfo.d/* (empty lines
# and comment lines are skipped, $DATE:<format>$ is replaced by the output
# of date +"<format>") and prints nothing at all when no pattern is
# configured. Otherwise, for each path matching a pattern, one line of
# section <<<fileinfo>>> is printed: size and mtime for regular files,
# "missing" for anything else that is not a directory.
# NOTE(review): the date format is passed to a shell via backticks; this
# assumes that the configuration files are writable by trusted users only.
perl -e '
    use File::Glob "bsd_glob";
    my @patterns = ();
    foreach (bsd_glob("$ARGV[0]/fileinfo.cfg"), bsd_glob("$ARGV[0]/fileinfo.d/*")) {
        open my $handle, "<", $_ or next;
        while (<$handle>) {
            chomp;
            next if /^\s*(#|$)/;
            my $pattern = $_;
            $pattern =~ s/\$DATE:(.*?)\$/substr(`date +"$1"`, 0, -1)/eg;
            push @patterns, $pattern;
        }
        warn "error while reading $_: $!\n" if $!;
        close $handle;
    }
    exit if ! @patterns;

    print "<<<fileinfo:sep(124)>>>\n", time, "\n[[[header]]]\nname|status|size|time\n[[[content]]]\n";

    foreach (@patterns) {
        foreach (bsd_glob("$_")) {
            if (! -f) {
                print "$_|missing\n" if ! -d;
            } elsif (my @infos = stat) {
                print "$_|ok|$infos[7]|$infos[9]\n";
            } else {
                print "$_|stat failed: $!\n";
            }
        }
    }
' -- "$MK_CONFDIR"
# Get stats about OMD monitoring cores running on this machine.
# Since cd is a shell builtin the check does not affect the performance
# on non-OMD machines.
# NOTE: the working directory stays at /omd/sites after this block.
if cd /omd/sites; then
    # Livestatus "GET status" of every site that has a live socket.
    echo '<<<livestatus_status:sep(59)>>>'
    for site in *; do
        if [ -S "/omd/sites/$site/tmp/run/live" ]; then
            echo "[$site]"
            echo -e "GET status" |
                waitmax 3 "/omd/sites/$site/bin/unixcat" "/omd/sites/$site/tmp/run/live"
        fi
    done

    # Expiry dates (epoch seconds, UTC) of the CA and site certificates.
    echo '<<<livestatus_ssl_certs:sep(124)>>>'
    for site in *; do
        # Skip the literal "*" of an empty directory and stray non-directory
        # entries; they are not sites and must not get a section header.
        [ -d "/omd/sites/$site" ] || continue
        echo "[$site]"
        for PEM_PATH in "/omd/sites/$site/etc/ssl/ca.pem" "/omd/sites/$site/etc/ssl/sites/$site.pem"; do
            if [ -f "$PEM_PATH" ]; then
                # Without a usable end date, date(1) would be called with an
                # empty string and report a bogus timestamp, so skip the file.
                CERT_DATE=$(openssl x509 -enddate -noout -in "$PEM_PATH") || continue
                CERT_DATE=${CERT_DATE/notAfter=/}
                [ -n "$CERT_DATE" ] || continue
                echo "$PEM_PATH|$(date --date="$CERT_DATE" --utc +%s)"
            fi
        done
    done

    # Event console status (JSON) of every site that has a status socket.
    echo '<<<mkeventd_status:sep(0)>>>'
    for site in *; do
        if [ -S "/omd/sites/$site/tmp/run/mkeventd/status" ]; then
            echo "[\"$site\"]"
            echo -e "GET status\nOutputFormat: json" |
                waitmax 3 "/omd/sites/$site/bin/unixcat" "/omd/sites/$site/tmp/run/mkeventd/status"
        fi
    done
fi
|
|
|
|
# Collect states of configured Check_MK site backup jobs.
# The section is only opened if at least one state file exists.
if ls /omd/sites/*/var/check_mk/backup/*.state >/dev/null 2>&1; then
    echo "<<<mkbackup>>>"
    for F in /omd/sites/*/var/check_mk/backup/*.state; do
        # /omd/sites/<site>/var/... -> <site>
        SITE=${F#/omd/sites/}
        SITE=${SITE%%/*}

        # .../<job>.state -> <job>
        JOB_IDENT=${F##*/}
        JOB_IDENT=${JOB_IDENT%.state}

        # The state of a restore is not a backup job and is left out.
        [ "$JOB_IDENT" = "restore" ] && continue

        echo "[[[site:$SITE:$JOB_IDENT]]]"
        cat "$F"
        echo
    done
fi
|
|
|
|
# Collect states of configured CMA backup jobs.
# Requires the mkbackup command and at least one state file.
if inpath mkbackup && ls /var/lib/mkbackup/*.state >/dev/null 2>&1; then
    echo "<<<mkbackup>>>"
    for F in /var/lib/mkbackup/*.state; do
        # /var/lib/mkbackup/<job>.state -> <job>
        JOB_IDENT=${F##*/}
        JOB_IDENT=${JOB_IDENT%.state}

        # The state of a restore is not a backup job and is left out.
        [ "$JOB_IDENT" = "restore" ] && continue

        echo "[[[system:$JOB_IDENT]]]"
        cat "$F"
        echo
    done
fi
|
|
|
|
# Get statistics about monitored jobs. Below the job directory there
# is a sub directory per user that ran a job. That directory must be
# owned by the user so that a symlink or hardlink attack for reading
# arbitrary files can be avoided.
if pushd "$MK_VARDIR/job" >/dev/null; then
    echo '<<<job>>>'
    for username in *; do
        # Only handle sub directories that can actually be entered.
        { [ -d "$username" ] && cd "$username"; } || continue

        if ((EUID == 0)); then
            # As root, read the files with the identity of the directory
            # owner instead of root privileges.
            su -s "$SHELL" "$username" -c "head -n -0 -v *"
        else
            head -n -0 -v ./*
        fi

        cd ..
    done
    popd >/dev/null
fi
|
|
|
|
# Gather thermal information provided e.g. by acpi
# At the moment only supporting thermal sensors
# Skipped when IS_DOCKERIZED or IS_LXC_CONTAINER is set. One output line per
# thermal zone: name|mode|type|temp|<trip point temp>|<trip point type>|...
if [ -z "$IS_DOCKERIZED" ] && [ -z "$IS_LXC_CONTAINER" ] && ls /sys/class/thermal/thermal_zone* >/dev/null 2>&1; then
    echo '<<<lnx_thermal:sep(124)>>>'
    for F in /sys/class/thermal/thermal_zone*; do
        line="${F##*/}"

        # Zones without a "mode" file get "-" as placeholder.
        if [ ! -e "$F/mode" ]; then
            line="${line}|-"
        else
            line="${line}|$(cat "$F"/mode)"
        fi

        line="${line}|$(cat "$F"/{type,temp} | tr \\n "|")"

        # ls is used on purpose: it sorts the names so that temp and type of
        # the same trip point end up next to each other. Zones without trip
        # points make ls fail; that is expected, so its error output is
        # discarded instead of polluting stderr.
        # shellcheck disable=SC2045
        for G in $(ls "$F"/trip_point_*_{temp,type} 2>/dev/null); do
            line="${line}$(tr <"$G" \\n "|")"
        done

        # Drop the trailing separator.
        echo "${line%?}"
    done
fi
|
|
|
|
# Libelle Business Shadow: output of "trd -s" if the trd command exists.
if inpath trd; then
    printf '%s\n' "<<<libelle_business_shadow:sep(58)>>>"
    trd -s
fi
|
|
|
|
# HTTP Accelerator Statistics: output of "varnishstat -1" if the
# varnishstat command exists.
if inpath varnishstat; then
    printf '%s\n' "<<<varnish>>>"
    varnishstat -1
fi
|
|
|
|
# Proxmox Cluster: cluster status and node list if pvecm exists.
if inpath pvecm; then
    printf '%s\n' "<<<pvecm_status:sep(58)>>>"
    pvecm status

    printf '%s\n' "<<<pvecm_nodes>>>"
    pvecm nodes
fi
|
|
|
|
# HAProxy statistics: send "show stat" to each readable admin socket.
# socat is needed to talk to the socket.
for HAPROXY_SOCK in /run/haproxy/admin.sock /var/lib/haproxy/stats; do
    [ -r "$HAPROXY_SOCK" ] || continue
    inpath socat || continue

    echo "<<<haproxy:sep(44)>>>"
    echo "show stat" | socat - "UNIX-CONNECT:$HAPROXY_SOCK"
done
|
|
|
|
# Start new liveupdate process in background on each agent execution. Starting
# a new live update process will terminate the old one automatically after
# max. 1 sec.
# Real-Time Checks are only started if they are configured, the remote
# address is known and openssl is available.
if [ ! -e "$MK_CONFDIR/real_time_checks.cfg" ]; then
    : # Real-Time Checks are not configured
elif [ -z "$REMOTE" ]; then
    echo "ERROR: \$REMOTE not specified. Not starting Real-Time Checks." >&2
elif ! inpath openssl; then
    echo "ERROR: openssl command is missing. Not starting Real-Time Checks." >&2
else
    run_real_time_checks >/dev/null &
fi
|
|
|
|
# MK's Remote Plugin Executor
# Every non-empty, non-comment line of mrpe.cfg has the form
#   <description> [(<key>=<value>[:<key>=<value>...])] <command line>
# Recognized keys: "interval" (run through run_cached instead of run_mrpe)
# and "appendage" (switches the run_cached option from -m to -ma).
if [ -e "$MK_CONFDIR/mrpe.cfg" ]; then
    grep -Ev '^[[:space:]]*($|#)' "$MK_CONFDIR/mrpe.cfg" |
        while read -r descr cmdline; do
            interval=
            args="-m"

            # NOTE: Due to an escaping-related bug in some old bash versions
            # (3.2.x), we have to use an intermediate variable for the pattern.
            pattern='\(([^\)]*)\)[[:space:]](.*)'
            if [[ $cmdline =~ $pattern ]]; then
                parameters=${BASH_REMATCH[1]}
                cmdline=${BASH_REMATCH[2]}

                # split multiple parameter assignments
                for par in $(echo "$parameters" | tr ":" "\n"); do
                    # split each assignment: key is the text before the first
                    # "=", value the text between the first and the second
                    # "=". Without any "=" both are the whole parameter.
                    key=${par%%=*}
                    case $par in
                        *=*)
                            value=${par#*=}
                            value=${value%%=*}
                            ;;
                        *)
                            value=$par
                            ;;
                    esac

                    case $key in
                        interval) interval=$value ;;
                        appendage) args="-ma" ;;
                    esac
                done
            fi

            if [ -n "$interval" ]; then
                run_cached "$args" "$descr" "$interval" "$cmdline"
            else
                run_mrpe "$descr" "$cmdline"
            fi
        done
fi
|
|
|
|
# MK's runas Executor
# Every non-empty, non-comment line of runas.cfg has the form
#   <type> <user> <include>
# type is "mrpe", "local" or "plugin"; user "-" means "do not switch user";
# include is a directory (or, for mrpe, also a config file).
if [ -e "$MK_CONFDIR/runas.cfg" ]; then
    grep -Ev '^[[:space:]]*($|#)' "$MK_CONFDIR/runas.cfg" |
        while read -r type user include; do
            if [ -d "$include" ] || { [ "$type" = "mrpe" ] && [ -f "$include" ]; }; then
                # Command prefix used to build the mrpe command line string.
                PREFIX=""
                if [ "$user" != "-" ]; then
                    PREFIX="su $user -c "
                fi

                # mrpe includes
                if [ "$type" = "mrpe" ]; then
                    grep -Ev '^[[:space:]]*($|#)' "$include" |
                        while read -r descr cmdline; do
                            interval=
                            # NOTE: Due to an escaping-related bug in some old bash
                            # versions (3.2.x), we have to use an intermediate variable
                            # for the pattern.
                            pattern='\(([^\)]*)\)[[:space:]](.*)'
                            if [[ $cmdline =~ $pattern ]]; then
                                parameters=${BASH_REMATCH[1]}
                                cmdline=${BASH_REMATCH[2]}

                                # split multiple parameter assignments
                                for par in $(echo "$parameters" | tr ":" "\n"); do
                                    # split each assignment
                                    IFS='=' read -r key value <<<"${par}"
                                    if [ "$key" = "interval" ]; then
                                        interval=$value
                                        # no other parameters supported currently
                                    fi
                                done
                            fi

                            if [ -n "$PREFIX" ]; then
                                cmdline="$PREFIX'$cmdline'"
                            fi
                            if [ -z "$interval" ]; then
                                run_mrpe "$descr" "$cmdline"
                            else
                                run_cached -m "$descr" "$interval" "$cmdline"
                            fi
                        done

                # local and plugin includes
                elif [ "$type" = "local" ] || [ "$type" = "plugin" ]; then
                    if [ "$type" = "local" ]; then
                        echo "<<<local>>>"
                    fi

                    # IFS= keeps leading/trailing blanks of file names intact.
                    find "$include" -executable -type f |
                        while IFS= read -r filename; do
                            # Run the file directly instead of through an
                            # unquoted command string, so that blanks or glob
                            # characters in the path cannot split or expand it.
                            if [ -n "$PREFIX" ]; then
                                su "$user" -c "\"$filename\""
                            else
                                "$filename"
                            fi
                        done
                fi
            fi
        done
fi
|
|
|
|
#######################################
# Test whether a file may be run as a plugin or local check.
# Arguments: $1 - path of the candidate file
# Returns:   0 if $1 is a regular, executable file whose name does not end
#            in .dpkg-new, .dpkg-old or .dpkg-temp; non-zero otherwise
#######################################
is_valid_plugin() {
    # NOTE: Due to an escaping-related bug in some old bash versions
    # (3.2.x), we have to use an intermediate variable for the pattern.
    # It is local so that it cannot clobber the global "pattern" used by
    # other parts of the agent.
    local pattern='\.dpkg-(new|old|temp)$'
    [[ -f "$1" && -x "$1" && ! "$1" =~ $pattern ]]
}
|
|
|
|
# Local checks
# The section header is always printed, even if $LOCALDIR cannot be entered.
echo '<<<local>>>'
if cd "$LOCALDIR"; then
    # Checks directly in the directory are run on every agent call.
    for skript in ./*; do
        is_valid_plugin "$skript" || continue
        ./"$skript"
    done

    # Call some plugins only every X'th second: they live in a sub directory
    # named after the interval, which is handed to run_cached together with a
    # cache name in which "/" is replaced by "\".
    for skript in [1-9]*/*; do
        is_valid_plugin "$skript" || continue
        run_cached "local_${skript//\//\\}" "${skript%/*}" "$skript"
    done
fi
|
|
|
|
# Plugins
if cd "$PLUGINSDIR"; then
    # Plugins directly in the directory are run on every agent call.
    for skript in ./*; do
        is_valid_plugin "$skript" || continue
        ./"$skript"
    done

    # Call some plugins only every Xth second: they live in a sub directory
    # named after the interval, which is handed to run_cached together with a
    # cache name in which "/" is replaced by "\".
    for skript in [1-9]*/*; do
        is_valid_plugin "$skript" || continue
        run_cached "plugins_${skript//\//\\}" "${skript%/*}" "$skript"
    done
fi
|
|
|
|
# Agent output snippets created by cronjobs, etc.
# Every file in $SPOOLDIR is printed verbatim. If the file name starts with
# a number, that number is the maximum age of the file in seconds; a file
# older than that is ignored.
# pushd is part of the condition: if the directory cannot be entered, the
# loop must not run, otherwise it would print the files of whatever
# directory happens to be the current one.
if [ -d "$SPOOLDIR" ] && [ -r "$SPOOLDIR" ] && pushd "$SPOOLDIR" >/dev/null; then
    now=$(date +%s)

    # NOTE: Due to an escaping-related bug in some old bash versions
    # (3.2.x), regular expressions are kept in an intermediate variable.
    leading_digits='^[0-9]+'

    for file in *; do
        # An unmatched glob stays literal: the directory is empty.
        test "$file" = "*" && break

        # Collect all digits from the front of the file name in maxage.
        maxage=""
        if [[ $file =~ $leading_digits ]]; then
            maxage=${BASH_REMATCH[0]}
        fi

        # If there is at least one digit, then we honor that.
        if [ "$maxage" ]; then
            # The file may have vanished in the meantime; skip it then
            # instead of running into an arithmetic error below.
            mtime=$(stat -c %Y "$file") || continue
            # [ -gt ] compares decimal numbers, so a prefix with leading
            # zeros such as "0600" is not mistaken for an octal number.
            if [ $((now - mtime)) -gt "$maxage" ]; then
                continue
            fi
        fi

        # Output the file
        cat "$file"
    done
    popd >/dev/null
fi
|