> nagios
Configure Nagios for infrastructure monitoring, service checks, host monitoring, and alert notifications. Use when a user needs to set up Nagios Core, write check commands, configure host and service definitions, manage notification contacts, or create custom monitoring plugins.
curl "https://skillshub.wtf/TerminalSkills/skills/nagios?format=md"

Nagios
Overview
Set up Nagios Core for infrastructure and service monitoring with host definitions, check commands, notification contacts, and custom plugins. Covers configuration, common check setups, and plugin development.
Instructions
Task A: Install and Configure Nagios
# Install Nagios Core, the standard plugins and the NRPE client plugin on Ubuntu
sudo apt-get update
sudo apt-get install -y nagios4 nagios-plugins nagios-nrpe-plugin
# Start the service now and register it to start on every boot
sudo systemctl enable --now nagios4
# Set admin password for web UI.
# The Debian/Ubuntu nagios4 package ships an Apache config that uses digest
# authentication (realm "Nagios4") backed by /etc/nagios4/htdigest.users, so the
# credentials must be created with htdigest; a Basic-auth htpasswd.users file in
# /etc/nagios4 is not read by that config.
# NOTE(review): login is only enforced once "Require valid-user" is enabled in
# /etc/nagios4/apache2.conf and the auth_digest/authz_groupfile modules are on.
sudo htdigest -c /etc/nagios4/htdigest.users Nagios4 nagiosadmin
# /etc/nagios4/nagios.cfg — Key configuration directives
# Directories scanned for object definitions (*.cfg files).
# /etc/nagios4/servers is not created by the package; create it before
# starting Nagios (sudo mkdir -p /etc/nagios4/servers) or validation fails.
cfg_dir=/etc/nagios4/conf.d
cfg_dir=/etc/nagios4/servers

# Main log file
log_file=/var/log/nagios4/nagios.log

# Runtime state lives under /var/lib/nagios4 on Debian/Ubuntu packages
# (there is no /var/nagios4 directory).
command_file=/var/lib/nagios4/rw/nagios.cmd
check_result_path=/var/lib/nagios4/spool/checkresults

# Seconds between refreshes of the status data shown in the web UI
status_update_interval=10

# Feature switches: external commands, notifications, active service checks
check_external_commands=1
enable_notifications=1
execute_service_checks=1
Task B: Define Hosts and Services
# /etc/nagios4/servers/web-servers.cfg — Web server host definitions
# Host web-01. Inherits unspecified settings from the linux-server template
# (defined outside this snippet).
# platform-team is a contactgroup, so it is referenced with contact_groups;
# the "contacts" directive only accepts individual contact names and would
# fail validation here.
define host {
    use                     linux-server
    host_name               web-01
    alias                   Web Server 01
    address                 192.168.1.10
    max_check_attempts      5
    check_period            24x7
    notification_interval   30
    notification_period     24x7
    contact_groups          platform-team
    hostgroups              web-servers
}
# Host web-02. Inherits unspecified settings from the linux-server template
# (defined outside this snippet).
# platform-team is a contactgroup, so it is referenced with contact_groups;
# the "contacts" directive only accepts individual contact names and would
# fail validation here.
define host {
    use                     linux-server
    host_name               web-02
    alias                   Web Server 02
    address                 192.168.1.11
    max_check_attempts      5
    check_period            24x7
    notification_interval   30
    notification_period     24x7
    contact_groups          platform-team
    hostgroups              web-servers
}
# Group used to attach the web service checks to both web hosts at once.
# Membership is declared here via "members"; the host definitions also name
# this group in their own "hostgroups" directive.
define hostgroup {
    hostgroup_name      web-servers
    alias               Web Servers
    members             web-01,web-02
}
# /etc/nagios4/servers/web-services.cfg — Service check definitions
# HTTP check applied to every host in the web-servers hostgroup.
# Expects a 200 or 301 status on port 80; checked every 2 intervals, retried
# every 1 interval, and alerts after 3 failed attempts.
# platform-team is a contactgroup, so it is referenced with contact_groups
# ("contacts" only accepts individual contact names).
# NOTE(review): the text after "!" is passed to the check_http command
# definition as $ARG1$ — confirm the definition in use forwards it to the plugin.
define service {
    use                     generic-service
    hostgroup_name          web-servers
    service_description     HTTP
    check_command           check_http!-p 80 -e "200,301"
    max_check_attempts      3
    check_interval          2
    retry_interval          1
    notification_interval   15
    contact_groups          platform-team
}
# TLS certificate expiry check for the web-servers hostgroup.
# check_http -S -C 30 connects over TLS on port 443 and alerts when the
# certificate has fewer than 30 days of validity left.
# platform-team is a contactgroup, so it is referenced with contact_groups
# ("contacts" only accepts individual contact names).
define service {
    use                     generic-service
    hostgroup_name          web-servers
    service_description     HTTPS Certificate
    check_command           check_http!-S -p 443 -C 30
    check_interval          360
    notification_interval   60
    contact_groups          platform-team
}
# Disk usage, executed on each web host through NRPE (remote command
# "check_disk" must exist in that host's nrpe.cfg).
# platform-team is a contactgroup, so it is referenced with contact_groups
# ("contacts" only accepts individual contact names).
define service {
    use                     generic-service
    hostgroup_name          web-servers
    service_description     Disk Usage
    check_command           check_nrpe!check_disk
    max_check_attempts      3
    check_interval          10
    contact_groups          platform-team
}
# CPU load, executed on each web host through NRPE (remote command
# "check_load" must exist in that host's nrpe.cfg).
# platform-team is a contactgroup, so it is referenced with contact_groups
# ("contacts" only accepts individual contact names).
define service {
    use                     generic-service
    hostgroup_name          web-servers
    service_description     CPU Load
    check_command           check_nrpe!check_load
    check_interval          5
    contact_groups          platform-team
}
# Memory usage, executed on each web host through NRPE (remote command
# "check_mem" must exist in that host's nrpe.cfg).
# platform-team is a contactgroup, so it is referenced with contact_groups
# ("contacts" only accepts individual contact names).
define service {
    use                     generic-service
    hostgroup_name          web-servers
    service_description     Memory Usage
    check_command           check_nrpe!check_mem
    check_interval          5
    contact_groups          platform-team
}
Task C: Configure Commands and NRPE
# /etc/nagios4/conf.d/commands.cfg — Custom check commands
# Runs the remote NRPE command named by $ARG1$ on the target host,
# giving up after 30 seconds (-t 30).
# NOTE(review): the nagios-nrpe-plugin package may already ship a command
# named check_nrpe; confirm there is no duplicate definition.
define command {
    command_name    check_nrpe
    command_line    /usr/lib/nagios/plugins/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ -t 30
}
# HTTP content check: $ARG1$ = port, $ARG2$ = URL path,
# $ARG3$ = string that must appear in the response (-s).
define command {
    command_name    check_http_content
    command_line    /usr/lib/nagios/plugins/check_http -H $HOSTADDRESS$ -p $ARG1$ -u $ARG2$ -s "$ARG3$"
}
# PostgreSQL connectivity check: $ARG1$ = database name (-d),
# $ARG2$ = login name (-l).
define command {
    command_name    check_postgres
    command_line    /usr/lib/nagios/plugins/check_pgsql -H $HOSTADDRESS$ -d $ARG1$ -l $ARG2$
}
# /etc/nagios/nrpe.cfg — NRPE configuration on remote hosts
# Listen on all interfaces; access is restricted by allowed_hosts below.
server_address=0.0.0.0
# Only the Nagios server at this address may query the NRPE daemon.
allowed_hosts=192.168.1.5
# 0 = reject command arguments sent by the client; thresholds are fixed here.
dont_blame_nrpe=0
# Commands the Nagios server can invoke by name (check_nrpe -c <name>).
# Root filesystem: warn below 20% free, critical below 10% free.
command[check_disk]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /
# Load average thresholds for the 1, 5 and 15 minute averages.
command[check_load]=/usr/lib/nagios/plugins/check_load -w 5,4,3 -c 10,8,6
# NOTE(review): check_mem.pl is not among the plugins installed earlier in this
# guide — confirm it is present on the host and that -w/-c/-f mean what is intended.
command[check_mem]=/usr/lib/nagios/plugins/check_mem.pl -w 80 -c 90 -f
# Total process count: warn above 250, critical above 400.
command[check_procs]=/usr/lib/nagios/plugins/check_procs -w 250 -c 400
# Swap: warn below 20% free, critical below 10% free.
command[check_swap]=/usr/lib/nagios/plugins/check_swap -w 20% -c 10%
Task D: Notification Contacts
# /etc/nagios4/conf.d/contacts.cfg — Contact and notification definitions
# Shared notification settings for platform-team members.
# "register 0" makes this a template only; no contact named
# platform-contact is created.
# The notify-*-by-email commands are expected to be defined outside this snippet.
define contact {
    name                            platform-contact
    service_notification_period     24x7
    host_notification_period        24x7
    service_notification_options    w,u,c,r
    host_notification_options       d,u,r
    service_notification_commands   notify-service-by-email
    host_notification_commands      notify-host-by-email
    register                        0
}

define contact {
    use             platform-contact
    contact_name    marta
    alias           Marta (Platform Lead)
    email           marta@example.com
}

# tom and nina are members of platform-team below, so they must exist as
# contacts or configuration validation fails. The aliases and addresses are
# placeholders — replace them with the real values.
define contact {
    use             platform-contact
    contact_name    tom
    alias           Tom
    email           tom@example.com
}

define contact {
    use             platform-contact
    contact_name    nina
    alias           Nina
    email           nina@example.com
}

# Group referenced by host and service definitions via contact_groups.
define contactgroup {
    contactgroup_name   platform-team
    alias               Platform Engineering Team
    members             marta,tom,nina
}
# Service notification via Slack. Passes notification type, service
# description, host alias, service state and plugin output as positional
# arguments to the helper script.
# NOTE(review): /usr/local/bin/nagios-slack-notify.sh is not shown in this
# guide, and no contact references this command yet; add it to a contact's
# service_notification_commands to activate it.
define command {
    command_name    notify-service-by-slack
    command_line    /usr/local/bin/nagios-slack-notify.sh "$NOTIFICATIONTYPE$" "$SERVICEDESC$" "$HOSTALIAS$" "$SERVICESTATE$" "$SERVICEOUTPUT$"
}
Task E: Custom Plugin
#!/bin/bash
# /usr/lib/nagios/plugins/check_api_health — Custom API health check plugin
# Usage: check_api_health -u <url> -w <warn_ms> -c <crit_ms>
#
# Requests <url> with curl, measures the wall-clock duration in milliseconds
# and reports a Nagios state:
#   0 OK        HTTP 200 and duration <= warn_ms
#   1 WARNING   HTTP 200 and duration >  warn_ms
#   2 CRITICAL  non-200 status (curl reports 000 on connection failure or
#               timeout) or duration > crit_ms
#   3 UNKNOWN   bad invocation (missing URL, unknown option, non-numeric threshold)
# Defaults: warn_ms=1000, crit_ms=3000.

URL=""
WARN=1000
CRIT=3000

# Print a usage error in plugin format and exit with UNKNOWN.
usage() {
    echo "UNKNOWN - Usage: check_api_health -u <url> [-w <warn_ms>] [-c <crit_ms>]"
    exit 3
}

while getopts "u:w:c:" opt; do
    case $opt in
        u) URL="$OPTARG" ;;
        w) WARN="$OPTARG" ;;
        c) CRIT="$OPTARG" ;;
        *) usage ;;   # unknown option or missing option argument
    esac
done

# Without a URL curl has nothing to request; report UNKNOWN instead of a
# misleading CRITICAL.
[ -n "$URL" ] || usage

# Thresholds are compared with "[ -gt ]", which errors on non-integers and
# would otherwise let the script fall through to OK.
for threshold in "$WARN" "$CRIT"; do
    case "$threshold" in
        ''|*[!0-9]*)
            echo "UNKNOWN - Thresholds must be whole milliseconds, got '$threshold'"
            exit 3
            ;;
    esac
done

# Nanosecond timestamps around the request (GNU date); the difference is
# converted to milliseconds. curl itself gives up after 5 seconds.
START=$(date +%s%N)
RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "$URL")
END=$(date +%s%N)
DURATION=$(( (END - START) / 1000000 ))

# Text after "|" is Nagios performance data.
if [ "$RESPONSE" != "200" ]; then
    echo "CRITICAL - HTTP $RESPONSE from $URL | response_time=${DURATION}ms"
    exit 2
elif [ "$DURATION" -gt "$CRIT" ]; then
    echo "CRITICAL - Response time ${DURATION}ms > ${CRIT}ms | response_time=${DURATION}ms"
    exit 2
elif [ "$DURATION" -gt "$WARN" ]; then
    echo "WARNING - Response time ${DURATION}ms > ${WARN}ms | response_time=${DURATION}ms"
    exit 1
else
    echo "OK - Response time ${DURATION}ms | response_time=${DURATION}ms"
    exit 0
fi
# Verify configuration before reloading.
# Chained with && so the reload only happens when verification succeeds;
# run as two separate commands, a broken config would still be reloaded.
sudo nagios4 -v /etc/nagios4/nagios.cfg && sudo systemctl reload nagios4
Best Practices
- Always run `nagios4 -v /etc/nagios4/nagios.cfg` to verify the config before reloading to prevent outages
- Use NRPE for remote checks that need local access (disk, CPU, memory)
- Set `max_check_attempts` > 1 to avoid alerting on transient failures
- Use hostgroups and servicegroups to apply checks to multiple hosts at once
- Output performance data (`| metric=value`) from plugins for graphing integration
- Use `check_interval` and `retry_interval` to balance monitoring granularity with load
> related_skills --same-repo
> zustand
You are an expert in Zustand, the small, fast, and scalable state management library for React. You help developers manage global state without boilerplate using Zustand's hook-based stores, selectors for performance, middleware (persist, devtools, immer), computed values, and async actions — replacing Redux complexity with a simple, un-opinionated API in under 1KB.
> zoho
Integrate and automate Zoho products. Use when a user asks to work with Zoho CRM, Zoho Books, Zoho Desk, Zoho Projects, Zoho Mail, or Zoho Creator, build custom integrations via Zoho APIs, automate workflows with Deluge scripting, sync data between Zoho apps and external systems, manage leads and deals, automate invoicing, build custom Zoho Creator apps, set up webhooks, or manage Zoho organization settings. Covers Zoho CRM, Books, Desk, Projects, Creator, and cross-product integrations.
> zod
You are an expert in Zod, the TypeScript-first schema declaration and validation library. You help developers define schemas that validate data at runtime AND infer TypeScript types at compile time — eliminating the need to write types and validators separately. Used for API input validation, form validation, environment variables, config files, and any data boundary.
> zipkin
Deploy and configure Zipkin for distributed tracing and request flow visualization. Use when a user needs to set up trace collection, instrument Java/Spring or other services with Zipkin, analyze service dependencies, or configure storage backends for trace data.