I’ve been very unhappy with the state of network monitoring applications lately. Most network monitoring tools are either too big or too arbitrary to be helpful for application support. This can be an issue when focusing on a specific application with components separated into various tiers, datacenters or locations. When network performance is in question the most helpful data is the active latency between a node and its other components (during or leading up to the time in question). If a network monitoring tool lacks any specificity of the application in question it will be viewed as too dense or cerebral to be useful; at worst it will harm troubleshooting. The quicker hard data can be accessed proving out the network layer, the quicker troubleshooting can move up the stack towards resolution.
Monolithic tools like Cacti are sometimes useful; however, the lighter the script is, the more nimbly it can be deployed on a wide variety of nodes. Because both FPing and RRDTool are small, useful and standard Linux packages they are ideal, so I wrote the following bash script that leverages only those 2 tools together. The data collected is roughly identical to SmokePing but has the benefit of not dirtying a system with unnecessary packages. The script can easily be deployed by any devops deployment and is run via crontab. Graphs can be created when, or if, they are needed.
fping_rrd.sh: Bourne-Again shell script, ASCII text executable
#!/usr/bin/env bash
## FPing data collector for RRDTOOL
#
# Pings every host in ${fping_hosts} and stores the individual round-trip
# times, their median and the number of lost pings in one RRD file per host
# under ${rrd_path}.
#
# Crontab:
#   */5 * * * * fping_rrd.sh
#
## Requires: fping, rrdtool
## 2021 - Script from www.davideaves.com

# Enable for debugging
#set -x

STEP=300  # 5min
PINGS=20  # 20 pings

# The first ping is usually an outlier; adding an extra ping to drop the first result.
fping_opts="-C $((PINGS+1)) -q -B1 -r1 -i10"
fping_hosts="172.31.3.1 172.31.3.3 172.31.4.1 172.31.4.10 172.31.15.1 172.31.15.4"

rrd_path="/var/lib/fping"
rrd_timestamp=$(date +%s)

#######################################
# Print the median of the samples read from stdin (one value per line).
# Lost pings ("-") and blank lines are ignored.
# Outputs: the median, or "U" (rrdtool's "unknown") when no sample is left,
#          so a host that never answered is not recorded as 0 ms.
#######################################
calc_median() {
  # sort(1) does the ordering so that any POSIX awk works (asort is gawk-only).
  awk 'NF && $1 != "-" { print $1 }' \
    | LC_ALL=C sort -n \
    | awk '
        { sample[NR] = $1 }
        END {
          if (NR == 0) { print "U"; exit }
          if (NR % 2) { print sample[(NR + 1) / 2] }
          else        { print (sample[NR / 2] + sample[NR / 2 + 1]) / 2.0 }
        }'
}

#######################################
# Create the RRD file for one host.
# Globals: fping_rrd (read), STEP (read), PINGS (read)
# Data sources: loss, median, ping1..ping${PINGS}; heartbeat is 2 * STEP.
#######################################
rrd_create() {
  local heartbeat=$((STEP * 2))
  local -a ping_ds=()
  local i

  for ((i = 1; i <= PINGS; i++)); do
    ping_ds+=("DS:ping${i}:GAUGE:${heartbeat}:0:180")
  done

  rrdtool create "${fping_rrd}" \
    --start now-2h --step "${STEP}" \
    "DS:loss:GAUGE:${heartbeat}:0:${PINGS}" \
    "DS:median:GAUGE:${heartbeat}:0:180" \
    "${ping_ds[@]}" \
    RRA:AVERAGE:0.5:1:1008 \
    RRA:AVERAGE:0.5:12:4320 \
    RRA:MIN:0.5:12:4320 \
    RRA:MAX:0.5:12:4320 \
    RRA:AVERAGE:0.5:144:720 \
    RRA:MAX:0.5:144:720 \
    RRA:MIN:0.5:144:720
}

#######################################
# Store one fping result line in the host's RRD file.
# Globals: fping_array (read) - words of one fping line: host, ":", samples
#          fping_rrd, rrd_timestamp, PINGS (read)
# Only the last ${PINGS} samples are used, which drops the extra first ping.
#######################################
rrd_update() {
  local -a samples=("${fping_array[@]: -${PINGS}}")
  local template="" values="${rrd_timestamp}"
  local loss=0 median sample i

  for ((i = 0; i < PINGS; i++)); do
    sample="${samples[i]}"
    template+="${template:+:}ping$((i + 1))"
    if [[ "${sample}" == "-" ]]; then
      # fping prints "-" for a lost ping; rrdtool expects "U" for unknown.
      loss=$((loss + 1))
      sample="U"
    fi
    values+=":${sample}"
  done

  median=$(printf '%s\n' "${samples[@]}" | calc_median)

  rrdtool update "${fping_rrd}" \
    --template "${template}:median:loss" \
    "${values}:${median}:${loss}"
}

# fping -C writes its per-host summary to stderr, hence 2>&1.
# ${fping_opts} and ${fping_hosts} are intentionally unquoted (word-split).
# shellcheck disable=SC2086
fping ${fping_opts} ${fping_hosts} 2>&1 | while read -r -a fping_array; do
  # Only accept "host : v1 v2 ..." result lines carrying enough samples;
  # anything else on the merged stream is a diagnostic, not a host.
  [[ "${fping_array[1]:-}" == ":" ]] || continue
  (( ${#fping_array[@]} >= PINGS + 2 )) || continue

  fping_rrd="${rrd_path}/fping_${fping_array[0],,}.rrd"

  # Create RRD file.
  if [[ ! -f "${fping_rrd}" ]]; then
    rrd_create
  fi

  # Update RRD file, but never more often than once per STEP.
  if [[ -f "${fping_rrd}" ]]; then
    rrd_last=$(rrdtool last "${fping_rrd}")
    if [[ "${rrd_last}" =~ ^[0-9]+$ ]] && (( rrd_timestamp - rrd_last >= STEP )); then
      rrd_update
    fi
    unset rrd_last
  fi
done
Creating Network Monitoring Graphs
The following are 3 example scripts that use rrdtool to create graphs from the RRD files.
Mini Graph
graph_mini.sh: Bourne-Again shell script, ASCII text executable
#!/usr/bin/env bash
## Create a mini graph from a RRD file
##
## Usage: graph_mini.sh file.rrd [start] [end]
##   start  date(1)/rrdtool time expression, default "-9 hours"
##   end    date(1)/rrdtool time expression, default "now"
## Writes <basename of file.rrd>_mini.png to the current directory.
##
## Requires: rrdtool
## 2021 - Script from www.davideaves.com

# Enable for debugging
#set -x

fping_rrd="${1}"
COLOR=( "FF5500" )

# Print the "rrdtool graph" command with all of its layout options.
# Globals: fping_rrd, START, END, TIME (read)
rrd_graph_cmd() {
cat << EOF
rrdtool graph "$(basename "${fping_rrd%.*}")_mini.png"
--start "${START}" --end "${END}"
--title "$(date -d "${START}") ($(awk -v TIME="${TIME}" 'BEGIN {printf "%.1f hr", TIME/3600}'))"
--height 65 --width 600
--vertical-label "Seconds"
--color BACK#F3F3F3
--color CANVAS#FDFDFD
--color SHADEA#CBCBCB
--color SHADEB#999999
--color FONT#000000
--color AXIS#2C4D43
--color ARROW#2C4D43
--color FRAME#2C4D43
--border 1
--font TITLE:10:"Arial"
--font AXIS:8:"Arial"
--font LEGEND:8:"Courier"
--font UNIT:8:"Arial"
--font WATERMARK:6:"Arial"
--imgformat PNG
EOF
}

# Print the DEF/CDEF/VDEF and drawing arguments for the RRD file.
# Globals: fping_rrd, PINGS, STEP, COLOR (read); rrd_idx (written)
rrd_graph_opts() {
rrd_idx=0
# The loss percentage divides by PINGS (read from the RRD) rather than a
# fixed 20, so files created with a different ping count graph correctly.
cat << EOF
DEF:median$((rrd_idx))="${fping_rrd}":median:AVERAGE
DEF:loss$((rrd_idx))="${fping_rrd}":loss:AVERAGE
$(for ((i=1;i<=PINGS;i++)); do echo "DEF:ping$((rrd_idx))p$((i))=\"${fping_rrd}\":ping$((i)):AVERAGE"; done)
CDEF:ploss$((rrd_idx))=loss$((rrd_idx)),$((PINGS)),/,100,*
CDEF:dm$((rrd_idx))=median$((rrd_idx)),0,100000,LIMIT
$(for ((i=1;i<=PINGS;i++)); do echo "CDEF:p$((rrd_idx))p$((i))=ping$((rrd_idx))p$((i)),UN,0,ping$((rrd_idx))p$((i)),IF"; done)
$(echo -n "CDEF:pings$((rrd_idx))=$((PINGS)),p$((rrd_idx))p1,UN"; for ((i=2;i<=PINGS;i++)); do echo -n ",p$((rrd_idx))p$((i)),UN,+"; done; echo ",-")
$(echo -n "CDEF:m$((rrd_idx))=p$((rrd_idx))p1"; for ((i=2;i<=PINGS;i++)); do echo -n ",p$((rrd_idx))p$((i)),+"; done; echo ",pings$((rrd_idx)),/")
$(echo -n "CDEF:sdev$((rrd_idx))=p$((rrd_idx))p1,m$((rrd_idx)),-,DUP,*"; for ((i=2;i<=PINGS;i++)); do echo -n ",p$((rrd_idx))p$((i)),m$((rrd_idx)),-,DUP,*,+"; done; echo ",pings$((rrd_idx)),/,SQRT")
CDEF:dmlow$((rrd_idx))=dm$((rrd_idx)),sdev$((rrd_idx)),2,/,-
CDEF:s2d$((rrd_idx))=sdev$((rrd_idx))
AREA:dmlow$((rrd_idx))
AREA:s2d$((rrd_idx))#${COLOR}30:STACK
LINE1:dm$((rrd_idx))#${COLOR}:"$(basename "${fping_rrd%.*}" | awk -F'_' '{print $NF}')\t"
VDEF:avmed$((rrd_idx))=median$((rrd_idx)),AVERAGE
VDEF:avsd$((rrd_idx))=sdev$((rrd_idx)),AVERAGE
CDEF:msr$((rrd_idx))=median$((rrd_idx)),POP,avmed$((rrd_idx)),avsd$((rrd_idx)),/
VDEF:avmsr$((rrd_idx))=msr$((rrd_idx)),AVERAGE
GPRINT:avmed$((rrd_idx)):"Median RTT\: %5.2lfms"
GPRINT:ploss$((rrd_idx)):AVERAGE:"Loss\: %5.1lf%%"
GPRINT:avsd$((rrd_idx)):"Std Dev\: %5.2lfms"
GPRINT:avmsr$((rrd_idx)):"Ratio\: %5.1lfms\\j"
COMMENT:"Probe\: $((PINGS)) pings every $((STEP)) seconds"
COMMENT:"${fping_rrd}\\j"
EOF
}

if [ ! -r "${fping_rrd}" ]
then
  printf '%s "file.rrd"\n' "${0}"
else
  STEP=$(rrdtool info "${fping_rrd}" | awk '/^step/{print $NF}')
  PINGS=$(rrdtool info "${fping_rrd}" | awk '/^ds.ping.*index/{count++} END{print count}')

  START="${2:-"-9 hours"}"
  END="${3:-now}"
  TIME=$(( $(date -d "${END}" +%s) - $(date -d "${START}" +%s) ))

  # The generated text is one command spread over many lines, so it is
  # expanded unquoted on purpose: word-splitting joins it before eval re-parses
  # the embedded quotes. Paths containing whitespace are not supported.
  # Globbing is switched off so RPN "*" operators are never expanded.
  set -f
  # shellcheck disable=SC2046
  eval $(rrd_graph_cmd; rrd_graph_opts)
  set +f
fi
Combined (multi) Graph
graph_multi.sh: Bourne-Again shell script, ASCII text executable
#!/usr/bin/env bash
## Create a mini graph from multiple RRDs
##
## Usage: graph_multi.sh file.png file1.rrd [file2.rrd ...]
## Every RRD file is drawn into the same graph with a random colour.
##
## Requires: rrdtool
## 2021 - Script from www.davideaves.com

# Enable for debugging
#set -x

START="-9 hours"
END="now"

png_file="${1}"
rrd_files="${*:2}"

# Print the "rrdtool graph" command with all of its layout options.
# Globals: png_file, START, END, TIME (read)
rrd_graph_cmd() {
cat << EOF
rrdtool graph "${png_file}"
--start "${START}" --end "${END}"
--title "$(date -d "${START}") ($(awk -v TIME="${TIME}" 'BEGIN {printf "%.1f hr", TIME/3600}'))"
--height 115 --width 600
--vertical-label "Seconds"
--color BACK#F3F3F3
--color CANVAS#FDFDFD
--color SHADEA#CBCBCB
--color SHADEB#999999
--color FONT#000000
--color AXIS#2C4D43
--color ARROW#2C4D43
--color FRAME#2C4D43
--border 1
--font TITLE:10:"Arial"
--font AXIS:8:"Arial"
--font LEGEND:8:"Courier"
--font UNIT:8:"Arial"
--font WATERMARK:6:"Arial"
--imgformat PNG
EOF
}

# Print the DEF/CDEF/VDEF and drawing arguments for every RRD file.
# Each file gets its own index suffix so the variable names do not clash.
# Globals: rrd_files (read); COLOR, PINGS, fping_rrd (written)
rrd_graph_opts() {
  rrd_idx=0

  # rrd_files is a space-separated list and is split on purpose.
  for fping_rrd in ${rrd_files}
  do
    COLOR=$(openssl rand -hex 3)
    PINGS=$(rrdtool info "${fping_rrd}" | awk '/^ds.ping.*index/{count++} END{print count}')

    # The loss percentage divides by this file's PINGS rather than a fixed 20.
cat << EOF
DEF:median$((rrd_idx))="${fping_rrd}":median:AVERAGE
DEF:loss$((rrd_idx))="${fping_rrd}":loss:AVERAGE
$(for ((i=1;i<=PINGS;i++)); do echo "DEF:ping$((rrd_idx))p$((i))=\"${fping_rrd}\":ping$((i)):AVERAGE"; done)
CDEF:ploss$((rrd_idx))=loss$((rrd_idx)),$((PINGS)),/,100,*
CDEF:dm$((rrd_idx))=median$((rrd_idx)),0,100000,LIMIT
$(for ((i=1;i<=PINGS;i++)); do echo "CDEF:p$((rrd_idx))p$((i))=ping$((rrd_idx))p$((i)),UN,0,ping$((rrd_idx))p$((i)),IF"; done)
$(echo -n "CDEF:pings$((rrd_idx))=$((PINGS)),p$((rrd_idx))p1,UN"; for ((i=2;i<=PINGS;i++)); do echo -n ",p$((rrd_idx))p$((i)),UN,+"; done; echo ",-")
$(echo -n "CDEF:m$((rrd_idx))=p$((rrd_idx))p1"; for ((i=2;i<=PINGS;i++)); do echo -n ",p$((rrd_idx))p$((i)),+"; done; echo ",pings$((rrd_idx)),/")
$(echo -n "CDEF:sdev$((rrd_idx))=p$((rrd_idx))p1,m$((rrd_idx)),-,DUP,*"; for ((i=2;i<=PINGS;i++)); do echo -n ",p$((rrd_idx))p$((i)),m$((rrd_idx)),-,DUP,*,+"; done; echo ",pings$((rrd_idx)),/,SQRT")
CDEF:dmlow$((rrd_idx))=dm$((rrd_idx)),sdev$((rrd_idx)),2,/,-
CDEF:s2d$((rrd_idx))=sdev$((rrd_idx))
AREA:dmlow$((rrd_idx))
AREA:s2d$((rrd_idx))#${COLOR}30:STACK
LINE1:dm$((rrd_idx))#${COLOR}:"$(basename "${fping_rrd%.*}" | awk -F'_' '{print $NF}')\t"
VDEF:avmed$((rrd_idx))=median$((rrd_idx)),AVERAGE
VDEF:avsd$((rrd_idx))=sdev$((rrd_idx)),AVERAGE
CDEF:msr$((rrd_idx))=median$((rrd_idx)),POP,avmed$((rrd_idx)),avsd$((rrd_idx)),/
VDEF:avmsr$((rrd_idx))=msr$((rrd_idx)),AVERAGE
GPRINT:avmed$((rrd_idx)):"Median RTT\: %5.2lfms"
GPRINT:ploss$((rrd_idx)):AVERAGE:"Loss\: %5.1lf%%"
GPRINT:avsd$((rrd_idx)):"Std Dev\: %5.2lfms"
GPRINT:avmsr$((rrd_idx)):"Ratio\: %5.1lfms\\j"
EOF

    (( rrd_idx++ ))
  done && unset rrd_idx
}

if [ -z "${rrd_files}" ]
then
  printf '%s "file.png" { file1.rrd ... file6.rrd }\n' "${0}"
else
  TIME=$(( $(date -d "${END}" +%s) - $(date -d "${START}" +%s) ))

  # The generated text is one command spread over many lines, so it is
  # expanded unquoted on purpose: word-splitting joins it before eval re-parses
  # the embedded quotes. Paths containing whitespace are not supported.
  # Globbing is switched off so RPN "*" operators are never expanded.
  set -f
  # shellcheck disable=SC2046
  eval $(rrd_graph_cmd; rrd_graph_opts)
  set +f
fi
SmokePing like Graph
graph_smoke.sh: Bourne-Again shell script, ASCII text executable
#!/usr/bin/env bash
## Create a SmokePing like graph from a RRD file
##
## Usage: graph_smoke.sh file.rrd [start] [end]
##   start  date(1)/rrdtool time expression, default "-7 hours"
##   end    date(1)/rrdtool time expression, default "now"
## Writes <basename of file.rrd>_smoke.png to the current directory.
##
## Requires: rrdtool
## 2021 - Script from www.davideaves.com

# Enable for debugging
#set -x

fping_rrd="${1}"
# COLOR[0] shades the deviation "smoke"; COLOR[1..7] mark the loss bands.
COLOR=( "0F0f00" "00FF00" "00BBFF" "0022FF" "8A2BE2" "FA0BE2" "C71585" "FF0000" )
LINE=".5"

# Print the "rrdtool graph" command with all of its layout options.
# Globals: fping_rrd, START, END (read)
rrd_graph_cmd() {
cat << EOF
rrdtool graph "$(basename "${fping_rrd%.*}")_smoke.png"
--start "${START}" --end "${END}"
--title "$(basename "${fping_rrd%.*}" | awk -F'_' '{print $NF}')"
--height 95 --width 600
--vertical-label "Seconds"
--color BACK#F3F3F3
--color CANVAS#FDFDFD
--color SHADEA#CBCBCB
--color SHADEB#999999
--color FONT#000000
--color AXIS#2C4D43
--color ARROW#2C4D43
--color FRAME#2C4D43
--border 1
--font TITLE:10:"Arial"
--font AXIS:8:"Arial"
--font LEGEND:9:"Courier"
--font UNIT:8:"Arial"
--font WATERMARK:7:"Arial"
--imgformat PNG
EOF
}

# Print the DEF/CDEF/VDEF and drawing arguments for the RRD file.
# Globals: fping_rrd, PINGS, STEP, START, TIME, COLOR, LINE (read);
#          rrd_idx (written)
rrd_graph_opts() {
# Set explicitly instead of relying on an unset variable evaluating to 0.
rrd_idx=0
# The loss percentage divides by PINGS (read from the RRD) rather than a
# fixed 20. The loss colour bands below still use the fixed thresholds
# 0,1,2,3,4,10,19, i.e. they are laid out for 20 pings.
cat << EOF
DEF:median$((rrd_idx))="${fping_rrd}":median:AVERAGE
DEF:loss$((rrd_idx))="${fping_rrd}":loss:AVERAGE
$(for ((i=1;i<=PINGS;i++)); do echo "DEF:ping$((rrd_idx))p$((i))=\"${fping_rrd}\":ping$((i)):AVERAGE"; done)
CDEF:ploss$((rrd_idx))=loss$((rrd_idx)),$((PINGS)),/,100,*
CDEF:dm$((rrd_idx))=median$((rrd_idx)),0,100000,LIMIT
$(for ((i=1;i<=PINGS;i++)); do echo "CDEF:p$((rrd_idx))p$((i))=ping$((rrd_idx))p$((i)),UN,0,ping$((rrd_idx))p$((i)),IF"; done)
$(echo -n "CDEF:pings$((rrd_idx))=$((PINGS)),p$((rrd_idx))p1,UN"; for ((i=2;i<=PINGS;i++)); do echo -n ",p$((rrd_idx))p$((i)),UN,+"; done; echo ",-")
$(echo -n "CDEF:m$((rrd_idx))=p$((rrd_idx))p1"; for ((i=2;i<=PINGS;i++)); do echo -n ",p$((rrd_idx))p$((i)),+"; done; echo ",pings$((rrd_idx)),/")
$(echo -n "CDEF:sdev$((rrd_idx))=p$((rrd_idx))p1,m$((rrd_idx)),-,DUP,*"; for ((i=2;i<=PINGS;i++)); do echo -n ",p$((rrd_idx))p$((i)),m$((rrd_idx)),-,DUP,*,+"; done; echo ",pings$((rrd_idx)),/,SQRT")
CDEF:dmlow$((rrd_idx))=dm$((rrd_idx)),sdev$((rrd_idx)),2,/,-
CDEF:s2d$((rrd_idx))=sdev$((rrd_idx))
AREA:dmlow$((rrd_idx))
AREA:s2d$((rrd_idx))#${COLOR[0]}30:STACK \
VDEF:avmed$((rrd_idx))=median$((rrd_idx)),AVERAGE
VDEF:avsd$((rrd_idx))=sdev$((rrd_idx)),AVERAGE
CDEF:msr$((rrd_idx))=median$((rrd_idx)),POP,avmed$((rrd_idx)),avsd$((rrd_idx)),/
VDEF:avmsr$((rrd_idx))=msr$((rrd_idx)),AVERAGE
LINE3:avmed$((rrd_idx))#${COLOR[1]}15: \
COMMENT:"\t\t"
COMMENT:"Average"
COMMENT:"Maximum"
COMMENT:"Minimum"
COMMENT:"Current"
COMMENT:"Std Dev"
COMMENT:" \\j" \
COMMENT:"Median RTT\:\t"
GPRINT:avmed$((rrd_idx)):"%.2lf"
GPRINT:median$((rrd_idx)):MAX:"%.2lf"
GPRINT:median$((rrd_idx)):MIN:"%.2lf"
GPRINT:median$((rrd_idx)):LAST:"%.2lf"
GPRINT:avsd$((rrd_idx)):"%.2lf"
COMMENT:" \\j" \
COMMENT:"Packet Loss\:\t"
GPRINT:ploss$((rrd_idx)):AVERAGE:"%.2lf%%"
GPRINT:ploss$((rrd_idx)):MAX:"%.2lf%%"
GPRINT:ploss$((rrd_idx)):MIN:"%.2lf%%"
GPRINT:ploss$((rrd_idx)):LAST:"%.2lf%%"
COMMENT:" - "
COMMENT:" \\j" \
COMMENT:"Loss Colors\:\t"
CDEF:me0=loss$((rrd_idx)),-1,GT,loss$((rrd_idx)),0,LE,*,1,UNKN,IF,median$((rrd_idx)),*
CDEF:meL0=me0,${LINE},-
CDEF:meH0=me0,0,*,${LINE},2,*,+
AREA:meL0
STACK:meH0#${COLOR[1]}:" 0/$((PINGS))"
CDEF:me1=loss$((rrd_idx)),0,GT,loss$((rrd_idx)),1,LE,*,1,UNKN,IF,median$((rrd_idx)),*
CDEF:meL1=me1,${LINE},-
CDEF:meH1=me1,0,*,${LINE},2,*,+
AREA:meL1
STACK:meH1#${COLOR[2]}:" 1/$((PINGS))"
CDEF:me2=loss$((rrd_idx)),1,GT,loss$((rrd_idx)),2,LE,*,1,UNKN,IF,median$((rrd_idx)),*
CDEF:meL2=me2,${LINE},-
CDEF:meH2=me2,0,*,${LINE},2,*,+
AREA:meL2
STACK:meH2#${COLOR[3]}:" 2/$((PINGS))"
CDEF:me3=loss$((rrd_idx)),2,GT,loss$((rrd_idx)),3,LE,*,1,UNKN,IF,median$((rrd_idx)),*
CDEF:meL3=me3,${LINE},-
CDEF:meH3=me3,0,*,${LINE},2,*,+
AREA:meL3
STACK:meH3#${COLOR[4]}:" 3/$((PINGS))"
CDEF:me4=loss$((rrd_idx)),3,GT,loss$((rrd_idx)),4,LE,*,1,UNKN,IF,median$((rrd_idx)),*
CDEF:meL4=me4,${LINE},-
CDEF:meH4=me4,0,*,${LINE},2,*,+
AREA:meL4
STACK:meH4#${COLOR[5]}:" 4/$((PINGS))"
CDEF:me10=loss$((rrd_idx)),4,GT,loss$((rrd_idx)),10,LE,*,1,UNKN,IF,median$((rrd_idx)),*
CDEF:meL10=me10,${LINE},-
CDEF:meH10=me10,0,*,${LINE},2,*,+
AREA:meL10
STACK:meH10#${COLOR[6]}:"10/$((PINGS))"
CDEF:me19=loss$((rrd_idx)),10,GT,loss$((rrd_idx)),19,LE,*,1,UNKN,IF,median$((rrd_idx)),*
CDEF:meL19=me19,${LINE},-
CDEF:meH19=me19,0,*,${LINE},2,*,+
AREA:meL19
STACK:meH19#${COLOR[7]}:"19/$((PINGS))\\j" \
COMMENT:"Probe\: $((PINGS)) pings every $((STEP)) seconds"
COMMENT:"$(date -d "${START}" | sed 's/\:/\\\:/g') ($(awk -v TIME="${TIME}" 'BEGIN {printf "%.1f hr", TIME/3600}'))\\j"
EOF
}

if [ ! -r "${fping_rrd}" ]
then
  printf '%s "file.rrd"\n' "${0}"
else
  STEP=$(rrdtool info "${fping_rrd}" | awk '/^step/{print $NF}')
  PINGS=$(rrdtool info "${fping_rrd}" | awk '/^ds.ping.*index/{count++} END{print count}')

  START="${2:-"-7 hours"}"
  END="${3:-now}"
  TIME=$(( $(date -d "${END}" +%s) - $(date -d "${START}" +%s) ))

  # The generated text is one command spread over many lines, so it is
  # expanded unquoted on purpose: word-splitting joins it before eval re-parses
  # the embedded quotes. Paths containing whitespace are not supported.
  # Globbing is switched off so RPN "*" operators are never expanded.
  set -f
  # shellcheck disable=SC2046
  eval $(rrd_graph_cmd; rrd_graph_opts)
  set +f
fi