[collectd] Collectd and FreeBSD restart behavior + df and zfs file system.
Day, David
dday at redcom.com
Wed Feb 3 21:26:14 CET 2016
Hi,
Some observations from the trenches:
Please file, post and/or suggest redirection.
Overall, I am pleased with the tool and its flexibility.
I am using collectd V5.5 under FreeBSD V10.2.
Whenever I run "sudo service collectd restart", I observe the following two behaviors.
1.) collectd core dumps.
2016-02-03T19:49:02.913918-05:00 alabama kern info kernel:pid 80434 (collectd), uid 0: exited on signal 11 (core dumped)
(Actually, this also happens if the service command is simply "stop".)
2.) As the restart occurs, for just about every plugin I have configured there is an rrdtool complaint in the syslog: "illegal attempt to update ....."
e.g.
2016-02-03T19:48:02.694521-05:00 alabama daemon warning collectd[80434]: rrdtool plugin: rrd_update_r (/var/db/collectd/rrd/localhost/memory/memory-active.rrd) failed: /var/db/collectd/rrd/localhost/memory/memory-active.rrd: illegal attempt to update using time 1454546882 when last update time is 1454546882 (minimum one second step)
2016-02-03T19:48:02.695166-05:00 alabama daemon warning collectd[80434]: rrdtool plugin: rrd_update_r (/var/db/collectd/rrd/localhost/memory/memory-cache.rrd) failed: /var/db/collectd/rrd/localhost/memory/memory-cache.rrd: illegal attempt to update using time 1454546882 when last update time is 1454546882 (minimum one second step)
2016-02-03T19:48:12.671130-05:00 alabama daemon warning collectd[80434]: rrdtool plugin: rrd_update_r (/var/db/collectd/rrd/localhost/load/load.rrd) failed: /var/db/collectd/rrd/localhost/load/load.rrd: illegal attempt to update using time 1454546882 when last update time is 1454546882 (minimum one second step)
My work-around for this is to avoid the service collectd restart option.
Something like the following avoids the second observation. The sleep step is critical.
service collectd stop;sleep 2; service collectd start.
I suspect this may actually be more of a matter for the FreeBSD mailing lists?
As a separate application matter I found that the df plugin did a poor job of dealing with a zfs filesystem in calculating used capacity and reporting to the threshold plugin. I ended up writing an exec extension that used zpool to emulate what I was hoping df would generate for me.
Best Regards
Dave Day
For completeness, here is my collectd.conf file.
cat /usr/local/etc/collectd.conf
#
# Config file for collectd(1).
# Please read collectd.conf(5) for a list of options.
# http://collectd.org/
##############################################################################
# Global #
#----------------------------------------------------------------------------#
# Global settings for the daemon. #
Hostname "localhost"
FQDNLookup true
BaseDir "/var/db/collectd"
PIDFile "/var/run/collectd.pid"
TypesDB "/usr/local/share/collectd/types.db"
#----------------------------------------------------------------------------#
# When enabled, plugins are loaded automatically with the default options #
# when an appropriate <Plugin ...> block is encountered. #
# Disabled by default. #
#----------------------------------------------------------------------------#
#AutoLoadPlugin false
#----------------------------------------------------------------------------#
# Interval at which to query values. This may be overwritten on a per-plugin #
# base by using the 'Interval' option of the LoadPlugin block: #
# <LoadPlugin foo> #
# Interval 60 #
# </LoadPlugin> #
#----------------------------------------------------------------------------#
#Interval 10
#Timeout 2
#ReadThreads 5
#WriteThreads 5
# Limit the size of the write queue. Default is no limit. Setting up a limit is
# recommended for servers handling a high volume of traffic.
#WriteQueueLimitHigh 1000000
#WriteQueueLimitLow 800000
# config file assumes collectd V5.5
#A value is identified by a unique name, which we usually call the "identifier". The identifier consists of five parts, two of which are optional:
#. host "/" plugin ["-" plugin instance] "/" type ["-" type instance]
# [/var/db/collectd/rrd/]localhost/cpu-0/cpu-idle
##############################################################################
# Logging #
#----------------------------------------------------------------------------#
# Plugins which provide logging functions should be loaded first, so log #
# messages generated when loading or configuring other plugins can be #
# accessed. #
##############################################################################
LoadPlugin syslog
<plugin syslog>
LogLevel warning
NotifyLevel "OKAY"
</plugin>
LoadPlugin cpu
LoadPlugin unixsock
<Plugin cpu>
ReportByState true # separate database for time of system,idle,user,nice,interrupt
ReportByCpu false # aggregate across cores.
ValuesPercentage true
</Plugin>
LoadPlugin exec
<Plugin exec>
Exec "www:www" "/usr/local/sbin/collectd_zpool"
Exec "www:www" "/usr/local/sbin/collectd_coretemp"
Exec "www:www" "/usr/local/sbin/collectd_smart.sh"
Exec "www:www" "/usr/local/sbin/collectd_superio.sh"
</Plugin>
LoadPlugin interface
LoadPlugin load
LoadPlugin memory
LoadPlugin match_regex
LoadPlugin postgresql
<plugin postgresql>
<Query active_calls>
Statement "SELECT COALESCE((SELECT COUNT(*) FROM log.pep_connection),0) as active_calls;"
<Result>
Type gauge
InstancePrefix "active_calls"
ValuesFrom active_calls
</Result>
</Query>
<Database ace_db>
Host "localhost"
Port "5432"
User "collectd"
Query active_calls
</Database>
</plugin>
LoadPlugin processes
LoadPlugin rrdtool
<plugin rrdtool>
DataDir "/var/db/collectd/rrd"
</plugin>
LoadPlugin swap
LoadPlugin threshold
LoadPlugin tail
# count the msgs per facility at warning or above.
<plugin "tail">
<File "/var/log/messages">
Instance "messages"
<Match>
# localhost/tail-messages/counter-ace
Regex "local1.(err|warn|alert|crit)"
DSType "CounterInc"
Type "counter"
Instance "ace"
</Match>
<Match>
Regex "local0.(err|warn|alert|crit)"
ExcludeRegex "smdr:"
DSType "CounterInc"
Type "counter"
Instance "postgres"
</Match>
<Match>
Regex "local4.(err|warn|alert|crit)"
DSType "CounterInc"
Type "counter"
Instance "mec"
</Match>
<Match>
Regex "local5.(err|warn|alert|crit)"
DSType "CounterInc"
Type "counter"
Instance "web"
</Match>
<Match>
Regex "(local6|local7).(err|warn|alert|crit)"
DSType "CounterInc"
Type "counter"
Instance "apache"
</Match>
<Match>
Regex "^.*$"
ExcludeRegex " local[0-7] "
DSType "CounterInc"
Type "counter"
Instance "os"
</Match>
</File>
</plugin>
<Plugin unixsock>
SocketFile "/var/run/collectd-unixsock"
SocketPerms "0660"
DeleteSocket false
</Plugin>
#https://collectd.org/documentation/manpages/collectd-threshold.5.shtml
<Plugin "threshold">
<plugin "cpu">
<type "percent">
Instance "idle"
WarningMin 50
FailureMin 5
Hits 3
</type>
</plugin>
<Plugin "memory">
<Type "memory">
Instance "free"
WarningMin 100000000
</Type>
</Plugin>
<Plugin "swap">
<Type "swap">
Instance "free"
WarningMin 100000000
</Type>
</Plugin>
<plugin "load">
<type "load">
DataSource "midterm"
FailureMax 4
Hits 3
Hysteresis 3
</type>
</plugin>
<Plugin "tail">
Instance "messages"
<type "counter">
Instance "os"
WarningMax 5
</type>
<type "counter">
Instance "ace"
WarningMax 5
</type>
<type "counter">
Instance "apache"
WarningMax 5
</type>
<type "counter">
Instance "web"
WarningMax 5
</type>
<type "counter">
Instance "postgres"
WarningMax 5
</type>
<type "counter">
Instance "mec"
WarningMax 5
</type>
</Plugin>
<plugin "exec">
Instance "coretemp"
<type "temperature">
Instance "avg"
WarningMax 80
</type>
</plugin>
<plugin "exec">
Instance "zpool"
<type "gauge">
Instance "val"
WarningMax 75
FailureMax 90
</type>
</plugin>
<plugin "exec">
Instance "superio"
<type "temperature">
Instance "cpu"
WarningMax 80
</type>
<type "temperature">
Instance "system"
WarningMax 80
</type>
</plugin>
</Plugin>
More information about the collectd
mailing list