[collectd] Memory leak advice

Daniel Rowe lists at fathom13.com
Thu Jan 24 14:16:50 CET 2008


> Hi Daniel,
> On Wed, Jan 23, 2008 at 10:17:07PM +0900, Daniel Rowe wrote:
> > I enabled the logging and corrected any errors, mainly plugins that
> > were not working. There are no errors being dumped to syslog now and
> > it is still leaking memory. If I watch it with top a few bytes leak
> > away every few seconds.
> could you please provide your updated config, too? From the previous
> config it looks like I'm using almost all of the plugins you had loaded
> in production without memory loss.
> The notable exceptions are the cpufreq and perl plugins. Do you use the
> perl plugin? Can you provide the plugin written in Perl, too?
# Config file for collectd(1).
# Please read collectd.conf(5) for a list of options.
# http://collectd.org/

Hostname    "bajor.fathom13.com"
BaseDir     "/opt/collectd/var/lib/collectd"
PIDFile     "/opt/collectd/var/run/collectd.pid"
PluginDir   "/opt/collectd/lib/collectd"
TypesDB     "/opt/collectd/lib/collectd/types.db"
Interval     10
ReadThreads  5

#LoadPlugin apache
#LoadPlugin apcups
#LoadPlugin apple_sensors
#LoadPlugin battery
LoadPlugin cpu
#LoadPlugin cpufreq
#LoadPlugin csv
LoadPlugin df
LoadPlugin disk
#LoadPlugin dns
#LoadPlugin email
LoadPlugin entropy
#LoadPlugin exec
#LoadPlugin hddtemp
LoadPlugin interface
#LoadPlugin iptables
#LoadPlugin ipvs
LoadPlugin irq
LoadPlugin load
#LoadPlugin logfile
#LoadPlugin mbmon
#LoadPlugin memcached
LoadPlugin memory
#LoadPlugin multimeter
#LoadPlugin mysql
#LoadPlugin netlink
LoadPlugin network
#LoadPlugin nfs
#LoadPlugin nginx
LoadPlugin ntpd
#LoadPlugin nut
#LoadPlugin perl
#LoadPlugin ping
LoadPlugin processes
LoadPlugin rrdtool
LoadPlugin sensors
#LoadPlugin serial
#LoadPlugin snmp
LoadPlugin swap
LoadPlugin syslog
#LoadPlugin tape
LoadPlugin tcpconns
#LoadPlugin unixsock
LoadPlugin users
#LoadPlugin vserver
#LoadPlugin wireless
#LoadPlugin xmms

#<Plugin apache>
#       URL "http://localhost/status?auto"
#       User "www-user"
#       Password "secret"
#       CACert "/etc/ssl/ca.crt"

#<Plugin apcups>
#       Host "localhost"
#       Port "3551"

#<Plugin csv>
#       DataDir "/opt/collectd/var/lib/collectd/csv"

#<Plugin df>
#       Device "/dev/hda1"
#       Device ""
#       MountPoint "/home"
#       FSType "ext3"
#       IgnoreSelected false

#<Plugin dns>
#       Interface "eth0"
#       IgnoreSource ""

#<Plugin email>
#       SocketFile "/opt/collectd/var/run/collectd-email"
#       SocketGroup "collectd"
#       SocketPerms "0770"
#       MaxConns 5

#<Plugin exec>
#       Exec "user:group" "/path/to/exec"

#<Plugin hddtemp>
#       Host ""
#       Port "7634"

#<Plugin interface>
#       Interface "eth0"
#       IgnoreSelected false

#<Plugin iptables>
#       Chain table chain

#<Plugin irq>
#       Irq 7
#       Irq 8
#       Irq 9
#       IgnoreSelected true

#<Plugin logfile>
#       LogLevel info
#       File STDOUT

#<Plugin mbmon>
#       Host ""
#       Port "411"

#<Plugin memcached>
#       Host ""
#       Port "11211"

#<Plugin mysql>
#       Host "database.serv.er"
#       User "db_user"
#       Password "secret"
#       Database "db_name"

#<Plugin netlink>
#       Interface "All"
#       VerboseInterface "All"
#       QDisc "eth0" "pfifo_fast-1:0"
#       Class "ppp0" "htb-1:10"
#       Filter "ppp0" "u32-1:0"

#<Plugin network>
#       Server "ff18::efc0:4a42" "25826"
#       Server "" "25826"
#       Listen "ff18::efc0:4a42" "25826"
#       Listen "" "25826"
#       TimeToLive "128"
#       Forward false
#       CacheFlush 1800

#<Plugin nginx>
#       URL "http://localhost/status?auto"
#       User "www-user"
#       Password "secret"
#       CACert "/etc/ssl/ca.crt"

#<Plugin ntpd>
#       Host "localhost"
#       Port 123

#<Plugin nut>
#       UPS "upsname at hostname:port"

#<Plugin perl>
#       IncludeDir "/my/include/path"
#       BaseName "Collectd::Plugin"
#       LoadPlugin foo

#<Plugin ping>
#       Host "host.foo.bar"

#<Plugin processes>
#       Process "name"

#<Plugin rrdtool>
#       DataDir "/opt/collectd/var/lib/collectd/rrd"
#       CacheTimeout 120
#       CacheFlush   900

#<Plugin sensors>
#       Sensor "it8712-isa-0290/temperature-temp1"
#       Sensor "it8712-isa-0290/fanspeed-fan3"
#       Sensor "it8712-isa-0290/voltage-in8"
#       IgnoreSelected false

#<Plugin snmp>
#   <Data "powerplus_voltge_input">
#       Type "voltage"
#       Table false
#       Instance "input_line1"
#       Values "SNMPv2-SMI::enterprises.6050."
#   </Data>
#   <Data "hr_users">
#       Type "users"
#       Table false
#       Instance ""
#       Values "HOST-RESOURCES-MIB::hrSystemNumUsers.0"
#   </Data>
#   <Data "std_traffic">
#       Type "if_octets"
#       Table true
#       Instance "IF-MIB::ifDescr"
#       Values "IF-MIB::ifInOctets" "IF-MIB::ifOutOctets"
#   </Data>
#   <Host "some.switch.mydomain.org">
#       Address ""
#       Version 1
#       Community "community_string"
#       Collect "std_traffic"
#       Interval 120
#   </Host>
#   <Host "some.server.mydomain.org">
#       Address ""
#       Version 2
#       Community "another_string"
#       Collect "std_traffic" "hr_users"
#   </Host>
#   <Host "some.ups.mydomain.org">
#       Address ""
#       Version 1
#       Community "more_communities"
#       Collect "powerplus_voltge_input"
#       Interval 300
#   </Host>

<Plugin syslog>
        LogLevel debug
</Plugin>

#<Plugin tcpconns>
#       ListeningPorts false
#       LocalPort "25"
#       RemotePort "25"

#<Plugin unixsock>
#       SocketFile "/opt/collectd/var/run/collectd-unixsock"
#       SocketGroup "collectd"
#       SocketPerms "0660"

> > I'm pretty sure it has a memory leak. What can I do to help track it
> > down?
> Don't know about your programming background, but if you've done a fair
> amount of (C) programming you may want to try valgrind [1] to search for
> lost memory blocks.

I'm not a C programmer; I am an Oracle PL/SQL dev and database man.

With valgrind:

==19637==    by 0x6158AB9: rrd_queue_thread (rrdtool.c:363)
==19637==    by 0x33E1806406: start_thread (in /lib64/libpthread-2.7.so)
==19637==    by 0x33E0CD4B0C: clone (in /lib64/libc-2.7.so)
==19637== LEAK SUMMARY:
==19637==    definitely lost: 8,596,688 bytes in 268,646 blocks.
==19637==    indirectly lost: 91,819,207 bytes in 1,662,466 blocks.
==19637==      possibly lost: 664 bytes in 12 blocks.
==19637==    still reachable: 101,325 bytes in 754 blocks.
==19637==         suppressed: 0 bytes in 0 blocks.

> Other than that the ``divide and conquer'' testing method I've mentioned
> in my previous email should be relatively easy and effective.
I will do this at the weekend when I have a little time.

> Regards,
> -octo
> [1] <http://valgrind.org/>


More information about the collectd mailing list