[collectd] Bug#535787: collectd: powerdns monitoring hangs on the pdns socket (pdns 2.9.22)

thomas morgan tm at iprog.com
Thu Aug 20 06:46:39 CEST 2009


Hi.

On Aug 19, 2009, at 3:18 PM, Sebastian Harl wrote:

> Hi again,
>
> On Sat, Jul 04, 2009 at 08:29:49PM -0600, thomas morgan wrote:
>> There is some kind of incompatibility between collectd and powerdns
>> 2.9.22. With powerdns support enabled in collectd, collectd connects
>> to powerdns's unix socket. The connection hangs indefinitely.
> […]
>> collectd will no longer even shut down and must be kill -9'd. It renders
>> powerdns's control socket useless as well, so it can no longer be
>> managed either (until collectd is killed).
>>
>> Things work as expected with powerdns 2.9.21.2. I tried collectd 4.6.2
>> also, with the same problem.
>
> On Mon, Jul 27, 2009 at 05:36:37PM -0700, Luke Heberling wrote:
>> OK, two problems.
>> Problem number one seems to have been introduced with the LocalSocket option.
>> In fact, that config value is ignored in favor of its key, the literal
>> "LocalSocket". So the plugin creates the socket as "LocalSocket", connects to
>> the pdns control socket and sends the data. When the pdns control socket
>> attempts to send data back, it finds no socket called "LocalSocket" as it's in
>> a different working directory.
> […]
>
> Hrm … reading through the original bug report again, I think there's
> another issue as well. Thomas, did you use the 'LocalSocket' config
> option at all? Could you please provide your configuration? Since the
> problem did not exist when using powerdns 2.9.21.2 this issue seems to
> be unrelated to that option.

Here's the exact config section:
<Plugin powerdns>
         <Server "stage01">
#               Collect "latency"
#               Collect "udp-answers" "udp-queries"
                 Socket "/var/run/pdns.controlsocket"
         </Server>
#       <Recursor "recursor_name">
#               Collect "questions"
#               Collect "cache-hits" "cache-misses"
#               Socket "/var/run/pdns_recursor.controlsocket"
#       </Recursor>
#       LocalSocket "/opt/collectd/var/run/collectd-powerdns"
</Plugin>

As you can see, LocalSocket is commented out.


>
> Thomas, could you please provide the output for the following commands
> (issued when collectd hangs):

Hope these are what you're looking for. If not, let me know and I'll  
try again. Run with these versions:
collectd 4.6.3-1~bpo50+1
pdns-server, pdns-backend-pgsql 2.9.22-1~bpo50+1
amd64

>
> * lsof -p <pid of collectd>

# lsof -p 5296
COMMAND   PID USER   FD   TYPE             DEVICE    SIZE     NODE NAME
collectd 5296 root  cwd    DIR              202,2    4096   268259 /var/lib/collectd
collectd 5296 root  rtd    DIR              202,2    4096        2 /
collectd 5296 root  txt    REG              202,2  139544   434017 /usr/sbin/collectd
collectd 5296 root  mem    REG              202,2  119288   131110 /lib/ld-2.7.so
collectd 5296 root  mem    REG              202,2  130114   131103 /lib/libpthread-2.7.so
collectd 5296 root  mem    REG              202,2   14616   131111 /lib/libdl-2.7.so
collectd 5296 root  mem    REG              202,2 1375536   131107 /lib/libc-2.7.so
collectd 5296 root  mem    REG              202,2    5176    39306 /usr/lib/collectd/syslog.so
collectd 5296 root  mem    REG              202,2    6672    39259 /usr/lib/collectd/cpu.so
collectd 5296 root  mem    REG              202,2   13152    39264 /usr/lib/collectd/df.so
collectd 5296 root  mem    REG              202,2    8712    39265 /usr/lib/collectd/disk.so
collectd 5296 root  mem    REG              202,2    7208    39272 /usr/lib/collectd/interface.so
collectd 5296 root  mem    REG              202,2    5288    39278 /usr/lib/collectd/load.so
collectd 5296 root  mem    REG              202,2    6624    39285 /usr/lib/collectd/memory.so
collectd 5296 root  mem    REG              202,2   24824    39299 /usr/lib/collectd/powerdns.so
collectd 5296 root  mem    REG              202,2   14128    39300 /usr/lib/collectd/processes.so
collectd 5296 root  mem    REG              202,2   20912    39301 /usr/lib/collectd/rrdtool.so
collectd 5296 root  mem    REG              202,2  208880   434014 /usr/lib/librrd_th.so.4.0.0
collectd 5296 root  mem    REG              202,2  534736   131106 /lib/libm-2.7.so
collectd 5296 root  mem    REG              202,2           430633 /usr/lib/libxml2.so.2.6.32 (path inode=428931)
collectd 5296 root  mem    REG              202,2  153304   433424 /usr/lib/libpng12.so.0.27.0
collectd 5296 root  mem    REG              202,2   44792   433980 /usr/lib/libpangocairo-1.0.so.0.2002.3
collectd 5296 root  mem    REG              202,2  292024   433978 /usr/lib/libpango-1.0.so.0.2002.3
collectd 5296 root  mem    REG              202,2  477736   433524 /usr/lib/libcairo.so.2.17.5
collectd 5296 root  mem    REG              202,2  277520   433820 /usr/lib/libgobject-2.0.so.0.1600.6
collectd 5296 root  mem    REG              202,2   12520   433821 /usr/lib/libgmodule-2.0.so.0.1600.6
collectd 5296 root  mem    REG              202,2  795864   430436 /usr/lib/libglib-2.0.so.0.1600.6
collectd 5296 root  mem    REG              202,2   93504   428258 /usr/lib/libz.so.1.2.3.3
collectd 5296 root  mem    REG              202,2  181272   433982 /usr/lib/libpangoft2-1.0.so.0.2002.3
collectd 5296 root  mem    REG              202,2  541512   432426 /usr/lib/libfreetype.so.6.3.18
collectd 5296 root  mem    REG              202,2  200344   432470 /usr/lib/libfontconfig.so.1.3.0
collectd 5296 root  mem    REG              202,2  485864   433511 /usr/lib/libdirectfb-1.0.so.0.1.0
collectd 5296 root  mem    REG              202,2   34008   433512 /usr/lib/libfusion-1.0.so.0.1.0
collectd 5296 root  mem    REG              202,2   86896   433510 /usr/lib/libdirect-1.0.so.0.1.0
collectd 5296 root  mem    REG              202,2   12888   433522 /usr/lib/libxcb-render-util.so.0.0.0
collectd 5296 root  mem    REG              202,2   33304   433545 /usr/lib/libxcb-render.so.0.0.0
collectd 5296 root  mem    REG              202,2  114176   431481 /usr/lib/libxcb.so.1.0.0
collectd 5296 root  mem    REG              202,2   38296   432382 /usr/lib/libXrender.so.1.3.0
collectd 5296 root  mem    REG              202,2 1096880   429833 /usr/lib/libX11.so.6.2.0
collectd 5296 root  mem    REG              202,2  183856   433518 /usr/lib/libpixman-1.so.0.10.0
collectd 5296 root  mem    REG              202,2  162816   430292 /usr/lib/libpcre.so.3.12.1
collectd 5296 root  mem    REG              202,2  169776   430370 /usr/lib/libexpat.so.1.5.2
collectd 5296 root  mem    REG              202,2    9824   429825 /usr/lib/libXau.so.6.0.0
collectd 5296 root  mem    REG              202,2   20096   429827 /usr/lib/libXdmcp.so.6.0.0
collectd 5296 root  mem    REG              202,2    5896   431483 /usr/lib/libxcb-xlib.so.0.0.0
collectd 5296 root  mem    REG              202,2    6616    39305 /usr/lib/collectd/swap.so
collectd 5296 root  mem    REG              202,2    5096    39315 /usr/lib/collectd/users.so
collectd 5296 root  mem    REG              202,2   47520   131104 /lib/libnss_files-2.7.so
collectd 5296 root    0u   CHR                1,3              827 /dev/null
collectd 5296 root    1u   CHR                1,3              827 /dev/null
collectd 5296 root    2u   CHR                1,3              827 /dev/null
collectd 5296 root    3u  unix 0xffff88002faf4c80          7717883 socket
collectd 5296 root    8u  unix 0xffff88002eed3080         14522374 socket


>  * lsof -p <pid of pdns>

# lsof -p 29492
COMMAND     PID USER   FD   TYPE             DEVICE    SIZE     NODE NAME
pdns_serv 29492 root  cwd    DIR              202,2    4096   262159 /var/run
pdns_serv 29492 root  rtd    DIR              202,2    4096        2 /
pdns_serv 29492 root  txt    REG              202,2 1252408   430699 /usr/sbin/pdns_server
pdns_serv 29492 root  mem    REG              202,2  119288   131110 /lib/ld-2.7.so
pdns_serv 29492 root  mem    REG              202,2   14616   131111 /lib/libdl-2.7.so
pdns_serv 29492 root  mem    REG              202,2   93504   428258 /usr/lib/libz.so.1.2.3.3
pdns_serv 29492 root  mem    REG              202,2 1019216   427522 /usr/lib/libstdc++.so.6.0.10
pdns_serv 29492 root  mem    REG              202,2  534736   131106 /lib/libm-2.7.so
pdns_serv 29492 root  mem    REG              202,2   93016   131136 /lib/libgcc_s.so.1
pdns_serv 29492 root  mem    REG              202,2  130114   131103 /lib/libpthread-2.7.so
pdns_serv 29492 root  mem    REG              202,2 1375536   131107 /lib/libc-2.7.so
pdns_serv 29492 root  mem    REG              202,2   31536   131099 /lib/libnss_compat-2.7.so
pdns_serv 29492 root  mem    REG              202,2   88968   131097 /lib/libnsl-2.7.so
pdns_serv 29492 root  mem    REG              202,2   43472   131087 /lib/libnss_nis-2.7.so
pdns_serv 29492 root  mem    REG              202,2   47520   131104 /lib/libnss_files-2.7.so
pdns_serv 29492 root    0u   CHR                1,3              827 /dev/null
pdns_serv 29492 root    1u   CHR                1,3              827 /dev/null
pdns_serv 29492 root    2u   CHR                1,3              827 /dev/null
pdns_serv 29492 root    3u  unix 0xffff88003a890080         14522293 socket
pdns_serv 29492 root    4u  unix 0xffff88003a890c80         14522295 /var/run/pdns.controlsocket
pdns_serv 29492 root    5u  unix 0xffff88002eed3380         14522304 /var/run/pdns.controlsocket
pdns_serv 29492 root    6w  FIFO                0,6         14522299 pipe
pdns_serv 29492 root    7r  FIFO                0,6         14522300 pipe
pdns_serv 29492 root    8u  unix 0xffff88002eed3380         14522304 /var/run/pdns.controlsocket


> * strace -p <pid of collectd>

# strace -p 5296
Process 5296 attached - interrupt to quit
restart_syscall(<... resuming interrupted call ...>) = 0
gettimeofday({1250742124, 386860}, NULL) = 0
time(NULL)                              = 1250742124
futex(0x6219e4, FUTEX_CMP_REQUEUE, 1, 2147483647, 0x6219a0, 3620962) = 4
futex(0x6219a0, FUTEX_WAKE, 1)          = 1
gettimeofday({1250742124, 389152}, NULL) = 0
nanosleep({9, 997708000}, {9, 997708000}) = 0
gettimeofday({1250742134, 396795}, NULL) = 0
time(NULL)                              = 1250742134
futex(0x6219e4, FUTEX_CMP_REQUEUE, 1, 2147483647, 0x6219a0, 3620970) = 4
futex(0x6219a0, FUTEX_WAKE, 1)          = 1
gettimeofday({1250742134, 398535}, NULL) = 0
nanosleep({9, 998260000}, {9, 998260000}) = 0
gettimeofday({1250742144, 406810}, NULL) = 0
time(NULL)                              = 1250742144
futex(0x6219e4, FUTEX_CMP_REQUEUE, 1, 2147483647, 0x6219a0, 3620978) = 4
futex(0x6219a0, FUTEX_WAKE, 1)          = 1
gettimeofday({1250742144, 408491}, NULL) = 0
nanosleep({9, 998319000},  <unfinished ...>
Process 5296 detached


> * strace -p <pid of pdns>

# strace -p 29492
Process 29492 attached - interrupt to quit
restart_syscall(<... resuming interrupted call ...>) = 0
wait4(29494, 0x7fff238025b8, WNOHANG, NULL) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0}, {1, 0})               = 0
wait4(29494, 0x7fff238025b8, WNOHANG, NULL) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0}, {1, 0})               = 0
wait4(29494, 0x7fff238025b8, WNOHANG, NULL) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0}, {1, 0})               = 0
wait4(29494, 0x7fff238025b8, WNOHANG, NULL) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0}, {1, 0})               = 0
wait4(29494, 0x7fff238025b8, WNOHANG, NULL) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0}, {1, 0})               = 0
wait4(29494, 0x7fff238025b8, WNOHANG, NULL) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0},  <unfinished ...>
Process 29492 detached


Thanks for your help on this.

--t


>
> Cheers,
> Sebastian
>
> -- 
> Sebastian "tokkee" Harl +++ GnuPG-ID: 0x8501C7FC +++ http://tokkee.org/
>
> Those who would give up Essential Liberty to purchase a little Temporary
> Safety, deserve neither Liberty nor Safety.         -- Benjamin Franklin
>




More information about the collectd mailing list