[collectd] rrd_queue_thread issue
Amit Gupta
amit.gupta221 at gmail.com
Thu May 14 13:19:13 CEST 2009
Hi,
I am seeing a very strange issue with collectd. I have started
collectd using collectdmon, and collectd is configured to monitor
only cpu and memory. ReadThreads is set to 5. Initially, when I run
pstack on the collectd process, I see 7 threads, which is expected (5
read threads, 1 main thread, and 1 rrd_queue_thread):
# pstack 26921
26921: /opt/webstack/admin/sbin/collectd -C /etc/opt/webstack/admin/collectd/
----------------- lwp# 1 / thread# 1 --------------------
fee44077 nanosleep (8045c68, 8045c68)
fef92b67 nanosleep (8045c68, 8045c68, 8046888, 8054981) + 1b
080549f7 do_loop (0, 0, 0, 0, 0, 1) + d3
08054ee0 main (4, 8047d90, 8047da4, feffb818) + 3dc
0805448d _start (4, 8047e34, 8047e56, 8047e59, 8047e88, 0) + 7d
----------------- lwp# 2 / thread# 2 --------------------
fee43dcb lwp_park (0, 0, 0)
fee3e60e cond_wait_queue (feb94528, feb94538, 0, 0) + 3b
fee3eb07 _cond_wait (feb94528, feb94538) + 66
fee3eb49 cond_wait (feb94528, feb94538) + 21
fee3eb82 pthread_cond_wait (feb94528, feb94538, 0, 80cad68) + 1b
feb811c4 rrd_queue_thread (0) + b0
fee43a81 _thr_setup (fe260200) + 4e
fee43d70 _lwp_start (fe260200, 0, 0, fe36eff8, fee43d70, fe260200)
----------------- lwp# 3 / thread# 3 --------------------
fee43dcb lwp_park (0, 0, 4)
fee3e60e cond_wait_queue (807e9d0, 807e9e0, 0, 0) + 3b
fee3eb07 _cond_wait (807e9d0, 807e9e0) + 66
fee3eb49 cond_wait (807e9d0, 807e9e0) + 21
fee3eb82 pthread_cond_wait (807e9d0, 807e9e0, fe25efd8, 8059268) + 1b
080592fb plugin_read_thread (0) + 10b
fee43a81 _thr_setup (fe260a00) + 4e
fee43d70 _lwp_start (fe260a00, 0, 0, fe25eff8, fee43d70, fe260a00)
----------------- lwp# 4 / thread# 4 --------------------
fee43dcb lwp_park (0, 0, 0)
fee3e60e cond_wait_queue (807e9d0, 807e9e0, 0, 0) + 3b
fee3eb07 _cond_wait (807e9d0, 807e9e0) + 66
fee3eb49 cond_wait (807e9d0, 807e9e0) + 21
fee3eb82 pthread_cond_wait (807e9d0, 807e9e0, fe15ffd8, 8059268) + 1b
080592fb plugin_read_thread (0) + 10b
fee43a81 _thr_setup (fe261200) + 4e
fee43d70 _lwp_start (fe261200, 0, 0, fe15fff8, fee43d70, fe261200)
----------------- lwp# 5 / thread# 5 --------------------
fee43dcb lwp_park (0, 0, 0)
fee3e60e cond_wait_queue (807e9d0, 807e9e0, 0, 0) + 3b
fee3eb07 _cond_wait (807e9d0, 807e9e0) + 66
fee3eb49 cond_wait (807e9d0, 807e9e0) + 21
fee3eb82 pthread_cond_wait (807e9d0, 807e9e0, fe060fd8, 8059268) + 1b
080592fb plugin_read_thread (0) + 10b
fee43a81 _thr_setup (fe261a00) + 4e
fee43d70 _lwp_start (fe261a00, 0, 0, fe060ff8, fee43d70, fe261a00)
----------------- lwp# 6 / thread# 6 --------------------
fee43dcb lwp_park (0, 0, 0)
fee3e60e cond_wait_queue (807e9d0, 807e9e0, 0, 0) + 3b
fee3eb07 _cond_wait (807e9d0, 807e9e0) + 66
fee3eb49 cond_wait (807e9d0, 807e9e0) + 21
fee3eb82 pthread_cond_wait (807e9d0, 807e9e0, fdf61fd8, 8059268) + 1b
080592fb plugin_read_thread (0) + 10b
fee43a81 _thr_setup (fe262200) + 4e
fee43d70 _lwp_start (fe262200, 0, 0, fdf61ff8, fee43d70, fe262200)
----------------- lwp# 7 / thread# 7 --------------------
fee43dcb lwp_park (0, 0, 6)
fee3e60e cond_wait_queue (807e9d0, 807e9e0, 0, 0) + 3b
fee3eb07 _cond_wait (807e9d0, 807e9e0) + 66
fee3eb49 cond_wait (807e9d0, 807e9e0) + 21
fee3eb82 pthread_cond_wait (807e9d0, 807e9e0, fde62fd8, 8059268) + 1b
080592fb plugin_read_thread (0) + 10b
fee43a81 _thr_setup (fe262a00) + 4e
fee43d70 _lwp_start (fe262a00, 0, 0, fde62ff8, fee43d70, fe262a00)
However, after some time, the number of rrd_queue_thread threads keeps increasing:
# pstack 26921
26921: /opt/webstack/admin/sbin/collectd -C /etc/opt/webstack/admin/collectd/
.....
.....
....
----------------- lwp# 8 / thread# 8 --------------------
fee43dcb lwp_park (0, 0, c)
fee3e60e cond_wait_queue (feb94528, feb94538, 0, 0) + 3b
fee3eb07 _cond_wait (feb94528, feb94538) + 66
fee3eb49 cond_wait (feb94528, feb94538) + 21
fee3eb82 pthread_cond_wait (feb94528, feb94538, 0, 80cae68) + 1b
feb811c4 rrd_queue_thread (0) + b0
fee43a81 _thr_setup (fe263200) + 4e
fee43d70 _lwp_start (fe263200, 0, 0, fdd63ff8, fee43d70, fe263200)
----------------- lwp# 9 / thread# 9 --------------------
fee43dcb lwp_park (0, 0, 0)
fee3e60e cond_wait_queue (feb94528, feb94538, 0, 0) + 3b
fee3eb07 _cond_wait (feb94528, feb94538) + 66
fee3eb49 cond_wait (feb94528, feb94538) + 21
fee3eb82 pthread_cond_wait (feb94528, feb94538, 0, 80cad68) + 1b
feb811c4 rrd_queue_thread (0) + b0
fee43a81 _thr_setup (fe263a00) + 4e
fee43d70 _lwp_start (fe263a00, 0, 0, fdc64ff8, fee43d70, fe263a00)
----------------- lwp# 10 / thread# 10 --------------------
fee43dcb lwp_park (0, 0, b)
fee3e60e cond_wait_queue (feb94528, feb94538, 0, 0) + 3b
fee3eb07 _cond_wait (feb94528, feb94538) + 66
fee3eb49 cond_wait (feb94528, feb94538) + 21
fee3eb82 pthread_cond_wait (feb94528, feb94538, 0, 8098e30) + 1b
feb811c4 rrd_queue_thread (0) + b0
fee43a81 _thr_setup (fe264200) + 4e
fee43d70 _lwp_start (fe264200, 0, 0, fdb2eff8, fee43d70, fe264200)
----------------- lwp# 11 / thread# 11 --------------------
fee43dcb lwp_park (0, 0, 0)
fee3e60e cond_wait_queue (feb94528, feb94538, 0, 0) + 3b
fee3eb07 _cond_wait (feb94528, feb94538) + 66
fee3eb49 cond_wait (feb94528, feb94538) + 21
fee3eb82 pthread_cond_wait (feb94528, feb94538, 0, 80cad68) + 1b
feb811c4 rrd_queue_thread (0) + b0
fee43a81 _thr_setup (fe264a00) + 4e
fee43d70 _lwp_start (fe264a00, 0, 0, fda2fff8, fee43d70, fe264a00)
----------------- lwp# 12 / thread# 12 --------------------
fee43dcb lwp_park (0, 0, 0)
fee3e60e cond_wait_queue (feb94528, feb94538, 0, 0) + 3b
fee3eb07 _cond_wait (feb94528, feb94538) + 66
fee3eb49 cond_wait (feb94528, feb94538) + 21
fee3eb82 pthread_cond_wait (feb94528, feb94538, 0, 80cad68) + 1b
feb811c4 rrd_queue_thread (0) + b0
fee43a81 _thr_setup (fe265200) + 4e
fee43d70 _lwp_start (fe265200, 0, 0, fd930ff8, fee43d70, fe265200)
After looking at the code, I figured out that this happens because the
update_kstat function calls plugin_init_all, which ends up creating
a new rrd_queue_thread. Is this intentional?
Because of this behaviour, if I send a HUP signal to collectdmon to
restart collectd, collectd doesn't get killed. Here is the pstack
output of the collectd process after sending a HUP signal to collectdmon:
----------------- lwp# 1 / thread# 1 --------------------
fee44ad7 lwp_wait (c, 8046838)
fee40ce7 _thrp_join (c, 0, 0, 1) + 5a
fee40e66 pthread_join (c, 0, 1, feb82a4c) + 2b
feb82ae4 rrd_shutdown (8046cc0, 0, 8047d58, 8054f08, 0, 0) + a4
08059f86 plugin_shutdown_all (0, 0, 0, 0, 0, 1) + da
08054f08 main (4, 8047d90, 8047da4, feffb818) + 404
0805448d _start (4, 8047e34, 8047e56, 8047e59, 8047e88, 0) + 7d
----------------- lwp# 2 / thread# 2 --------------------
fee43dcb lwp_park (0, 0, 0)
fee3e60e cond_wait_queue (feb94528, feb94538, 0, 0) + 3b
fee3eb07 _cond_wait (feb94528, feb94538) + 66
fee3eb49 cond_wait (feb94528, feb94538) + 21
fee3eb82 pthread_cond_wait (feb94528, feb94538, 0, 8098dd0) + 1b
feb811c4 rrd_queue_thread (0) + b0
fee43a81 _thr_setup (fe260200) + 4e
fee43d70 _lwp_start (fe260200, 0, 0, fe36eff8, fee43d70, fe260200)
----------------- lwp# 8 / thread# 8 --------------------
fee43dcb lwp_park (0, 0, 0)
fee3e60e cond_wait_queue (feb94528, feb94538, 0, 0) + 3b
fee3eb07 _cond_wait (feb94528, feb94538) + 66
fee3eb49 cond_wait (feb94528, feb94538) + 21
fee3eb82 pthread_cond_wait (feb94528, feb94538, 0, 80cae68) + 1b
feb811c4 rrd_queue_thread (0) + b0
fee43a81 _thr_setup (fe263200) + 4e
fee43d70 _lwp_start (fe263200, 0, 0, fdd63ff8, fee43d70, fe263200)
----------------- lwp# 9 / thread# 9 --------------------
fee43dcb lwp_park (0, 0, 0)
fee3e60e cond_wait_queue (feb94528, feb94538, 0, 0) + 3b
fee3eb07 _cond_wait (feb94528, feb94538) + 66
fee3eb49 cond_wait (feb94528, feb94538) + 21
fee3eb82 pthread_cond_wait (feb94528, feb94538, 0, 80cad68) + 1b
feb811c4 rrd_queue_thread (0) + b0
fee43a81 _thr_setup (fe263a00) + 4e
fee43d70 _lwp_start (fe263a00, 0, 0, fdc64ff8, fee43d70, fe263a00)
----------------- lwp# 10 / thread# 10 --------------------
fee43dcb lwp_park (0, 0, 0)
fee3e60e cond_wait_queue (feb94528, feb94538, 0, 0) + 3b
fee3eb07 _cond_wait (feb94528, feb94538) + 66
fee3eb49 cond_wait (feb94528, feb94538) + 21
fee3eb82 pthread_cond_wait (feb94528, feb94538, 0, 80cad68) + 1b
feb811c4 rrd_queue_thread (0) + b0
fee43a81 _thr_setup (fe264200) + 4e
fee43d70 _lwp_start (fe264200, 0, 0, fdb2eff8, fee43d70, fe264200)
----------------- lwp# 11 / thread# 11 --------------------
feb81114 rrd_queue_thread(), exit value = 0x00000000
** zombie (exited, not detached, not yet joined) **
----------------- lwp# 12 / thread# 12 --------------------
fee43dcb lwp_park (0, 0, 0)
fee3e60e cond_wait_queue (feb94528, feb94538, 0, 0) + 3b
fee3eb07 _cond_wait (feb94528, feb94538) + 66
fee3eb49 cond_wait (feb94528, feb94538) + 21
fee3eb82 pthread_cond_wait (feb94528, feb94538, 0, 80cad68) + 1b
feb811c4 rrd_queue_thread (0) + b0
fee43a81 _thr_setup (fe265200) + 4e
fee43d70 _lwp_start (fe265200, 0, 0, fd930ff8, fee43d70, fe265200)
As you can see, one of the rrd_queue_thread threads has exited (it is a
zombie), and thus collectdmon doesn't restart collectd. Is this a known issue?
Regards
Amit
More information about the collectd
mailing list