CPU IRQ/softirq/steal accounting (was Re: [patch] Re: [collectd] Adding new DS definitions to existing plugins)

Nic Bellamy nicb-lists(a)vadacom.co.nz
Wed Jan 24 05:24:42 CET 2007


Nic Bellamy wrote:
> Florian Forster wrote:
>> On Wed, Jan 24, 2007 at 10:58:34AM +1300, Nic Bellamy wrote:
>>
>>> I've whipped up a "works for me" patch for src/cpu.c that adds irq +
>>> softirq DS definitions to ds_defs[], and updates cpu_read/cpu_submit
>>> to handle the extra data,
>>
>> I'm in the process of writing a new major version, collectd 4. It is no
>> problem at all to include that change there, though I am considering
>> pulling the RRD-file apart and using one RRD-file for each counter type.
>> This offers a lot more flexibility over the RRD-file as it is. I
>> originally decided against doing that because I thought that not
>> breaking backwards compatibility where it can be avoided might be a good
>> idea, but if the datasources change anyway I might as well do that.
>>
>> If this patch should be accepted into the collectd 3 line then it's
>> necessary that the default behavior doesn't change. You could use the
>> alternative RRD-file-layout when configured to do so or simply use
>> separate RRD-files and do some trickery at graph-generation-time. I
>> think the second option would be preferable for most users.
>
> Ok. I'll have a think about which branch I want to aim for and whip up 
> an appropriately designed patch.
Well, expediency took over, and the extra stats really do belong in the 
same RRD database as the other CPU stats.

Attached is a patch that adds separate IRQ, softirq and "steal time"[1] 
handling to the CPU module. It's disabled by default, and can be enabled 
by using the --enable-cpu_detail flag when configuring.

This way, there are no changes to the default RRD files, but you can 
enable the option during the build if you want more detailed information 
(8 DS's in the RRD file instead of 5).

Cheers,
    Nic.

[1] "Steal time" counts time in virtual machines when (a) the VM is 
runnable, but (b) the virtual machine hypervisor is doing something else 
(e.g. another VM is running).

-- 
Nic Bellamy,
Head Of Engineering, Vadacom Ltd - http://www.vadacom.co.nz/

-------------- next part --------------
Index: configure.in
===================================================================
--- configure.in	(revision 1144)
+++ configure.in	(revision 1147)
@@ -973,6 +973,7 @@
 collectd features:])
 AC_COLLECTD([debug],     [enable],  [feature], [debugging])
 AC_COLLECTD([daemon],    [disable], [feature], [daemon mode])
+AC_COLLECTD([cpu_detail],[enable],  [feature], [Detailed CPU stats - IRQ time etc.])
 AC_COLLECTD([getifaddrs],[enable],  [feature], [getifaddrs under Linux])
 
 m4_divert_once([HELP_ENABLE], [
@@ -1028,6 +1029,7 @@
   Features:
     debug . . . . . . . $enable_debug
     daemon mode . . . . $enable_daemon
+    cpu details . . . . $enable_cpu_detail
     step  . . . . . . . $collectd_step seconds
     heartbeat . . . . . $collectd_heartbeat seconds
 
Index: src/cpu.c
===================================================================
--- src/cpu.c	(revision 1144)
+++ src/cpu.c	(revision 1147)
@@ -115,9 +115,18 @@
 	"DS:syst:COUNTER:"COLLECTD_HEARTBEAT":0:U",
 	"DS:idle:COUNTER:"COLLECTD_HEARTBEAT":0:U",
 	"DS:wait:COUNTER:"COLLECTD_HEARTBEAT":0:U",
+#if COLLECT_CPU_DETAIL
+	"DS:irq:COUNTER:"COLLECTD_HEARTBEAT":0:U",
+	"DS:softirq:COUNTER:"COLLECTD_HEARTBEAT":0:U",
+	"DS:steal:COUNTER:"COLLECTD_HEARTBEAT":0:U",
+#endif	/* COLLECT_CPU_DETAIL */
 	NULL
 };
+#if COLLECT_CPU_DETAIL
+static int ds_num = 8;
+#else
 static int ds_num = 5;
+#endif	/* COLLECT_CPU_DETAIL */
 
 static void cpu_init (void)
 {
@@ -196,14 +205,22 @@
 #define BUFSIZE 512
 static void cpu_submit (int cpu_num, unsigned long long user,
 		unsigned long long nice, unsigned long long syst,
-		unsigned long long idle, unsigned long long wait)
+		unsigned long long idle, unsigned long long wait,
+		unsigned long long irq, unsigned long long softirq,
+		unsigned long long steal)
 {
 	char buf[BUFSIZE];
 	char cpu[16];
 
+#if COLLECT_CPU_DETAIL
+	if (snprintf (buf, BUFSIZE, "%u:%llu:%llu:%llu:%llu:%llu:%llu:%llu:%llu", (unsigned int) curtime,
+				user, nice, syst, idle, wait, irq, softirq, steal) >= BUFSIZE)
+		return;
+#else
 	if (snprintf (buf, BUFSIZE, "%u:%llu:%llu:%llu:%llu:%llu", (unsigned int) curtime,
 				user, nice, syst, idle, wait) >= BUFSIZE)
 		return;
+#endif	/* COLLECT_CPU_DETAIL */
 	snprintf (cpu, 16, "%i", cpu_num);
 
 	plugin_submit (MODULE_NAME, cpu, buf);
@@ -252,7 +269,7 @@
 				cpu_info.cpu_ticks[CPU_STATE_NICE],
 				cpu_info.cpu_ticks[CPU_STATE_SYSTEM],
 				cpu_info.cpu_ticks[CPU_STATE_IDLE],
-				0ULL);
+				0ULL, 0ULL, 0ULL, 0ULL);
 #endif /* PROCESSOR_CPU_LOAD_INFO */
 #if PROCESSOR_TEMPERATURE
 		/*
@@ -305,7 +322,7 @@
 # define BUFSIZE 1024
 	int cpu;
 	unsigned long long user, nice, syst, idle;
-	unsigned long long wait, intr, sitr; /* sitr == soft interrupt */
+	unsigned long long wait, intr, sitr, steal; /* sitr == soft interrupt */
 	FILE *fh;
 	char buf[BUFSIZE];
 
@@ -347,17 +364,29 @@
 			wait = atoll (fields[5]);
 			intr = atoll (fields[6]);
 			sitr = atoll (fields[7]);
+			if (numfields >= 9)
+				steal = atoll (fields[8]);
+			else
+				steal = 0;
 
+#if !COLLECT_CPU_DETAIL
 			/* I doubt anyone cares about the time spent in
 			 * interrupt handlers.. */
 			syst += intr + sitr;
+			intr = 0;
+			sitr = 0;
+			steal = 0;
+#endif	/* COLLECT_CPU_DETAIL */
 		}
 		else
 		{
 			wait = 0LL;
+			intr = 0LL;
+			sitr = 0LL;
+			steal = 0LL;
 		}
 
-		cpu_submit (cpu, user, nice, syst, idle, wait);
+		cpu_submit (cpu, user, nice, syst, idle, wait, intr, sitr, steal);
 	}
 
 	fclose (fh);
@@ -383,7 +412,8 @@
 		wait = (unsigned long long) cs.cpu_sysinfo.cpu[CPU_WAIT];
 
 		cpu_submit (ksp[cpu]->ks_instance,
-				user, 0LL, syst, idle, wait);
+				user, 0LL, syst, idle, wait,
+				0ULL, 0ULL, 0ULL);
 	}
 /* #endif defined(HAVE_LIBKSTAT) */
 
@@ -409,7 +439,7 @@
 	cpuinfo[CP_SYS] += cpuinfo[CP_INTR];
 
 	/* FIXME: Instance is always `0' */
-	cpu_submit (0, cpuinfo[CP_USER], cpuinfo[CP_NICE], cpuinfo[CP_SYS], cpuinfo[CP_IDLE], 0LL);
+	cpu_submit (0, cpuinfo[CP_USER], cpuinfo[CP_NICE], cpuinfo[CP_SYS], cpuinfo[CP_IDLE], 0ULL, 0ULL, 0ULL, 0ULL);
 #endif
 
 	return;
Index: src/collectd.1
===================================================================
--- src/collectd.1	(revision 1144)
+++ src/collectd.1	(revision 1147)
@@ -426,6 +426,9 @@
 \&  DS:syst:COUNTER:HEARTBEAT:0:100
 \&  DS:idle:COUNTER:HEARTBEAT:0:100
 \&  DS:wait:COUNTER:HEARTBEAT:0:100
+\&  DS:irq:COUNTER:HEARTBEAT:0:100 (if compiled with --enable-cpu_detail)
+\&  DS:softirq:COUNTER:HEARTBEAT:0:100 (if compiled with --enable-cpu_detail)
+\&  DS:steal:COUNTER:HEARTBEAT:0:100 (if compiled with --enable-cpu_detail)
 .Ve
 .IP "\s-1CPU\s0 frequency (\fIcpufreq\-\fI<num>\fI.rrd\fR)" 4
 .IX Item "CPU frequency (cpufreq-<num>.rrd)"


More information about the collectd mailing list