Subject: Provide individual CPU usage measurement based on idle time From: Carsten Emde <C.Emde@osadl.org> Date: Fri, 4 Sep 2015 20:41:40 +0100 The various methods to determine CPU usage and load have a number of disadvantages (see also Documentation/cpu-load.txt), and a straight-forward method to gain usage information about a particular CPU is lacking. However, in the context of setting CPU affinity and isolation, it is often required to monitor the effective usage ratio of a CPU. This patch adds an additional CPU usage measuring method that is based on idle time processing. The data are available for every CPU in /proc/idleruntime/cpuN/data in the format "<idletime> <runtime>". The counters can be reset by writing to /proc/idleruntime/cpuN/reset. To calculate the per-core CPU usage since the most recent reset, divide the runtime by the sum of runtime plus idletime, e.g. # for i in `ls -1d /proc/idleruntime/cpu* | sort -nk1.22` > do > echo "$i: `awk '{ print (100.0*$2) / ($1+$2)"%" }' <$i/data`" > echo 1 >$i/reset > done /proc/idleruntime/cpu0: 72.0048% /proc/idleruntime/cpu1: 5.49522% /proc/idleruntime/cpu2: 0.27916% /proc/idleruntime/cpu3: 32.3493% In addition, summed up data of all present CPUs are available in /proc/idleruntime/all/data in the same format as above. Thus, to calculate the overall CPU usage since the most recent reset, the following command may be used: awk '{ print (100.0*$2) / ($1+$2)"%" }' </proc/idleruntime/all/data To reset the counters altogether, write to /proc/idleruntime/all/reset. Signed-off-by: Carsten Emde <C.Emde@osadl.org> --- init/Kconfig | 28 ++++ kernel/sched/Makefile | 1 kernel/sched/core.c | 31 +++++ kernel/sched/cpu_idleruntime.c | 244 +++++++++++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 8 + 5 files changed, 312 insertions(+) Index: linux-4.4.39-rt50/init/Kconfig =================================================================== @ linux-4.4.39-rt50/init/Kconfig:431 @ config BSD_PROCESS_ACCT_V3 for processing it. 
A preliminary version of these tools is available at <http://www.gnu.org/software/acct/>. +config CPU_IDLERUNTIME + bool "Provide individual CPU usage measurement based on idle processing" + help + If you say Y here, individual CPU usage data will be provided that are + based on idle processing. The data are available for every CPU and for + all present CPUs summed up in /proc/idleruntime/cpuN/data and + /proc/idleruntime/all/data, respectively, in the format + "<idletime> <runtime>". The counters can be reset by writing to + /proc/idleruntime/cpuN/reset separately for every CPU and to + /proc/idleruntime/all/reset for all present CPUs at once. To calculate + the CPU usage since the most recent reset, the runtime must be divided + by the sum of idletime plus runtime, e.g. + awk '{print (100.0*$2) / ($1+$2)"%"}' </proc/idleruntime/cpu0/data + for every CPU or + awk '{print (100.0*$2) / ($1+$2)"%"}' </proc/idleruntime/all/data + for all CPUs altogether. The shell code snippet + # for i in `ls -1d /proc/idleruntime/cpu* | sort -nk1.22` + > do + > echo "$i: `awk '{ print (100.0*$2) / ($1+$2)"%" }' <$i/data`" + > echo 1 >$i/reset + > done + may produce + /proc/idleruntime/cpu0: 72.0048% + /proc/idleruntime/cpu1: 5.49522% + /proc/idleruntime/cpu2: 0.27916% + /proc/idleruntime/cpu3: 32.3493% + on a four-core processor. 
+ config TASKSTATS bool "Export task/process statistics through netlink" depends on NET Index: linux-4.4.39-rt50/kernel/sched/Makefile =================================================================== --- linux-4.4.39-rt50.orig/kernel/sched/Makefile +++ linux-4.4.39-rt50/kernel/sched/Makefile @ linux-4.4.39-rt50/init/Kconfig:20 @ obj-y += wait.o swait.o swork.o completi obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o +obj-$(CONFIG_CPU_IDLERUNTIME) += cpu_idleruntime.o obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o Index: linux-4.4.39-rt50/kernel/sched/core.c =================================================================== --- linux-4.4.39-rt50.orig/kernel/sched/core.c +++ linux-4.4.39-rt50/kernel/sched/core.c @ linux-4.4.39-rt50/init/Kconfig:2690 @ prepare_task_switch(struct rq *rq, struc fire_sched_out_preempt_notifiers(prev, next); prepare_lock_switch(rq, next); prepare_arch_switch(next); +#ifdef CONFIG_CPU_IDLERUNTIME + if (is_idle_task(next)) { + int cpu = raw_smp_processor_id(); + + if (per_cpu(idlestop, cpu)) { + unsigned long flags; + + raw_spin_lock_irqsave(&per_cpu(idleruntime_lock, cpu), + flags); + per_cpu(idlestart, cpu) = cpu_clock(cpu); + per_cpu(runtime, cpu) += + per_cpu(idlestart, cpu) - per_cpu(idlestop, cpu); + raw_spin_unlock_irqrestore(&per_cpu(idleruntime_lock, + cpu), flags); + } + } else if (is_idle_task(prev)) { + int cpu = raw_smp_processor_id(); + + if (per_cpu(idlestart, cpu)) { + unsigned long flags; + + raw_spin_lock_irqsave(&per_cpu(idleruntime_lock, cpu), + flags); + per_cpu(idlestop, cpu) = cpu_clock(cpu); + per_cpu(idletime, cpu) += + per_cpu(idlestop, cpu) - per_cpu(idlestart, cpu); + raw_spin_unlock_irqrestore(&per_cpu(idleruntime_lock, + cpu), flags); + } + } +#endif } /** Index: linux-4.4.39-rt50/kernel/sched/cpu_idleruntime.c =================================================================== --- /dev/null +++ 
linux-4.4.39-rt50/kernel/sched/cpu_idleruntime.c @ linux-4.4.39-rt50/init/Kconfig:4 @ +/* + cpu_idleruntime.c: provide CPU usage data based on idle processing + + Copyright (C) 2012,2015 Carsten Emde <C.Emde@osadl.org> + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +#include <linux/seq_file.h> +#include <linux/proc_fs.h> +#include <linux/cpu.h> + +#include "sched.h" + +DEFINE_PER_CPU(unsigned long long, idlestart); +DEFINE_PER_CPU(unsigned long long, idlestop); +DEFINE_PER_CPU(unsigned long long, idletime); +DEFINE_PER_CPU(unsigned long long, runtime); +DEFINE_PER_CPU(raw_spinlock_t, idleruntime_lock); + +static DEFINE_PER_CPU(struct proc_dir_entry *, idleruntime_dir); +static struct proc_dir_entry *root_idleruntime_dir; + +/* + * idleruntime_get - snapshot the idle time and runtime counters of one CPU + * @cpu: CPU whose counters are read + * @cpuidletime: receives the idle time, including an idle interval in progress + * @cpuruntime: receives the runtime, including a busy interval in progress + * + * The interval that is still in progress is added to the returned values + * only. The per-CPU counters are left untouched, because the scheduler hook + * in prepare_task_switch() adds the complete interval at the next context + * switch; adding it here as well would count it twice, and once more on + * every further read. + */ +static void idleruntime_get(unsigned long cpu, unsigned long long *cpuidletime, + unsigned long long *cpuruntime) +{ + unsigned long long now, idle, run; + unsigned long flags; + + raw_spin_lock_irqsave(&per_cpu(idleruntime_lock, cpu), flags); + + idle = per_cpu(idletime, cpu); + run = per_cpu(runtime, cpu); + + /* Account the open interval in the local snapshot only */ + now = cpu_clock(cpu); + if (is_idle_task(cpu_rq(cpu)->curr)) + idle += now - per_cpu(idlestart, cpu); + else + run += now - per_cpu(idlestop, cpu); + + *cpuidletime = idle; + *cpuruntime = run; + + raw_spin_unlock_irqrestore(&per_cpu(idleruntime_lock, cpu), flags); 
+ +} + +static void idleruntime_output(struct seq_file *m, unsigned long long idletime, + unsigned long long runtime) +{ + seq_printf(m, "%llu %llu\n", idletime, runtime); +} + +static int idleruntime_show(struct seq_file *m, void *v) +{ + unsigned long cpu = (unsigned long) m->private; + unsigned long long cpuidletime, cpuruntime; + + idleruntime_get(cpu, &cpuidletime, &cpuruntime); + idleruntime_output(m, cpuidletime, cpuruntime); + + return 0; +} + +static int idleruntime_show_all(struct seq_file *m, void *v) +{ + unsigned long cpu; + unsigned long long total_idletime = 0ULL, total_runtime = 0ULL; + + preempt_disable(); + + for_each_present_cpu(cpu) { + unsigned long long cpuidletime, cpuruntime; + + idleruntime_get(cpu, &cpuidletime, &cpuruntime); + total_idletime += cpuidletime; + total_runtime += cpuruntime; + } + + preempt_enable(); + + idleruntime_output(m, total_idletime, total_runtime); + + return 0; +} + +static inline void idleruntime_reset1(unsigned long cpu) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&per_cpu(idleruntime_lock, cpu), flags); + per_cpu(idletime, cpu) = per_cpu(runtime, cpu) = 0; + per_cpu(idlestart, cpu) = per_cpu(idlestop, cpu) = cpu_clock(cpu); + raw_spin_unlock_irqrestore(&per_cpu(idleruntime_lock, cpu), flags); +} + +static ssize_t idleruntime_reset(struct file *file, const char __user *buffer, + size_t len, loff_t *offset) +{ + unsigned long cpu = (unsigned long) PDE_DATA(file_inode(file)); + + idleruntime_reset1(cpu); + return len; +} + +static ssize_t idleruntime_reset_all(struct file *file, + const char __user *buffer, + size_t len, loff_t *offset) +{ + unsigned long cpu; + + preempt_disable(); + + for_each_present_cpu(cpu) + idleruntime_reset1(cpu); + + preempt_enable(); + + return len; +} + +static int idleruntime_open_all(struct inode *inode, struct file *file) +{ + return single_open(file, idleruntime_show_all, PDE_DATA(inode)); +} + +static const struct file_operations idleruntime_all_fops = { + .open = 
idleruntime_open_all, + .read = seq_read, + .llseek = seq_lseek, + .write = idleruntime_reset_all, + .release = single_release, +}; + +static int idleruntime_open(struct inode *inode, struct file *file) +{ + return single_open(file, idleruntime_show, PDE_DATA(inode)); +} + +static const struct file_operations idleruntime_fops = { + .open = idleruntime_open, + .read = seq_read, + .llseek = seq_lseek, + .write = idleruntime_reset, + .release = single_release, +}; + +static void setup_procfiles(unsigned long cpu) +{ + char name[32]; + struct proc_dir_entry *idleruntime_cpudir = NULL; + + if (root_idleruntime_dir) { + snprintf(name, sizeof(name), "cpu%lu", cpu); + idleruntime_cpudir = proc_mkdir(name, root_idleruntime_dir); + } + + if (idleruntime_cpudir) { + proc_create_data("data", S_IRUGO, idleruntime_cpudir, + &idleruntime_fops, (void *) cpu); + proc_create_data("reset", S_IWUGO, idleruntime_cpudir, + &idleruntime_fops, (void *) cpu); + } + per_cpu(idleruntime_dir, cpu) = idleruntime_cpudir; +} + +static void unset_procfiles(unsigned long cpu) +{ + struct proc_dir_entry *idleruntime_cpudir = + per_cpu(idleruntime_dir, cpu); + + if (idleruntime_cpudir) { + remove_proc_entry("reset", idleruntime_cpudir); + remove_proc_entry("data", idleruntime_cpudir); + proc_remove(idleruntime_cpudir); + per_cpu(idleruntime_dir, cpu) = NULL; + } +} + +static int idleruntime_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned long cpu = (unsigned long) hcpu; + + switch (action) { + case CPU_ONLINE: + setup_procfiles(cpu); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + unset_procfiles(cpu); + break; +#endif + } + return NOTIFY_OK; +} + +static struct notifier_block idleruntime_cpu_notifier = +{ + .notifier_call = idleruntime_cpu_callback, +}; + + +/* + * idleruntime_init - create /proc/idleruntime and start the accounting + * + * Initializes the per-CPU locks and timestamps of all possible CPUs, creates + * the proc entries of CPU 0 and, if more than one CPU is possible, the + * summed-up "all" entries, then registers the CPU notifier that creates and + * removes the per-CPU entries. Always returns 0; if the proc directory cannot + * be created, nothing is set up. + */ +static int __init idleruntime_init(void) +{ + root_idleruntime_dir = proc_mkdir("idleruntime", NULL); + if (root_idleruntime_dir) { + struct proc_dir_entry *idleruntime_alldir; + unsigned long 
cpu, cpus = 0; + + for_each_possible_cpu(cpu) { + /* + * Set up the lock before the timestamps become + * non-zero: prepare_task_switch() takes the lock as + * soon as it sees a timestamp set. + */ + raw_spin_lock_init(&per_cpu(idleruntime_lock, cpu)); + per_cpu(idlestart, cpu) = per_cpu(idlestop, cpu) = + cpu_clock(cpu); + cpus++; + } + + setup_procfiles(0); + + if (cpus > 1) { + idleruntime_alldir = proc_mkdir("all", + root_idleruntime_dir); + /* + * With a NULL parent the entries would be created + * directly in /proc, so skip them if mkdir failed. + */ + if (idleruntime_alldir) { + proc_create_data("data", S_IRUGO, + idleruntime_alldir, + &idleruntime_all_fops, NULL); + proc_create_data("reset", S_IWUGO, + idleruntime_alldir, + &idleruntime_all_fops, NULL); + } + } + + register_cpu_notifier(&idleruntime_cpu_notifier); + } + return 0; +} + +early_initcall(idleruntime_init); Index: linux-4.4.39-rt50/kernel/sched/sched.h =================================================================== --- linux-4.4.39-rt50.orig/kernel/sched/sched.h +++ linux-4.4.39-rt50/kernel/sched/sched.h @ linux-4.4.39-rt50/init/Kconfig:754 @ static inline void rq_clock_skip_update( rq->clock_skip_update &= ~RQCF_REQ_SKIP; } +#ifdef CONFIG_CPU_IDLERUNTIME +/* Per-CPU idle/run accounting state, defined in cpu_idleruntime.c */ +DECLARE_PER_CPU(unsigned long long, idlestart); +DECLARE_PER_CPU(unsigned long long, idlestop); +DECLARE_PER_CPU(unsigned long long, idletime); +DECLARE_PER_CPU(unsigned long long, runtime); +DECLARE_PER_CPU(raw_spinlock_t, idleruntime_lock); +#endif + #ifdef CONFIG_NUMA enum numa_topology_type { NUMA_DIRECT,