From: Frederic Weisbecker Prepare the interface to implement the nohz cpuset flag. This flag, once set, will tell the system to try to shut down the periodic timer tick when possible. We use a per-CPU refcounter here. As long as a CPU is contained in at least one cpuset that has the nohz flag set, it is part of the set of CPUs that run in adaptive nohz mode. [ include build fix from Zen Lin ] Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner --- arch/Kconfig | 3 +++ include/linux/cpuset.h | 31 ++++++++++++++++++++++++++++ init/Kconfig | 8 ++++++++ kernel/cpuset.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 94 insertions(+), 1 deletion(-) diff --git a/arch/Kconfig b/arch/Kconfig index 366ec06..8e2162f6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -239,6 +239,9 @@ config HAVE_ARCH_JUMP_LABEL bool config HAVE_ARCH_MUTEX_CPU_RELAX + bool + +config HAVE_CPUSETS_NO_HZ bool config HAVE_RCU_TABLE_FREE diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 838320f..7e7eb41 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_CPUSETS @@ -235,4 +236,34 @@ static inline bool put_mems_allowed(unsigned int seq) #endif /* !CONFIG_CPUSETS */ +#ifdef CONFIG_CPUSETS_NO_HZ + +DECLARE_PER_CPU(atomic_t, cpu_adaptive_nohz_ref); + +static inline bool cpuset_cpu_adaptive_nohz(int cpu) +{ + atomic_t *ref = &per_cpu(cpu_adaptive_nohz_ref, cpu); + + if (atomic_add_return(0, ref) > 0) + return true; + + return false; +} + +static inline bool cpuset_adaptive_nohz(void) +{ + /* + * We probably want to do atomic_read() when we read + * 
locally to avoid the overhead of an ordered add. + * For that we have to do the dec of the ref locally as + * well. + */ + return cpuset_cpu_adaptive_nohz(smp_processor_id()); +} +#else +static inline bool cpuset_cpu_adaptive_nohz(int cpu) { return false; } +static inline bool cpuset_adaptive_nohz(void) { return false; } + +#endif /* CONFIG_CPUSETS_NO_HZ */ + #endif /* _LINUX_CPUSET_H */ diff --git a/init/Kconfig b/init/Kconfig index 6fdd6e3..ffdeeab 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -749,6 +749,14 @@ config PROC_PID_CPUSET depends on CPUSETS default y +config CPUSETS_NO_HZ + bool "Tickless cpusets" + depends on CPUSETS && HAVE_CPUSETS_NO_HZ + help + This option lets you apply a nohz property to a cpuset such + that the periodic timer tick is avoided when possible on + the CPUs concerned. + config CGROUP_CPUACCT bool "Simple CPU accounting cgroup subsystem" help diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f33c715..6319d8e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -145,6 +145,7 @@ typedef enum { CS_SCHED_LOAD_BALANCE, CS_SPREAD_PAGE, CS_SPREAD_SLAB, + CS_ADAPTIVE_NOHZ, } cpuset_flagbits_t; /* the type of hotplug event */ @@ -189,6 +190,11 @@ static inline int is_spread_slab(const struct cpuset *cs) return test_bit(CS_SPREAD_SLAB, &cs->flags); } +static inline int is_adaptive_nohz(const struct cpuset *cs) +{ + return test_bit(CS_ADAPTIVE_NOHZ, &cs->flags); +} + static struct cpuset top_cpuset = { .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), }; @@ -1190,6 +1196,32 @@ static void cpuset_change_flag(struct task_struct *tsk, cpuset_update_task_spread_flag(cgroup_cs(scan->cg), tsk); } +#ifdef CONFIG_CPUSETS_NO_HZ + +DEFINE_PER_CPU(atomic_t, cpu_adaptive_nohz_ref); + +static void update_nohz_cpus(struct cpuset *old_cs, struct cpuset *cs) +{ + int cpu; + int val; + + if (is_adaptive_nohz(old_cs) == is_adaptive_nohz(cs)) + return; + + for_each_cpu(cpu, cs->cpus_allowed) { + atomic_t *ref = 
&per_cpu(cpu_adaptive_nohz_ref, cpu); + if (is_adaptive_nohz(cs)) + atomic_inc(ref); + else + atomic_dec(ref); + } +} +#else +static inline void update_nohz_cpus(struct cpuset *old_cs, struct cpuset *cs) +{ +} +#endif + /* * update_tasks_flags - update the spread flags of tasks in the cpuset. * @cs: the cpuset in which each task's spread flags needs to be changed @@ -1255,6 +1287,8 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) || (is_spread_page(cs) != is_spread_page(trialcs))); + update_nohz_cpus(cs, trialcs); + mutex_lock(&callback_mutex); cs->flags = trialcs->flags; mutex_unlock(&callback_mutex); @@ -1465,6 +1499,7 @@ typedef enum { FILE_MEMORY_PRESSURE, FILE_SPREAD_PAGE, FILE_SPREAD_SLAB, + FILE_ADAPTIVE_NOHZ, } cpuset_filetype_t; static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) @@ -1504,6 +1539,11 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) case FILE_SPREAD_SLAB: retval = update_flag(CS_SPREAD_SLAB, cs, val); break; +#ifdef CONFIG_CPUSETS_NO_HZ + case FILE_ADAPTIVE_NOHZ: + retval = update_flag(CS_ADAPTIVE_NOHZ, cs, val); + break; +#endif default: retval = -EINVAL; break; @@ -1663,6 +1703,10 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft) return is_spread_page(cs); case FILE_SPREAD_SLAB: return is_spread_slab(cs); +#ifdef CONFIG_CPUSETS_NO_HZ + case FILE_ADAPTIVE_NOHZ: + return is_adaptive_nohz(cs); +#endif default: BUG(); } @@ -1771,7 +1815,14 @@ static struct cftype files[] = { .write_u64 = cpuset_write_u64, .private = FILE_SPREAD_SLAB, }, - +#ifdef CONFIG_CPUSETS_NO_HZ + { + .name = "adaptive_nohz", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_ADAPTIVE_NOHZ, + }, +#endif { .name = "memory_pressure_enabled", .flags = CFTYPE_ONLY_ON_ROOT, -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a 
message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/