CPU_PISCH: Define preliminary CPU Plug In SCHeduler driver interface

Define a preliminary CPU Plug In SCHeduler (PISCH) driver interface that
allows alternate schedulers for SCHED_NORMAL/SCHED_BATCH/SCHED_IDLE
policy tasks to be used in place of CFS's "fair" scheduler.  The
interface will be expanded as necessary as further schedulers are added.

Implement the interface for the existing CFS "fair" scheduler.

Signed-off-by: Peter Williams
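For illustration, here is a rough sketch (not part of this patch) of the
shape a hypothetical additional scheduler, called "example" below, would
take.  The "example" name and the hook bodies are invented; they simply
mirror CFS's vruntime behaviour to show the interface.  Note that every
hook the core calls unconditionally must be supplied -- only rq_tick()
is treated as optional.

/*
 * Illustrative only: a skeleton plug in scheduler named "example".
 */
static int example_entity_before(struct sched_entity *a, struct sched_entity *b)
{
	return (s64)(a->vruntime - b->vruntime) < 0;
}

static void example_inherit_place(struct sched_entity *sea, struct sched_entity *seb)
{
	sea->vruntime = seb->vruntime;
}

/* ... the remaining hooks would be filled in the same way ... */

static const struct cpu_pisch_drv example_cpu_pisch_drv = {
	.name		= "example",
	.entity_before	= example_entity_before,
	.inherit_place	= example_inherit_place,
	/* .place_entity_after, .entity_key, .place_entity, .sched_slice,
	 * .update_curr, .swap_places, .wakeup_preempt_entity and
	 * .change_task_cpu would also be set here. */
};

Such a driver would then be added to cpu_pisch_drvs[] under its own
Kconfig option and selected at boot with "cpu_pisch=example".

---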
diff --git a/include/linux/cpu_pisch_drv.h b/include/linux/cpu_pisch_drv.h
new file mode 100644
--- /dev/null
+++ b/include/linux/cpu_pisch_drv.h
@@ -0,0 +1,50 @@
+#ifndef _LINUX_CPU_PISCH_DRV_H
+#define _LINUX_CPU_PISCH_DRV_H
+/*
+ * include/linux/cpu_pisch_drv.h
+ * This contains the definition of the CPU Plug In SCHeduler driver
+ * struct, etc.
+ */
+#ifdef CONFIG_CPU_PISCH
+#include
+#include
+
+#define pisch_const __read_mostly
+
+struct cfs_rq;
+
+/*
+ * This is the main scheduler driver struct.
+ */
+struct cpu_pisch_drv {
+	const char *name;
+	struct attribute **attrs;
+
+	void (*place_entity_after)(struct sched_entity *sea, struct sched_entity *seb);
+	int (*entity_before)(struct sched_entity *a, struct sched_entity *b);
+	s64 (*entity_key)(struct cfs_rq *cfs_rq, struct sched_entity *se);
+	void (*place_entity)(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial);
+	void (*rq_tick)(struct rq *rq);
+	u64 (*sched_slice)(struct cfs_rq *cfs_rq, struct sched_entity *se);
+	void (*update_curr)(struct cfs_rq *cfs_rq, struct sched_entity *curr, unsigned long delta_exec);
+	void (*swap_places)(struct sched_entity *sea, struct sched_entity *seb);
+	void (*inherit_place)(struct sched_entity *sea, struct sched_entity *seb);
+	int (*wakeup_preempt_entity)(struct sched_entity *curr, struct sched_entity *se);
+	void (*change_task_cpu)(struct task_struct *p, unsigned int old_cpu, unsigned int new_cpu);
+};
+
+extern const struct cpu_pisch_drv *cpu_pisch_drvp;
+
+static inline void cpu_pisch_printk_dev_name(void)
+{
+	printk("CPU_PISCH: Running with \"%s\" CPU scheduler.\n", cpu_pisch_drvp->name);
+}
+
+extern int cpu_pisch_drv_sysfs_init(void);
+#else
+#define pisch_const const
+static inline void cpu_pisch_printk_dev_name(void) {}
+static inline int cpu_pisch_drv_sysfs_init(void) { return 0; }
+#endif
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1212,6 +1212,11 @@ struct sched_rt_entity {
 
 struct rcu_node;
 
+#ifdef CONFIG_CPU_PISCH
+struct cpu_pisch_task_data {
+};
+#endif
+
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	void *stack;
@@ -1232,6 +1237,9 @@ struct task_struct {
 	const struct sched_class *sched_class;
 	struct sched_entity se;
 	struct sched_rt_entity rt;
+#ifdef CONFIG_CPU_PISCH
+	struct cpu_pisch_task_data cpu_pisch;
+#endif
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	/* list of struct preempt_notifier: */
diff --git a/init/Kconfig b/init/Kconfig
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -426,6 +426,8 @@ config LOG_BUF_SHIFT
 config HAVE_UNSTABLE_SCHED_CLOCK
 	bool
 
+source "kernel/Kconfig.cpu_pisch"
+
 config GROUP_SCHED
 	bool "Group CPU scheduler"
 	depends on EXPERIMENTAL
diff --git a/init/main.c b/init/main.c
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -688,6 +689,8 @@ asmlinkage void __init start_kernel(void
 
 	ftrace_init();
 
+	cpu_pisch_printk_dev_name();
+
 	/* Do the rest non-__init'ed, we're now alive */
 	rest_init();
 }
@@ -786,6 +789,7 @@ static void __init do_basic_setup(void)
 	init_irq_proc();
 	do_ctors();
 	do_initcalls();
+	cpu_pisch_drv_sysfs_init();
 }
 
 static void __init do_pre_smp_initcalls(void)
diff --git a/kernel/Kconfig.cpu_pisch b/kernel/Kconfig.cpu_pisch
new file mode 100644
--- /dev/null
+++ b/kernel/Kconfig.cpu_pisch
@@ -0,0 +1,55 @@
+
+menu "CPU schedulers"
+
+config CPU_PISCH
+	bool "Support multiple CPU schedulers"
+	default y
+	---help---
+	  Say y here if you wish to be able to make a boot time selection
+	  of which CPU scheduler to use.  The CPU scheduler to be used may
+	  then be selected with the boot parameter "cpu_pisch=".  In the
+	  absence of such a command line parameter, the scheduler selected
+	  at "Default CPU scheduler" will be used.
+
+	  The choice of which schedulers should be compiled into the
+	  kernel (and be available for boot time selection) can be made
+	  by enabling "Select which CPU schedulers to build in".
+
+	  If you say n here the single scheduler to be built into the
+	  kernel may be selected at "Default CPU scheduler".
+
+config CPU_PISCH_CHOOSE_BUILTINS
+	bool "Select which CPU schedulers to build in" if CPU_PISCH
+	default n
+	---help---
+	  Say y here if you want to be able to select which CPU schedulers
+	  are built into the kernel (for selection at boot time).
+
+config CPU_PISCH_CFS_FAIR
+	bool "'Completely Fair Scheduler' CPU scheduler" if CPU_PISCH_CHOOSE_BUILTINS
+	depends on CPU_PISCH
+	default CPU_PISCH
+	---help---
+	  This is the standard CPU scheduler which purports to model an
+	  "ideal, precise multi-tasking CPU".
+	  To boot this CPU scheduler, if it is not the default, use the
+	  boot parameter "cpu_pisch=cfs_fair".
+
+choice
+	prompt "Default CPU scheduler"
+	depends on CPU_PISCH
+	---help---
+	  This option allows you to choose which CPU scheduler shall be
+	  booted by default at startup if you have enabled CPU_PISCH,
+	  or it will select the only scheduler to be built in otherwise.
+
+config CPU_PISCH_DEFAULT_CFS_FAIR
+	bool "CFS Fair CPU scheduler"
+	select CPU_PISCH_CFS_FAIR
+	---help---
+	  This is the default CPU scheduler, which purports to model an
+	  "ideal, precise multi-tasking CPU".
+
+endchoice
+
+endmenu
diff --git a/kernel/cpu_pisch_drv.c b/kernel/cpu_pisch_drv.c
new file mode 100644
--- /dev/null
+++ b/kernel/cpu_pisch_drv.c
@@ -0,0 +1,120 @@
+/*
+ * kernel/cpu_pisch_drv.c
+ *
+ * Kernel CPU Plug In SCHeduler device implementation
+ */
+
+const struct cpu_pisch_drv *cpu_pisch_drvp =
+#if defined(CONFIG_CPU_PISCH_DEFAULT_CFS_FAIR)
+	&cfs_fair_cpu_pisch_drv;
+#else
+	NULL;
+#error "You must have at least 1 CPU scheduler selected"
+#endif
+
+const struct cpu_pisch_drv *cpu_pisch_drvs[] = {
+#if defined(CONFIG_CPU_PISCH_CFS_FAIR)
+	&cfs_fair_cpu_pisch_drv,
+#endif
+	NULL,
+};
+
+static const struct cpu_pisch_drv *cpu_pisch_get_drv(const char *str)
+{
+	int i;
+	const struct cpu_pisch_drv *drvp;
+
+	for (i = 0; (drvp = cpu_pisch_drvs[i]); i++)
+		if (!strcmp(str, drvp->name)) {
+			cpu_pisch_drvp = drvp;
+			return drvp;
+		}
+
+	return NULL;
+}
+
+static int __init cpu_pisch_drv_setup(char *str)
+{
+	const struct cpu_pisch_drv *drvp = cpu_pisch_get_drv(str);
+
+	if (drvp) {
+		cpu_pisch_drvp = drvp;
+		return 1;
+	}
+
+	printk("CPU_PISCH: Unknown scheduler: \"%s\"\n", str);
+
+	return 1;
+}
+
+__setup("cpu_pisch=", cpu_pisch_drv_setup);
+
+/* Set up sysfs structures for CPU_PISCH data */
+
+static struct attribute_group cpu_pisch_attr_group = {
+	.name = "parameters",
+};
+
+static struct kobject *cpu_pisch_kobj;
+
+static ssize_t
+show_cpu_pisch_name(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", cpu_pisch_drvp->name);
+}
+
+static ssize_t
+store_cpu_pisch_name(struct kobject *kobj, struct kobj_attribute *attr,
+		     const char *buf, size_t count)
+{
+	const struct cpu_pisch_drv *drvp = cpu_pisch_get_drv(buf);
+
+	if (drvp) {
+		int retval = 0;
+
+		sysfs_remove_group(cpu_pisch_kobj, &cpu_pisch_attr_group);
+		cpu_pisch_drvp = drvp;
+		cpu_pisch_attr_group.attrs = cpu_pisch_drvp->attrs;
+		retval = sysfs_create_group(cpu_pisch_kobj, &cpu_pisch_attr_group);
+		if (retval)
+			printk("CPU_PISCH: ERROR(%d): \"%s\"\n", retval, cpu_pisch_drvp->name);
+		printk("CPU_PISCH: Set scheduler: \"%s\"\n", cpu_pisch_drvp->name);
+		return count;
+	}
+
+	printk("CPU_PISCH: Unknown scheduler: \"%s\"\n", buf);
+
+	return count;
+}
+
+static struct kobj_attribute cpu_pisch_name_attribute =
+	__ATTR(name, S_IRUGO | S_IWUSR, show_cpu_pisch_name, store_cpu_pisch_name);
+
+int __init cpu_pisch_drv_sysfs_init(void)
+{
+	int retval = 0;
+
+	/*
+	 * Create a simple kobject with the name of "cpu_pisch",
+	 * located under /sys/kernel/
+	 */
+	cpu_pisch_kobj = kobject_create_and_add("cpu_pisch", kernel_kobj);
+	if (!cpu_pisch_kobj)
+		return -ENOMEM;
+
+	/* Create a file containing the name of the current scheduler */
+	retval = sysfs_create_file(cpu_pisch_kobj, &cpu_pisch_name_attribute.attr);
+
+	/*
+	 * Create the files associated with the scheduler's parameters
+	 * in a subdirectory named "parameters"
+	 */
+	if (!retval && (cpu_pisch_drvp->attrs != NULL)) {
+		cpu_pisch_attr_group.attrs = cpu_pisch_drvp->attrs;
+		retval = sysfs_create_group(cpu_pisch_kobj, &cpu_pisch_attr_group);
+	}
+
+	if (retval)
+		kobject_put(cpu_pisch_kobj);
+
+	return retval;
+}
diff --git a/kernel/cpu_pisch_pvt.h b/kernel/cpu_pisch_pvt.h
new file mode 100644
--- /dev/null
+++ b/kernel/cpu_pisch_pvt.h
@@ -0,0 +1,50 @@
+#ifndef _LINUX_CPU_PISCH_PVT_H
+#define _LINUX_CPU_PISCH_PVT_H
+/*
+ * kernel/cpu_pisch_pvt.h
+ * This contains the code for use by CPU Plug In SCHedulers
+ */
+
+/*
+ * sysfs helper macros for scheduler attributes
+ */
+#define to_llu(a)	((unsigned long long)(a))
+#define to_lu(a)	((unsigned long)(a))
+#define to_u(a)		((unsigned int)(a))
+
+#define __CPU_PISCH_SYSFS_SHOW_UNSIGNED(aname, vname, convert_out) \
+static ssize_t \
+show_cpu_pisch_ ## aname(struct kobject *kobj, struct kobj_attribute *attr, char *buf) \
+{ \
+	return sprintf(buf, "%llu\n", convert_out(vname)); \
+}
+
+#define __CPU_PISCH_SYSFS_STORE_UNSIGNED(aname, vname, convert_in) \
+static ssize_t \
+store_cpu_pisch_ ## aname(struct kobject *kobj, struct kobj_attribute *attr, \
+			  const char *buf, size_t count) \
+{ \
+	unsigned long long var; \
+\
+	sscanf(buf, "%llu", &var); \
+\
+	vname = convert_in(var); \
+\
+	return count; \
+}
+
+#define CPU_PISCH_SYSFS_DEFINE_UNSIGNED_RW(aname, vname, convert_out, convert_in) \
+__CPU_PISCH_SYSFS_SHOW_UNSIGNED(aname, vname, convert_out) \
+__CPU_PISCH_SYSFS_STORE_UNSIGNED(aname, vname, convert_in) \
+static struct kobj_attribute cpu_pisch_sysfs_ ## aname = \
+	__ATTR(aname, (S_IRUGO | S_IWUSR), show_cpu_pisch_ ## aname, store_cpu_pisch_ ## aname)
+
+#define CPU_PISCH_SYSFS_DEFINE_UNSIGNED_RO(aname, vname, convert_out) \
+__CPU_PISCH_SYSFS_SHOW_UNSIGNED(aname, vname, convert_out) \
+static struct kobj_attribute cpu_pisch_sysfs_ ## aname = \
+	__ATTR(aname, S_IRUGO, show_cpu_pisch_ ## aname, NULL)
+
+#define CPU_PISCH_SYSFS_ATTR(aname) \
+	(&((cpu_pisch_sysfs_ ## aname).attr))
+
+#endif
diff --git a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -515,6 +515,11 @@ static struct root_domain def_root_domai
 
 #endif
 
+#ifdef CONFIG_CPU_PISCH
+struct cpu_pisch_rq_data {
+};
+#endif
+
 /*
  * This is the main, per-CPU runqueue data structure.
  *
@@ -544,6 +549,9 @@ struct rq {
 	u64 nr_migrations_in;
 
 	struct cfs_rq cfs;
+#ifdef CONFIG_CPU_PISCH
+	struct cpu_pisch_rq_data cpu_pisch;
+#endif
 	struct rt_rq rt;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1813,10 +1821,17 @@ static void cfs_rq_set_shares(struct cfs
 
 static void calc_load_account_active(struct rq *this_rq);
 
+#include
 #include "sched_stats.h"
 #include "sched_idletask.c"
+#ifdef CONFIG_CPU_PISCH
+#include "cpu_pisch_pvt.h"
+#endif
 #include "sched_fair.c"
 #include "sched_rt.c"
+#ifdef CONFIG_CPU_PISCH
+#include "cpu_pisch_drv.c"
+#endif
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
 #endif
@@ -2030,8 +2045,10 @@ void set_task_cpu(struct task_struct *p,
 {
 	int old_cpu = task_cpu(p);
 	struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
+#ifndef CONFIG_CPU_PISCH
 	struct cfs_rq *old_cfsrq = task_cfs_rq(p),
 		      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
+#endif
 	u64 clock_offset;
 
 	clock_offset = old_rq->clock - new_rq->clock;
@@ -2056,8 +2073,13 @@ void set_task_cpu(struct task_struct *p,
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
 				     1, 1, NULL, 0);
 	}
+
+#ifdef CONFIG_CPU_PISCH
+	cpu_pisch_drvp->change_task_cpu(p, old_cpu, new_cpu);
+#else
 	p->se.vruntime -= old_cfsrq->min_vruntime -
 					 new_cfsrq->min_vruntime;
+#endif
 
 	__set_task_cpu(p, new_cpu);
 }
@@ -5200,6 +5222,10 @@ void scheduler_tick(void)
 	spin_lock(&rq->lock);
 	update_rq_clock(rq);
 	update_cpu_load(rq);
+#ifdef CONFIG_CPU_PISCH
+	if (cpu_pisch_drvp->rq_tick)
+		cpu_pisch_drvp->rq_tick(rq);
+#endif
 	curr->sched_class->task_tick(rq, curr, 0);
 	spin_unlock(&rq->lock);
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -276,11 +276,26 @@ static inline u64 min_vruntime(u64 min_v
 
 static inline int entity_before(struct sched_entity *a,
 				struct sched_entity *b)
+#ifdef CONFIG_CPU_PISCH
+{
+	return cpu_pisch_drvp->entity_before(a, b);
+}
+
+static inline int cfs_fair_entity_before(struct sched_entity *a,
+					 struct sched_entity *b)
+#endif
 {
 	return (s64)(a->vruntime - b->vruntime) < 0;
 }
 
 static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
+#ifdef CONFIG_CPU_PISCH
+{
+	return cpu_pisch_drvp->entity_key(cfs_rq, se);
+}
+
+static inline s64 cfs_fair_entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
+#endif
 {
 	return se->vruntime - cfs_rq->min_vruntime;
 }
@@ -439,6 +454,13 @@ static u64 __sched_period(unsigned long
  * s = p*P[w/rw]
  */
 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
+#ifdef CONFIG_CPU_PISCH
+{
+	return cpu_pisch_drvp->sched_slice(cfs_rq, se);
+}
+
+static u64 cfs_fair_sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
+#endif
 {
 	u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq);
 
@@ -478,12 +500,25 @@ static inline void
 __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	      unsigned long delta_exec)
 {
+#ifndef CONFIG_CPU_PISCH
 	unsigned long delta_exec_weighted;
+#endif
 
 	schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
+#ifdef CONFIG_CPU_PISCH
+	cpu_pisch_drvp->update_curr(cfs_rq, curr, delta_exec);
+}
+
+static inline void
+cfs_fair_update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
+		     unsigned long delta_exec)
+{
+	unsigned long delta_exec_weighted;
+#endif
+
 	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
 	curr->vruntime += delta_exec_weighted;
 	update_min_vruntime(cfs_rq);
@@ -698,6 +733,14 @@ static void check_spread(struct cfs_rq *
 
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+#ifdef CONFIG_CPU_PISCH
+{
+	cpu_pisch_drvp->place_entity(cfs_rq, se, initial);
+}
+
+static void
+cfs_fair_place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+#endif
 {
 	u64 vruntime = cfs_rq->min_vruntime;
 
@@ -1064,9 +1107,20 @@ static void yield_task_fair(struct rq *r
 	 * Upon rescheduling, sched_class::put_prev_task() will place
 	 * 'current' within the tree based on its new key value.
 	 */
+#ifdef CONFIG_CPU_PISCH
+	cpu_pisch_drvp->place_entity_after(se, rightmost);
+#else
 	se->vruntime = rightmost->vruntime + 1;
+#endif
 }
 
+#ifdef CONFIG_CPU_PISCH
+static inline void cfs_fair_place_entity_after(struct sched_entity *sea, struct sched_entity *seb)
+{
+	sea->vruntime = seb->vruntime + 1;
+}
+#endif
+
 #ifdef CONFIG_SMP
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1530,6 +1584,14 @@ wakeup_gran(struct sched_entity *curr, s
  */
 static int
 wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
+#ifdef CONFIG_CPU_PISCH
+{
+	return cpu_pisch_drvp->wakeup_preempt_entity(curr, se);
+}
+
+static int
+cfs_fair_wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
+#endif
 {
 	s64 gran, vdiff = curr->vruntime - se->vruntime;
 
@@ -1863,7 +1925,11 @@ static void task_new_fair(struct rq *rq,
 
 	update_curr(cfs_rq);
 	if (curr)
+#ifdef CONFIG_CPU_PISCH
+		cpu_pisch_drvp->inherit_place(se, curr);
+#else
 		se->vruntime = curr->vruntime;
+#endif
 	place_entity(cfs_rq, se, 1);
 
 	/* 'curr' will be NULL if the child belongs to a different group */
@@ -1873,13 +1939,29 @@ static void task_new_fair(struct rq *rq,
 		 * Upon rescheduling, sched_class::put_prev_task() will place
 		 * 'current' within the tree based on its new key value.
 		 */
+#ifdef CONFIG_CPU_PISCH
+		cpu_pisch_drvp->swap_places(curr, se);
+#else
 		swap(curr->vruntime, se->vruntime);
+#endif
 		resched_task(rq->curr);
 	}
 
 	enqueue_task_fair(rq, p, 0);
 }
 
+#ifdef CONFIG_CPU_PISCH
+static inline void cfs_fair_inherit_place(struct sched_entity *sea, struct sched_entity *seb)
+{
+	sea->vruntime = seb->vruntime;
+}
+
+static inline void cfs_fair_swap_places(struct sched_entity *sea, struct sched_entity *seb)
+{
+	swap(sea->vruntime, seb->vruntime);
+}
+#endif
+
 /*
  * Priority of the task has changed. Check to see if we preempt
  * the current task.
@@ -2004,3 +2086,36 @@ static void print_cfs_stats(struct seq_f
 	rcu_read_unlock();
 }
 #endif
+
+#ifdef CONFIG_CPU_PISCH
+#ifdef CONFIG_CPU_PISCH_CFS_FAIR
+static inline void
+cfs_fair_change_task_cpu(struct task_struct *p, unsigned int old_cpu, unsigned int new_cpu)
+{
+	struct cfs_rq *old_cfsrq = task_cfs_rq(p),
+		      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
+
+	p->se.vruntime -= old_cfsrq->min_vruntime -
+			  new_cfsrq->min_vruntime;
+}
+
+static struct attribute *cpu_pisch_cfs_fair_attrs[] = {
+	NULL,
+};
+
+static const struct cpu_pisch_drv cfs_fair_cpu_pisch_drv = {
+	.name = "cfs_fair",
+	.attrs = cpu_pisch_cfs_fair_attrs,
+	.place_entity_after = cfs_fair_place_entity_after,
+	.entity_before = cfs_fair_entity_before,
+	.entity_key = cfs_fair_entity_key,
+	.place_entity = cfs_fair_place_entity,
+	.sched_slice = cfs_fair_sched_slice,
+	.update_curr = cfs_fair_update_curr,
+	.swap_places = cfs_fair_swap_places,
+	.inherit_place = cfs_fair_inherit_place,
+	.wakeup_preempt_entity = cfs_fair_wakeup_preempt_entity,
+	.change_task_cpu = cfs_fair_change_task_cpu,
+};
+#endif
+#endif
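
With CONFIG_CPU_PISCH=y the active scheduler is reported in the sysfs
file created by cpu_pisch_drv_sysfs_init() above.  A minimal user-space
sketch (not part of this patch) of querying, and optionally changing,
the active driver might look like this; whether switching at run time
is safe is a separate question, and boot-time selection via
"cpu_pisch=" remains the primary mechanism described in the Kconfig
help:

/* Sketch only: read (and optionally write) /sys/kernel/cpu_pisch/name. */
#include <stdio.h>

int main(int argc, char **argv)
{
	char name[64];
	FILE *f = fopen("/sys/kernel/cpu_pisch/name", "r");

	if (!f || !fgets(name, sizeof(name), f)) {
		perror("cpu_pisch");
		return 1;
	}
	fclose(f);
	printf("current CPU scheduler: %s", name);

	if (argc > 1) {		/* e.g. "cfs_fair"; requires root */
		f = fopen("/sys/kernel/cpu_pisch/name", "w");
		if (!f || fputs(argv[1], f) == EOF) {
			perror("cpu_pisch");
			return 1;
		}
		fclose(f);
	}
	return 0;
}

The helpers in kernel/cpu_pisch_pvt.h are intended to let a driver
expose its tunables with one line each.  A hypothetical driver with an
unsigned tunable (the names below are invented for illustration) could
use them roughly as follows, plugging example_attrs into its .attrs
field so the files appear under /sys/kernel/cpu_pisch/parameters/:

/* Hypothetical use of the cpu_pisch_pvt.h helpers (names invented). */
static unsigned int example_latency_ns = 1000000;

/* Generates show/store functions and a kobj_attribute named "latency_ns". */
CPU_PISCH_SYSFS_DEFINE_UNSIGNED_RW(latency_ns, example_latency_ns, to_llu, to_u);

static struct attribute *example_attrs[] = {
	CPU_PISCH_SYSFS_ATTR(latency_ns),
	NULL,
};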