Date: Sun, 8 Nov 2009 21:13:23 +0100
From: Frederic Weisbecker <fweisbec@...il.com>
To: Ingo Molnar <mingo@...e.hu>
Cc: LKML <linux-kernel@...r.kernel.org>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Arnaldo Carvalho de Melo <acme@...hat.com>,
	Mike Galbraith <efault@....de>,
	Paul Mackerras <paulus@...ba.org>,
	Thomas Gleixner <tglx@...utronix.de>
Subject: [RFC PATCH 1/4] perf/core: split context's event group list into pinned and non-pinned lists

Split up struct perf_event_context::group_list into pinned_grp_list
and volatile_grp_list (non-pinned).

At first sight this looks wasteful, as it duplicates various loops
over the group lists. But it scales better in the fast path of
perf_sched_in(): we no longer iterate twice through the entire list
to separate pinned from non-pinned scheduling; instead we iterate
once through each of two distinct lists.

Another desired effect is that it makes it easier to apply distinct
scheduling rules to each list.

Signed-off-by: Frederic Weisbecker <fweisbec@...il.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Arnaldo Carvalho de Melo <acme@...hat.com>
Cc: Mike Galbraith <efault@....de>
Cc: Paul Mackerras <paulus@...ba.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
---
 include/linux/perf_event.h |    3 +-
 kernel/perf_event.c        |  177 +++++++++++++++++++++++++++++++------------
 2 files changed, 127 insertions(+), 53 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6ff7c3b..659351c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -662,7 +662,8 @@ struct perf_event_context {
 	 */
 	struct mutex		mutex;
 
-	struct list_head	group_list;
+	struct list_head	pinned_grp_list;
+	struct list_head	volatile_grp_list;
 	struct list_head	event_list;
 	int			nr_events;
 	int			nr_active;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 6f4ed3b..b3a31c8 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -259,9 +259,15 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * add it straight to the context's event list, or to the group
 	 * leader's sibling list:
 	 */
-	if (group_leader == event)
-		list_add_tail(&event->group_entry, &ctx->group_list);
-	else {
+	if (group_leader == event) {
+		struct list_head *list;
+
+		if (event->attr.pinned)
+			list = &ctx->pinned_grp_list;
+		else
+			list = &ctx->volatile_grp_list;
+		list_add_tail(&event->group_entry, list);
+	} else {
 		list_add_tail(&event->group_entry, &group_leader->sibling_list);
 		group_leader->nr_siblings++;
 	}
@@ -299,8 +305,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * to the context list directly:
 	 */
 	list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
+		struct list_head *list;
+
+		if (sibling->attr.pinned)
+			list = &ctx->pinned_grp_list;
+		else
+			list = &ctx->volatile_grp_list;
 
-		list_move_tail(&sibling->group_entry, &ctx->group_list);
+		list_move_tail(&sibling->group_entry, list);
 		sibling->group_leader = sibling;
 	}
 }
@@ -1032,10 +1044,14 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
 	update_context_time(ctx);
 
 	perf_disable();
-	if (ctx->nr_active)
-		list_for_each_entry(event, &ctx->group_list, group_entry)
+	if (ctx->nr_active) {
+		list_for_each_entry(event, &ctx->pinned_grp_list, group_entry)
 			group_sched_out(event, cpuctx, ctx);
 
+		list_for_each_entry(event, &ctx->volatile_grp_list, group_entry)
+			group_sched_out(event, cpuctx, ctx);
+	}
+
 	perf_enable();
  out:
 	spin_unlock(&ctx->lock);
@@ -1249,9 +1265,8 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 	 * First go through the list and put on any pinned groups
 	 * in order to give them the best chance of going on.
 	 */
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    !event->attr.pinned)
+	list_for_each_entry(event, &ctx->pinned_grp_list, group_entry) {
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 		if (event->cpu != -1 && event->cpu != cpu)
 			continue;
@@ -1269,13 +1284,12 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 		}
 	}
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
+	list_for_each_entry(event, &ctx->volatile_grp_list, group_entry) {
 		/*
 		 * Ignore events in OFF or ERROR state, and
 		 * ignore pinned events since we did them already.
 		 */
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    event->attr.pinned)
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 
 		/*
@@ -1428,8 +1442,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
 	 * Rotate the first entry last (works just fine for group events too):
 	 */
 	perf_disable();
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		list_move_tail(&event->group_entry, &ctx->group_list);
+	list_for_each_entry(event, &ctx->pinned_grp_list, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->pinned_grp_list);
+		break;
+	}
+
+	list_for_each_entry(event, &ctx->volatile_grp_list, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->volatile_grp_list);
 		break;
 	}
 	perf_enable();
@@ -1465,6 +1484,22 @@ void perf_event_task_tick(struct task_struct *curr, int cpu)
 	perf_event_task_sched_in(curr, cpu);
 }
 
+static void __perf_event_enable_on_exec(struct perf_event *event,
+					struct perf_event_context *ctx,
+					int *enabled)
+{
+	if (!event->attr.enable_on_exec)
+		return;
+
+	event->attr.enable_on_exec = 0;
+	if (event->state >= PERF_EVENT_STATE_INACTIVE)
+		return;
+
+	__perf_event_mark_enabled(event, ctx);
+
+	*enabled = 1;
+}
+
 /*
  * Enable all of a task's events that have been marked enable-on-exec.
  * This expects task == current.
@@ -1485,15 +1520,11 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 
 	spin_lock(&ctx->lock);
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (!event->attr.enable_on_exec)
-			continue;
-		event->attr.enable_on_exec = 0;
-		if (event->state >= PERF_EVENT_STATE_INACTIVE)
-			continue;
-		__perf_event_mark_enabled(event, ctx);
-		enabled = 1;
-	}
+	list_for_each_entry(event, &ctx->pinned_grp_list, group_entry)
+		__perf_event_enable_on_exec(event, ctx, &enabled);
+
+	list_for_each_entry(event, &ctx->volatile_grp_list, group_entry)
+		__perf_event_enable_on_exec(event, ctx, &enabled);
 
 	/*
 	 * Unclone this context if we enabled any event.
@@ -1562,7 +1593,8 @@ __perf_event_init_context(struct perf_event_context *ctx,
 	memset(ctx, 0, sizeof(*ctx));
 	spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
-	INIT_LIST_HEAD(&ctx->group_list);
+	INIT_LIST_HEAD(&ctx->pinned_grp_list);
+	INIT_LIST_HEAD(&ctx->volatile_grp_list);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
 	ctx->task = task;
@@ -4869,7 +4901,11 @@ void perf_event_exit_task(struct task_struct *child)
 	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
 
 again:
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_grp_list,
+				 group_entry)
+		__perf_event_exit_task(child_event, child_ctx, child);
+
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->volatile_grp_list,
 				 group_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
@@ -4878,7 +4914,8 @@ again:
 	 * its siblings to the list, but we obtained 'tmp' before that which
 	 * will still point to the list head terminating the iteration.
 	 */
-	if (!list_empty(&child_ctx->group_list))
+	if (!list_empty(&child_ctx->pinned_grp_list) ||
+	    !list_empty(&child_ctx->volatile_grp_list))
 		goto again;
 
 	mutex_unlock(&child_ctx->mutex);
@@ -4886,6 +4923,24 @@ again:
 	put_ctx(child_ctx);
 }
 
+static void perf_event_free_event(struct perf_event *event,
+				  struct perf_event_context *ctx)
+{
+	struct perf_event *parent = event->parent;
+
+	if (WARN_ON_ONCE(!parent))
+		return;
+
+	mutex_lock(&parent->child_mutex);
+	list_del_init(&event->child_list);
+	mutex_unlock(&parent->child_mutex);
+
+	fput(parent->filp);
+
+	list_del_event(event, ctx);
+	free_event(event);
+}
+
 /*
  * free an unexposed, unused context as created by inheritance by
  * init_task below, used by fork() in case of fail.
@@ -4900,23 +4955,15 @@ void perf_event_free_task(struct task_struct *task)
 	mutex_lock(&ctx->mutex);
 again:
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
-		struct perf_event *parent = event->parent;
-
-		if (WARN_ON_ONCE(!parent))
-			continue;
-
-		mutex_lock(&parent->child_mutex);
-		list_del_init(&event->child_list);
-		mutex_unlock(&parent->child_mutex);
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_grp_list, group_entry)
+		perf_event_free_event(event, ctx);
 
-		fput(parent->filp);
-
-		list_del_event(event, ctx);
-		free_event(event);
-	}
+	list_for_each_entry_safe(event, tmp, &ctx->volatile_grp_list,
+				 group_entry)
+		perf_event_free_event(event, ctx);
 
-	if (!list_empty(&ctx->group_list))
+	if (!list_empty(&ctx->pinned_grp_list) ||
+	    !list_empty(&ctx->volatile_grp_list))
 		goto again;
 
 	mutex_unlock(&ctx->mutex);
@@ -4924,6 +4971,29 @@ again:
 	put_ctx(ctx);
 }
 
+static int
+perf_event_inherit(struct perf_event *event, struct task_struct *parent,
+		   struct perf_event_context *parent_ctx,
+		   struct task_struct *child,
+		   struct perf_event_context *child_ctx,
+		   int *inherited_all)
+{
+	int ret;
+
+	if (!event->attr.inherit) {
+		*inherited_all = 0;
+		return 0;
+	}
+
+	ret = inherit_group(event, parent, parent_ctx,
+			    child, child_ctx);
+	if (ret)
+		*inherited_all = 0;
+
+	return ret;
+}
+
+
 /*
  * Initialize the perf_event context in task_struct
  */
@@ -4981,19 +5051,20 @@ int perf_event_init_task(struct task_struct *child)
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
 	 */
-	list_for_each_entry(event, &parent_ctx->group_list, group_entry) {
+	list_for_each_entry(event, &parent_ctx->pinned_grp_list, group_entry) {
 
-		if (!event->attr.inherit) {
-			inherited_all = 0;
-			continue;
-		}
+		ret = perf_event_inherit(event, parent, parent_ctx, child,
+					 child_ctx, &inherited_all);
+		if (ret)
+			break;
+	}
+
+	list_for_each_entry(event, &parent_ctx->volatile_grp_list, group_entry) {
 
-		ret = inherit_group(event, parent, parent_ctx,
-				    child, child_ctx);
-		if (ret) {
-			inherited_all = 0;
+		ret = perf_event_inherit(event, parent, parent_ctx, child,
+					 child_ctx, &inherited_all);
+		if (ret)
 			break;
-		}
 	}
 
 	if (inherited_all) {
@@ -5044,7 +5115,9 @@ static void __perf_event_exit_cpu(void *info)
 	struct perf_event_context *ctx = &cpuctx->ctx;
 	struct perf_event *event, *tmp;
 
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_grp_list, group_entry)
+		__perf_event_remove_from_context(event);
+	list_for_each_entry_safe(event, tmp, &ctx->volatile_grp_list, group_entry)
 		__perf_event_remove_from_context(event);
 }
 
 static void perf_event_exit_cpu(int cpu)
-- 
1.6.2.3
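
[Editorial sketch, not part of the patch.] The heart of the change is the
__perf_event_sched_in() fast path: instead of walking one combined group
list twice and testing event->attr.pinned on every iteration, the context
now keeps pinned and non-pinned group leaders on two separate lists, each
walked exactly once. The following stand-alone C program illustrates that
before/after pattern; the struct event, the singly-linked list, and all
names in it are simplified stand-ins for the kernel's list_head machinery,
not real kernel APIs.

/*
 * Sketch of the fast-path change under simplified assumptions:
 * a plain singly-linked list of group leaders instead of list_head.
 */
#include <stdbool.h>
#include <stdio.h>

struct event {
	const char *name;
	bool pinned;
	struct event *next;
};

/* Before: one group list, walked twice, testing ->pinned each time. */
static void sched_in_combined(const struct event *group_list)
{
	const struct event *e;

	for (e = group_list; e; e = e->next)	/* pass 1: pinned only */
		if (e->pinned)
			printf("pinned:   %s\n", e->name);

	for (e = group_list; e; e = e->next)	/* pass 2: the rest */
		if (!e->pinned)
			printf("volatile: %s\n", e->name);
}

/* After: two lists, each walked once, no per-event attribute test. */
static void sched_in_split(const struct event *pinned_grp_list,
			   const struct event *volatile_grp_list)
{
	const struct event *e;

	for (e = pinned_grp_list; e; e = e->next)
		printf("pinned:   %s\n", e->name);
	for (e = volatile_grp_list; e; e = e->next)
		printf("volatile: %s\n", e->name);
}

int main(void)
{
	/* Combined list: cycles -> instructions (pinned) -> cache-misses */
	struct event c = { "cache-misses", false, NULL };
	struct event b = { "instructions", true,  &c   };
	struct event a = { "cycles",       false, &b   };

	sched_in_combined(&a);

	/* Split lists: membership is decided once, when the group leader
	 * is added (list_add_event() in the patch), not re-tested on
	 * every context switch. */
	struct event pin  = { "instructions", true,  NULL  };
	struct event vol2 = { "cache-misses", false, NULL  };
	struct event vol1 = { "cycles",       false, &vol2 };

	sched_in_split(&pin, &vol1);
	return 0;
}

Both functions produce the same scheduling order, with pinned groups still
getting first claim on the PMU; the split version simply moves the pinned
test off the per-context-switch path. The cost, visible throughout the
diff, is that the slow paths (exit, free, inherit, CPU offline) now have
to walk both lists.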