Aside from allowing software events into a !software group, allow adding !software events to pure software groups. Once we've moved the software group and attached the first !software event, the group will no longer be a pure software group and hence no longer be eligible for movement, at which point the straight ctx comparison is correct again. Cc: Paul Mackerras Cc: Stephane Eranian Signed-off-by: Peter Zijlstra --- include/linux/perf_event.h | 6 ++++ kernel/perf_event.c | 65 +++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 66 insertions(+), 5 deletions(-) Index: linux-2.6/kernel/perf_event.c =================================================================== --- linux-2.6.orig/kernel/perf_event.c +++ linux-2.6/kernel/perf_event.c @@ -5190,6 +5190,7 @@ int perf_pmu_register(struct pmu *pmu) cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); __perf_event_init_context(&cpuctx->ctx); + cpuctx->ctx.type = cpu_context; cpuctx->ctx.pmu = pmu; cpuctx->timer_interval = TICK_NSEC; hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -5523,7 +5524,8 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) { - struct perf_event *event, *group_leader = NULL, *output_event = NULL; + struct perf_event *group_leader = NULL, *output_event = NULL; + struct perf_event *event, *sibling; struct perf_event_attr attr; struct perf_event_context *ctx; struct file *event_file = NULL; @@ -5531,6 +5533,7 @@ SYSCALL_DEFINE5(perf_event_open, struct task_struct *task = NULL; struct pmu *pmu; int event_fd; + int move_group = 0; int fput_needed = 0; int err; @@ -5580,8 +5583,29 @@ SYSCALL_DEFINE5(perf_event_open, * any hardware group. */ pmu = event->pmu; - if ((pmu->task_ctx_nr == perf_sw_context) && group_leader) - pmu = group_leader->pmu; + + if (group_leader && + (is_software_event(event) != is_software_event(group_leader))) { + if (is_software_event(event)) { + /* + * If event and group_leader are not both a software + * event, and event is, then group leader is not. + * + * Allow the addition of software events to !software + * groups, this is safe because software events never + * fail to schedule. + */ + pmu = group_leader->pmu; + } else if (is_software_event(group_leader) && + (group_leader->group_flags & PERF_GROUP_SOFTWARE)) { + /* + * In case the group is a pure software group, and we + * try to add a hardware event, move the whole group to + * the hardware context. + */ + move_group = 1; + } + } if (pid != -1) task = find_lively_task_by_vpid(pid); @@ -5611,8 +5635,14 @@ SYSCALL_DEFINE5(perf_event_open, * Do not allow to attach to a group in a different * task or CPU context: */ - if (group_leader->ctx != ctx) - goto err_context; + if (move_group) { + if (group_leader->ctx->type != ctx->type) + goto err_context; + } else { + if (group_leader->ctx != ctx) + goto err_context; + } + /* * Only a group leader can be exclusive or pinned */ @@ -5632,9 +5662,34 @@ SYSCALL_DEFINE5(perf_event_open, goto err_context; } + if (move_group) { + struct perf_event_context *gctx = group_leader->ctx; + + mutex_lock(&gctx->mutex); + perf_event_remove_from_context(group_leader); + list_for_each_entry(sibling, &group_leader->sibling_list, + group_entry) { + perf_event_remove_from_context(sibling); + put_ctx(gctx); + } + mutex_unlock(&gctx->mutex); + put_ctx(gctx); + } + event->filp = event_file; WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); + + if (move_group) { + perf_install_in_context(ctx, group_leader, cpu); + get_ctx(ctx); + list_for_each_entry(sibling, &group_leader->sibling_list, + group_entry) { + perf_install_in_context(ctx, sibling, cpu); + get_ctx(ctx); + } + } + perf_install_in_context(ctx, event, cpu); ++ctx->generation; mutex_unlock(&ctx->mutex); Index: linux-2.6/include/linux/perf_event.h =================================================================== --- linux-2.6.orig/include/linux/perf_event.h +++ linux-2.6/include/linux/perf_event.h @@ -804,12 +804,18 @@ struct perf_event { #endif /* CONFIG_PERF_EVENTS */ }; +enum perf_event_context_type { + task_context, + cpu_context, +}; + /** * struct perf_event_context - event context structure * * Used as a container for task events and CPU events as well: */ struct perf_event_context { + enum perf_event_context_type type; struct pmu *pmu; /* * Protect the states of the events in the list, -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/