Subject: perf: Remove TASK_TOMBSTONE
From: Peter Zijlstra
Date: Mon Jul 15 13:42:35 CEST 2019

Instead of overwriting the entirety of ctx->task, only set the LSB to
mark the ctx dead. This allows recovering the task pointer for fun and
games later on.

Signed-off-by: Peter Zijlstra (Intel)
---
 kernel/events/core.c | 51 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 19 deletions(-)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -164,14 +164,26 @@ static void perf_ctx_unlock(struct perf_
 	raw_spin_unlock(&cpuctx->ctx.lock);
 }
 
-#define TASK_TOMBSTONE ((void *)-1L)
 #define TASK_KERNEL ((void *)-1L)
 
-static bool is_kernel_event(struct perf_event *event)
+static inline bool is_kernel_event(struct perf_event *event)
 {
 	return READ_ONCE(event->owner) == TASK_KERNEL;
 }
 
+static inline bool is_dead_task(struct task_struct *task)
+{
+	return (unsigned long)task & 1;
+}
+
+static inline void mark_dead_task_ctx(struct perf_event_context *ctx)
+{
+	unsigned long task = (unsigned long)READ_ONCE(ctx->task);
+	WARN_ON_ONCE(!task);
+	task |= 1;
+	WRITE_ONCE(ctx->task, (struct task_struct *)task);
+}
+
 /*
  * On task ctx scheduling...
  *
@@ -270,7 +282,7 @@ static void event_function_call(struct p
 		return;
 	}
 
-	if (task == TASK_TOMBSTONE)
+	if (is_dead_task(task))
 		return;
 
 again:
@@ -283,7 +295,7 @@ static void event_function_call(struct p
 	 * a concurrent perf_event_context_sched_out().
 	 */
 	task = ctx->task;
-	if (task == TASK_TOMBSTONE) {
+	if (is_dead_task(task)) {
 		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
@@ -309,7 +321,7 @@ static void event_function_local(struct
 	lockdep_assert_irqs_disabled();
 
 	if (task) {
-		if (task == TASK_TOMBSTONE)
+		if (is_dead_task(task))
 			return;
 
 		task_ctx = ctx;
@@ -318,7 +330,7 @@ static void event_function_local(struct
 	perf_ctx_lock(cpuctx, task_ctx);
 
 	task = ctx->task;
-	if (task == TASK_TOMBSTONE)
+	if (is_dead_task(task))
 		goto unlock;
 
 	if (task) {
@@ -1190,7 +1202,7 @@ static void put_ctx(struct perf_event_co
 	if (refcount_dec_and_test(&ctx->refcount)) {
 		if (ctx->parent_ctx)
 			put_ctx(ctx->parent_ctx);
-		if (ctx->task && ctx->task != TASK_TOMBSTONE)
+		if (ctx->task && !is_dead_task(ctx->task))
 			put_task_struct(ctx->task);
 		call_rcu(&ctx->rcu_head, free_ctx);
 	}
@@ -1402,7 +1414,7 @@ perf_lock_task_context(struct task_struc
 			goto retry;
 		}
 
-		if (ctx->task == TASK_TOMBSTONE ||
+		if (is_dead_task(ctx->task) ||
 		    !refcount_inc_not_zero(&ctx->refcount)) {
 			raw_spin_unlock(&ctx->lock);
 			ctx = NULL;
@@ -2109,7 +2121,7 @@ static void perf_remove_from_context(str
 
 	/*
 	 * The above event_function_call() can NO-OP when it hits
-	 * TASK_TOMBSTONE. In that case we must already have been detached
+	 * is_dead_task(). In that case we must already have been detached
 	 * from the context (by perf_event_exit_event()) but the grouping
 	 * might still be in-tact.
 	 */
@@ -2590,7 +2602,7 @@ perf_install_in_context(struct perf_even
 	/*
 	 * Should not happen, we validate the ctx is still alive before calling.
 	 */
-	if (WARN_ON_ONCE(task == TASK_TOMBSTONE))
+	if (WARN_ON_ONCE(is_dead_task(task)))
 		return;
 
 	/*
@@ -2630,7 +2642,7 @@ perf_install_in_context(struct perf_even
 	raw_spin_lock_irq(&ctx->lock);
 	task = ctx->task;
 
-	if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) {
+	if (WARN_ON_ONCE(is_dead_task(task))) {
 		/*
 		 * Cannot happen because we already checked above (which also
 		 * cannot happen), and we hold ctx->mutex, which serializes us
@@ -9110,10 +9122,11 @@ static void perf_event_addr_filters_appl
 	unsigned long flags;
 
 	/*
-	 * We may observe TASK_TOMBSTONE, which means that the event tear-down
-	 * will stop on the parent's child_mutex that our caller is also holding
+	 * We may observe is_dead_task(), which means that the event tear-down
+	 * will stop on the parent's child_mutex that our caller is also
+	 * holding
 	 */
-	if (task == TASK_TOMBSTONE)
+	if (is_dead_task(task))
 		return;
 
 	if (ifh->nr_file_filters) {
@@ -11018,7 +11031,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	if (move_group) {
 		gctx = __perf_event_ctx_lock_double(group_leader, ctx);
 
-		if (gctx->task == TASK_TOMBSTONE) {
+		if (is_dead_task(gctx->task)) {
 			err = -ESRCH;
 			goto err_locked;
 		}
@@ -11057,7 +11070,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		mutex_lock(&ctx->mutex);
 	}
 
-	if (ctx->task == TASK_TOMBSTONE) {
+	if (is_dead_task(ctx->task)) {
 		err = -ESRCH;
 		goto err_locked;
 	}
@@ -11250,7 +11263,7 @@ perf_event_create_kernel_counter(struct
 
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
-	if (ctx->task == TASK_TOMBSTONE) {
+	if (is_dead_task(ctx->task)) {
 		err = -ESRCH;
 		goto err_unlock;
 	}
@@ -11472,7 +11485,7 @@ static void perf_event_exit_task_context
 	 */
 	RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
 	put_ctx(task_ctx); /* cannot be last */
-	WRITE_ONCE(task_ctx->task, TASK_TOMBSTONE);
+	mark_dead_task_ctx(task_ctx);
 	put_task_struct(current); /* cannot be last */
 
 	clone_ctx = unclone_ctx(task_ctx);
@@ -11581,7 +11594,7 @@ void perf_event_free_task(struct task_st
 		 * exposed yet the context has been (through child_list).
 		 */
 		RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
-		WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
+		mark_dead_task_ctx(ctx);
 		put_task_struct(task); /* cannot be last */
 
 		raw_spin_unlock_irq(&ctx->lock);
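
Not part of the patch, but for readers unfamiliar with the trick: below is a
minimal userspace sketch of the LSB-tagging idea, assuming the pointer is at
least 2-byte aligned so bit 0 is always free. The struct and helper names
(task, mark_dead, is_dead, strip_dead) are made up for illustration; only the
masking logic mirrors what the patch does. Unlike a tombstone value, the dead
mark leaves the original pointer recoverable by clearing the bit again.

/*
 * Illustrative sketch only, not kernel code: tag bit 0 of a pointer to
 * mark it dead while keeping the pointer value itself recoverable.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct task { int pid; };

static bool is_dead(struct task *t)
{
	return (unsigned long)t & 1;		/* bit 0 set => marked dead */
}

static struct task *mark_dead(struct task *t)
{
	return (struct task *)((unsigned long)t | 1);
}

static struct task *strip_dead(struct task *t)
{
	return (struct task *)((unsigned long)t & ~1UL);	/* recover pointer */
}

int main(void)
{
	struct task t = { .pid = 42 };
	struct task *p = mark_dead(&t);

	assert(is_dead(p));
	/* the original pointer is still there, just mask the tag off */
	printf("pid = %d\n", strip_dead(p)->pid);
	return 0;
}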