Subject: perf: Delay put_task_struct() for 'dead' contexts
From: Peter Zijlstra
Date: Mon Jul 15 13:50:05 CEST 2019

Currently we do put_task_struct() immediately when we mark a context
dead. Instead, delay it until after the final put_ctx() by recovering
the task pointer from the marked ctx->task value.

Signed-off-by: Peter Zijlstra (Intel)
---
 kernel/events/core.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -180,10 +180,17 @@ static inline void mark_dead_task_ctx(st
 {
 	unsigned long task = (unsigned long)READ_ONCE(ctx->task);
 	WARN_ON_ONCE(!task);
 	task |= 1;
 	WRITE_ONCE(ctx->task, (struct task_struct *)task);
 }
 
+static inline struct task_struct *__dead_ctx_task(struct perf_event_context *ctx)
+{
+	unsigned long task = (unsigned long)READ_ONCE(ctx->task);
+	task &= ~1L;
+	return (struct task_struct *)task;
+}
+
 /*
  * On task ctx scheduling...
  *
@@ -1200,10 +1207,11 @@ static void free_ctx(struct rcu_head *he
 static void put_ctx(struct perf_event_context *ctx)
 {
 	if (refcount_dec_and_test(&ctx->refcount)) {
+		struct task_struct *task = __dead_ctx_task(ctx);
 		if (ctx->parent_ctx)
 			put_ctx(ctx->parent_ctx);
-		if (ctx->task && !is_dead_task(ctx->task))
-			put_task_struct(ctx->task);
+		if (task)
+			put_task_struct(task);
 		call_rcu(&ctx->rcu_head, free_ctx);
 	}
 }
@@ -11484,9 +11492,8 @@ static void perf_event_exit_task_context
 	 * and mark the context dead.
 	 */
 	RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
-	put_ctx(task_ctx); /* cannot be last */
+	put_ctx(task_ctx); /* matches perf_pin_task_context(), cannot be last */
 	mark_dead_task_ctx(task_ctx);
-	put_task_struct(current); /* cannot be last */
 
 	clone_ctx = unclone_ctx(task_ctx);
 	raw_spin_unlock_irq(&task_ctx->lock);
@@ -11595,7 +11602,6 @@ void perf_event_free_task(struct task_st
 	 */
 	RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
 	mark_dead_task_ctx(ctx);
-	put_task_struct(task); /* cannot be last */
 	raw_spin_unlock_irq(&ctx->lock);
 
 	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry)
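
For reference, a minimal userspace sketch of the pointer-tagging scheme
the patch relies on: mark_dead_task_ctx() sets bit 0 of ctx->task to flag
the context dead, and __dead_ctx_task() masks that bit off again so the
final put_ctx() can still drop the task reference. The names below
(ctx_stub, task_stub, mark_dead, dead_ctx_task) are illustrative stand-ins,
not kernel code, and the only assumption is that task pointers are at
least 2-byte aligned so bit 0 is free.

/*
 * Userspace illustration only: tag bit 0 of an aligned pointer to mark
 * a context dead, then mask the bit off to recover the original pointer.
 * Names are made up for this sketch; this is not the kernel code.
 */
#include <assert.h>
#include <stdio.h>

struct task_stub { int pid; };

struct ctx_stub { struct task_stub *task; };

static void mark_dead(struct ctx_stub *ctx)
{
	unsigned long task = (unsigned long)ctx->task;

	task |= 1;				/* bit 0 is free due to alignment */
	ctx->task = (struct task_stub *)task;
}

static struct task_stub *dead_ctx_task(struct ctx_stub *ctx)
{
	unsigned long task = (unsigned long)ctx->task;

	task &= ~1L;				/* clear the dead bit */
	return (struct task_stub *)task;
}

int main(void)
{
	struct task_stub t = { .pid = 42 };
	struct ctx_stub c = { .task = &t };

	mark_dead(&c);
	assert((unsigned long)c.task & 1);	/* context marked dead */
	assert(dead_ctx_task(&c) == &t);	/* pointer still recoverable */
	printf("recovered pid %d\n", dead_ctx_task(&c)->pid);
	return 0;
}

Because the dead mark only occupies an alignment bit, the task pointer
survives until the last reference goes away, which is what lets put_ctx()
perform the final put_task_struct() instead of dropping it early.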