Message-ID: <YqdNKJllCVMci3ov@hirez.programming.kicks-ass.net>
Date: Mon, 13 Jun 2022 16:43:52 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Ravi Bangoria <ravi.bangoria@....com>
Cc: acme@...nel.org, alexander.shishkin@...ux.intel.com,
jolsa@...hat.com, namhyung@...nel.org, songliubraving@...com,
eranian@...gle.com, alexey.budankov@...ux.intel.com,
ak@...ux.intel.com, mark.rutland@....com, megha.dey@...el.com,
frederic@...nel.org, maddy@...ux.ibm.com, irogers@...gle.com,
kim.phillips@....com, linux-kernel@...r.kernel.org,
santosh.shukla@....com
Subject: Re: [RFC v2] perf: Rewrite core context handling
On Mon, Jun 13, 2022 at 04:35:11PM +0200, Peter Zijlstra wrote:
> @@ -3652,17 +3697,28 @@ static noinline int visit_groups_merge(s
> .size = ARRAY_SIZE(itrs),
> };
> /* Events not within a CPU context may be on any CPU. */
> - __heap_add(&event_heap, perf_event_groups_first(groups, -1, NULL));
> + __heap_add(&event_heap, perf_event_groups_first(groups, -1, pmu, NULL));
> }
> evt = event_heap.data;
>
> - __heap_add(&event_heap, perf_event_groups_first(groups, cpu, NULL));
> + __heap_add(&event_heap, perf_event_groups_first(groups, cpu, pmu, NULL));
>
> #ifdef CONFIG_CGROUP_PERF
> for (; css; css = css->parent)
> - __heap_add(&event_heap, perf_event_groups_first(groups, cpu, css->cgroup));
> + __heap_add(&event_heap, perf_event_groups_first(groups, cpu, pmu, css->cgroup));
> #endif
>
> + if (event_heap.nr) {
> + /*
> + * XXX: For now, visit_groups_merge() gets called with pmu
> +	 * pointer never NULL. But these functions need to be called
> + * once for each pmu if I implement pmu=NULL optimization.
> + */
> + __link_epc((*evt)->pmu_ctx);
> + perf_assert_pmu_disabled((*evt)->pmu_ctx->pmu);
> + }
> +
> min_heapify_all(&event_heap, &perf_min_heap);
>
> while (event_heap.nr) {
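[ For readers not steeped in this code: visit_groups_merge() performs a
  k-way merge. One sorted run per {cpu, pmu, cgroup} subtree is seeded
  into a min-heap via __heap_add(), and the pop loop emits events in
  group_index order. A toy userspace sketch of that pattern follows;
  struct cursor and merge_groups() are illustrative names, not kernel
  API. ]

```c
#include <stddef.h>

/* Toy stand-in for a perf event group, sorted by group_index. */
struct group { int group_index; };

/* One heap slot: a cursor over one sorted run (one rb subtree). */
struct cursor { const struct group *pos, *end; };

static void sift_down(struct cursor *heap, size_t nr, size_t i)
{
	for (;;) {
		size_t l = 2 * i + 1, r = l + 1, min = i;

		if (l < nr && heap[l].pos->group_index < heap[min].pos->group_index)
			min = l;
		if (r < nr && heap[r].pos->group_index < heap[min].pos->group_index)
			min = r;
		if (min == i)
			return;

		struct cursor tmp = heap[i];
		heap[i] = heap[min];
		heap[min] = tmp;
		i = min;
	}
}

/* Merge nr sorted runs into out[]; returns the number of items emitted. */
size_t merge_groups(struct cursor *heap, size_t nr, int *out)
{
	size_t n = 0, i;

	for (i = nr / 2; i-- > 0; )	/* heapify, cf. min_heapify_all() */
		sift_down(heap, nr, i);

	while (nr) {
		out[n++] = heap[0].pos->group_index;
		if (++heap[0].pos == heap[0].end)
			heap[0] = heap[--nr];	/* this run is exhausted */
		sift_down(heap, nr, 0);
	}
	return n;
}
```

[ Each __heap_add() in the hunk above corresponds to seeding one cursor;
  min_heapify_all() plus the while loop is the merge itself. ]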
> @@ -3741,39 +3799,67 @@ static int merge_sched_in(struct perf_ev
> return 0;
> }
>
> -static void
> -ctx_pinned_sched_in(struct perf_event_context *ctx,
> - struct perf_cpu_context *cpuctx)
> +static void ctx_pinned_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
> {
> + struct perf_event_pmu_context *pmu_ctx;
> int can_add_hw = 1;
>
> - if (ctx != &cpuctx->ctx)
> - cpuctx = NULL;
> -
> - visit_groups_merge(cpuctx, &ctx->pinned_groups,
> - smp_processor_id(),
> - merge_sched_in, &can_add_hw);
> + if (pmu) {
> + visit_groups_merge(ctx, &ctx->pinned_groups,
> + smp_processor_id(), pmu,
> + merge_sched_in, &can_add_hw);
> + } else {
> + /*
> + * XXX: This can be optimized for per-task context by calling
> + * visit_groups_merge() only once with:
> + * 1) pmu=NULL
> + * 2) Ignoring pmu in perf_event_groups_cmp() when it's NULL
> + * 3) Making can_add_hw a per-pmu variable
> + *
> +	 * Though, it cannot be optimized for the per-cpu context,
> +	 * because the per-cpu rb-tree consists of pmu-subtrees, and
> +	 * pmu-subtrees consist of cgroup-subtrees; i.e., cgroup events
> +	 * of the same cgroup but different pmus are separated out into
> +	 * their respective pmu-subtrees.
> + */
> + list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
> + can_add_hw = 1;
> + visit_groups_merge(ctx, &ctx->pinned_groups,
> + smp_processor_id(), pmu_ctx->pmu,
> + merge_sched_in, &can_add_hw);
> + }
> + }
> }
I'm not sure I follow... a task context can have multiple PMUs just the
same as a CPU context can; that's more or less the entire point of the
patch.
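
[ The structural claim in the quoted comment is about the rb-tree sort
  key: with pmu sorted above cgroup, events of the same cgroup on
  different pmus land in different pmu-subtrees. A lexicographic
  comparator sketch of that ordering; the field names are illustrative,
  not the kernel's actual key layout, and pmu_id stands in for the pmu
  pointer. ]

```c
/* Illustrative sort key: {cpu, pmu, cgroup, group_index}, compared
 * lexicographically, mirroring the subtree nesting described above. */
struct key {
	int cpu;
	int pmu_id;
	long cgroup_id;
	unsigned long group_index;
};

int key_cmp(const struct key *a, const struct key *b)
{
	if (a->cpu != b->cpu)
		return a->cpu < b->cpu ? -1 : 1;
	if (a->pmu_id != b->pmu_id)		/* pmu sorts above cgroup */
		return a->pmu_id < b->pmu_id ? -1 : 1;
	if (a->cgroup_id != b->cgroup_id)
		return a->cgroup_id < b->cgroup_id ? -1 : 1;
	if (a->group_index != b->group_index)
		return a->group_index < b->group_index ? -1 : 1;
	return 0;
}
```

[ Because pmu compares before cgroup, a lookup for one {cpu, pmu,
  cgroup} triple finds a single contiguous run; the flip side is that a
  pmu=NULL "all pmus" walk has no one contiguous range to scan. ]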