[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180907101634.GO24106@hirez.programming.kicks-ass.net>
Date: Fri, 7 Sep 2018 12:16:34 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Johannes Weiner <hannes@...xchg.org>
Cc: Ingo Molnar <mingo@...hat.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Tejun Heo <tj@...nel.org>,
Suren Baghdasaryan <surenb@...gle.com>,
Daniel Drake <drake@...lessm.com>,
Vinayak Menon <vinmenon@...eaurora.org>,
Christopher Lameter <cl@...ux.com>,
Peter Enderborg <peter.enderborg@...y.com>,
Shakeel Butt <shakeelb@...gle.com>,
Mike Galbraith <efault@....de>, linux-mm@...ck.org,
cgroups@...r.kernel.org, linux-kernel@...r.kernel.org,
kernel-team@...com
Subject: Re: [PATCH 8/9] psi: pressure stall information for CPU, memory, and
IO
On Tue, Aug 28, 2018 at 01:22:57PM -0400, Johannes Weiner wrote:
> +enum psi_states {
> + PSI_IO_SOME,
> + PSI_IO_FULL,
> + PSI_MEM_SOME,
> + PSI_MEM_FULL,
> + PSI_CPU_SOME,
> + /* Only per-CPU, to weigh the CPU in the global average: */
> + PSI_NONIDLE,
> + NR_PSI_STATES,
> +};
> +static u32 get_recent_time(struct psi_group *group, int cpu,
> + enum psi_states state)
> +{
> + struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
> + unsigned int seq;
> + u32 time, delta;
> +
> + do {
> + seq = read_seqcount_begin(&groupc->seq);
> +
> + time = groupc->times[state];
> + /*
> + * In addition to already concluded states, we also
> + * incorporate currently active states on the CPU,
> + * since states may last for many sampling periods.
> + *
> + * This way we keep our delta sampling buckets small
> + * (u32) and our reported pressure close to what's
> + * actually happening.
> + */
> + if (test_state(groupc->tasks, state))
> + time += cpu_clock(cpu) - groupc->state_start;
> + } while (read_seqcount_retry(&groupc->seq, seq));
> +
> + delta = time - groupc->times_prev[state];
> + groupc->times_prev[state] = time;
> +
> + return delta;
> +}
> +static bool update_stats(struct psi_group *group)
> +{
> + u64 deltas[NR_PSI_STATES - 1] = { 0, };
> + unsigned long missed_periods = 0;
> + unsigned long nonidle_total = 0;
> + u64 now, expires, period;
> + int cpu;
> + int s;
> +
> + mutex_lock(&group->stat_lock);
> +
> + /*
> + * Collect the per-cpu time buckets and average them into a
> + * single time sample that is normalized to wallclock time.
> + *
> + * For averaging, each CPU is weighted by its non-idle time in
> + * the sampling period. This eliminates artifacts from uneven
> + * loading, or even entirely idle CPUs.
> + */
> + for_each_possible_cpu(cpu) {
> + u32 nonidle;
> +
> + nonidle = get_recent_time(group, cpu, PSI_NONIDLE);
> + nonidle = nsecs_to_jiffies(nonidle);
> + nonidle_total += nonidle;
> +
> + for (s = 0; s < PSI_NONIDLE; s++) {
> + u32 delta;
> +
> + delta = get_recent_time(group, cpu, s);
> + deltas[s] += (u64)delta * nonidle;
> + }
> + }
This does the whole seqcount thing 6x, which is a bit of a waste.
/*
 * One internally-consistent sample of all per-cpu PSI state times,
 * taken under a single seqcount read section (see get_times_snapshot()).
 */
struct snapshot {
u32 times[NR_PSI_STATES];
};
static inline struct snapshot get_times_snapshot(struct psi_group *pg, int cpu)
{
struct pci_group_cpu *pgc = per_cpu_ptr(pg->pcpu, cpu);
struct snapshot s;
unsigned int seq;
u32 delta;
int i;
do {
seq = read_seqcount_begin(&pgc->seq);
delta = cpu_clock(cpu) - pgc->state_start;
for (i = 0; i < NR_PSI_STATES; i++) {
s.times[i] = gpc->times[i];
if (test_state(pgc->tasks, i))
s.times[i] += delta;
}
} while (read_seqcount_retry(&pgc->seq, seq);
return s;
}
for_each_possible_cpu(cpu) {
struct snapshot s = get_times_snapshot(pg, cpu);
nonidle = nsecs_to_jiffies(s.times[PSI_NONIDLE]);
nonidle_total += nonidle;
for (i = 0; i < PSI_NONIDLE; i++)
deltas[i] += (u64)s.times[i] * nonidle;
/* ... */
}
It's a bit cumbersome, but that's because of C.
Powered by blists - more mailing lists