lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230113175459.14825-10-james.morse@arm.com>
Date:   Fri, 13 Jan 2023 17:54:50 +0000
From:   James Morse <james.morse@....com>
To:     x86@...nel.org, linux-kernel@...r.kernel.org
Cc:     Fenghua Yu <fenghua.yu@...el.com>,
        Reinette Chatre <reinette.chatre@...el.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        H Peter Anvin <hpa@...or.com>,
        Babu Moger <Babu.Moger@....com>,
        James Morse <james.morse@....com>,
        shameerali.kolothum.thodi@...wei.com,
        D Scott Phillips OS <scott@...amperecomputing.com>,
        carl@...amperecomputing.com, lcherian@...vell.com,
        bobo.shaobowang@...wei.com, tan.shaopeng@...itsu.com,
        xingxin.hx@...nanolis.org, baolin.wang@...ux.alibaba.com,
        Jamie Iles <quic_jiles@...cinc.com>,
        Xin Hao <xhao@...ux.alibaba.com>, peternewman@...gle.com
Subject: [PATCH v2 09/18] x86/resctrl: Allow resctrl_arch_rmid_read() to sleep

MPAM's cache occupancy counters can take a little while to settle once
the monitor has been configured. The maximum settling time is described
to the driver via a firmware table. The value could be large enough
that it makes sense to sleep.

To avoid exposing this to resctrl, it should be hidden behind MPAM's
resctrl_arch_rmid_read(). But add_rmid_to_limbo() calls
resctrl_arch_rmid_read() from a non-preemptible context.

add_rmid_to_limbo() is opportunistically reading the L3 occupancy counter
on this domain to avoid adding the RMID to limbo if this domain's value
has drifted below resctrl_rmid_realloc_threshold since the limbo handler
last ran. Determining 'this domain' involves disabling preemption to
prevent the thread being migrated to CPUs in a different domain between
the check and resctrl_arch_rmid_read() call. The check is skipped
for all remote domains.

Instead, call resctrl_arch_rmid_read() for each domain, and get it to
read the arch-specific counter via IPI if it's called on a CPU outside
the target domain. By covering remote domains, this change stops the
limbo handler from being started unnecessarily.

This also allows resctrl_arch_rmid_read() to sleep.

Tested-by: Shaopeng Tan <tan.shaopeng@...itsu.com>
Signed-off-by: James Morse <james.morse@....com>
---
The alternative is to remove the counter read from this path altogether,
and assume user-space would never try to re-allocate the last RMID before
the limbo handler runs next.
---
 arch/x86/kernel/cpu/resctrl/monitor.c | 58 ++++++++++++++++++---------
 1 file changed, 38 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index d309b830aeb2..d6ae4b713801 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -206,17 +206,19 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
 	return chunks >> shift;
 }
 
-int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
-			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
-			   u64 *val)
+struct __rmid_read_arg
 {
-	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
-	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
-	struct arch_mbm_state *am;
-	u64 msr_val, chunks;
+	u32 rmid;
+	enum resctrl_event_id eventid;
 
-	if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
-		return -EINVAL;
+	u64 msr_val;
+};
+
+static void __rmid_read(void *arg)
+{
+	enum resctrl_event_id eventid = ((struct __rmid_read_arg *)arg)->eventid;
+	u32 rmid = ((struct __rmid_read_arg *)arg)->rmid;
+	u64 msr_val;
 
 	/*
 	 * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
@@ -229,6 +231,28 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
 	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
 	rdmsrl(MSR_IA32_QM_CTR, msr_val);
 
+	((struct __rmid_read_arg *)arg)->msr_val = msr_val;
+}
+
+int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
+			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
+			   u64 *val)
+{
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+	struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+	struct __rmid_read_arg arg;
+	struct arch_mbm_state *am;
+	u64 msr_val, chunks;
+	int err;
+
+	arg.rmid = rmid;
+	arg.eventid = eventid;
+
+	err = smp_call_function_any(&d->cpu_mask, __rmid_read, &arg, true);
+	if (err)
+		return err;
+
+	msr_val = arg.msr_val;
 	if (msr_val & RMID_VAL_ERROR)
 		return -EIO;
 	if (msr_val & RMID_VAL_UNAVAIL)
@@ -383,23 +407,18 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 {
 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
 	struct rdt_domain *d;
-	int cpu, err;
 	u64 val = 0;
 	u32 idx;
+	int err;
 
 	idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
 
 	entry->busy = 0;
-	cpu = get_cpu();
 	list_for_each_entry(d, &r->domains, list) {
-		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
-			err = resctrl_arch_rmid_read(r, d, entry->closid,
-						     entry->rmid,
-						     QOS_L3_OCCUP_EVENT_ID,
-						     &val);
-			if (err || val <= resctrl_rmid_realloc_threshold)
-				continue;
-		}
+		err = resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
+					     QOS_L3_OCCUP_EVENT_ID, &val);
+		if (err || val <= resctrl_rmid_realloc_threshold)
+			continue;
 
 		/*
 		 * For the first limbo RMID in the domain,
@@ -410,7 +429,6 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 		set_bit(idx, d->rmid_busy_llc);
 		entry->busy++;
 	}
-	put_cpu();
 
 	if (entry->busy)
 		rmid_limbo_count++;
-- 
2.30.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ