lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200118125354.15796-1-ming.lei@redhat.com>
Date:   Sat, 18 Jan 2020 20:53:54 +0800
From:   Ming Lei <ming.lei@...hat.com>
To:     linux-kernel@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>
Cc:     Ming Lei <ming.lei@...hat.com>, Ingo Molnar <mingo@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Peter Xu <peterx@...hat.com>,
        Juri Lelli <juri.lelli@...hat.com>
Subject: [PATCH V3] sched/isolation: isolate from handling managed interrupt

Userspace can't change managed interrupt's affinity via /proc interface,
however, applications often require the specified isolated CPUs not
disturbed by interrupts.

Add sub-parameter 'managed_irq' for 'isolcpus', so that we can isolate
from handling managed interrupt.

Not select irq effective CPU from isolated CPUs if the interrupt affinity
includes at least one housekeeping CPU. This way guarantees that isolated
CPUs won't be interrupted by managed irq if IO isn't submitted from any
isolated CPU.

Cc: Ingo Molnar <mingo@...hat.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Peter Xu <peterx@...hat.com>
Cc: Juri Lelli <juri.lelli@...hat.com>
Signed-off-by: Ming Lei <ming.lei@...hat.com>
---
V3:
	- add global lock to protect the global temporary cpumask
	- use delayed irq migration as suggested by Thomas 
V2:
	- not allocate cpumask in context with irq_desc::lock held
	- deal with cpu hotplug race with new flag of IRQD_MANAGED_FORCE_MIGRATE
	- use comment doc from Thomas


 .../admin-guide/kernel-parameters.txt         |  9 +++++
 include/linux/sched/isolation.h               |  1 +
 kernel/irq/cpuhotplug.c                       | 29 ++++++++++++++-
 kernel/irq/manage.c                           | 36 ++++++++++++++++++-
 kernel/sched/isolation.c                      |  6 ++++
 5 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ade4e6ec23e0..e0f18ac866d4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1933,6 +1933,15 @@
 			  <cpu number> begins at 0 and the maximum value is
 			  "number of CPUs in system - 1".
 
+			managed_irq
+			  Isolate from handling managed interrupt. Userspace can't
+			  change managed interrupt's affinity via /proc interface,
+			  however application often requires the specified isolated
+			  CPUs not disturbed by interrupts. This way guarantees that
+			  isolated CPU won't be interrupted if IO isn't submitted
+			  from isolated CPU when managed interrupt is used by IO
+			  drivers.
+
 			The format of <cpu-list> is described above.
 
 
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index 6c8512d3be88..0fbcbacd1b29 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -13,6 +13,7 @@ enum hk_flags {
 	HK_FLAG_TICK		= (1 << 4),
 	HK_FLAG_DOMAIN		= (1 << 5),
 	HK_FLAG_WQ		= (1 << 6),
+	HK_FLAG_MANAGED_IRQ	= (1 << 7),
 };
 
 #ifdef CONFIG_CPU_ISOLATION
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 6c7ca2e983a5..70b342ac223e 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -12,6 +12,7 @@
 #include <linux/interrupt.h>
 #include <linux/ratelimit.h>
 #include <linux/irq.h>
+#include <linux/sched/isolation.h>
 
 #include "internals.h"
 
@@ -171,6 +172,31 @@ void irq_migrate_all_off_this_cpu(void)
 	}
 }
 
+static bool hk_should_isolate(struct irq_data *data,
+		const struct cpumask *affinity, unsigned int cpu)
+{
+	const struct cpumask *hk_mask;
+
+	if (!housekeeping_enabled(HK_FLAG_MANAGED_IRQ))
+		return false;
+
+	if (!irqd_affinity_is_managed(data))
+		return false;
+
+	if (!cpumask_test_cpu(cpu, affinity))
+		return false;
+
+	hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
+	if (cpumask_subset(affinity, hk_mask))
+		return false;
+
+	if (cpumask_intersects(irq_data_get_effective_affinity_mask(data),
+				hk_mask))
+		return false;
+
+	return cpumask_test_cpu(cpu, hk_mask);
+}
+
 static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
 {
 	struct irq_data *data = irq_desc_get_irq_data(desc);
@@ -190,7 +216,8 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
 	 * CPU then it is already assigned to a CPU in the affinity
 	 * mask. No point in trying to move it around.
 	 */
-	if (!irqd_is_single_target(data))
+	if (!irqd_is_single_target(data) ||
+			hk_should_isolate(data, affinity, cpu))
 		irq_set_affinity_locked(data, affinity, false);
 }
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1753486b440c..a8af2ca806e2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -18,6 +18,7 @@
 #include <linux/sched.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/task.h>
+#include <linux/sched/isolation.h>
 #include <uapi/linux/sched/types.h>
 #include <linux/task_work.h>
 
@@ -217,7 +218,40 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	if (!chip || !chip->irq_set_affinity)
 		return -EINVAL;
 
-	ret = chip->irq_set_affinity(data, mask, force);
+	/*
+	 * If this is a managed interrupt and housekeeping is enabled on
+	 * it check whether the requested affinity mask intersects with
+	 * a housekeeping CPU. If so, then remove the isolated CPUs from
+	 * the mask and just keep the housekeeping CPU(s). This prevents
+	 * the affinity setter from routing the interrupt to an isolated
+	 * CPU to avoid that I/O submitted from a housekeeping CPU causes
+	 * interrupts on an isolated one.
+	 *
+	 * If the masks do not intersect or include online CPU(s) then
+	 * keep the requested mask. The isolated target CPUs are only
+	 * receiving interrupts when the I/O operation was submitted
+	 * directly from them.
+	 *
+	 * If all housekeeping CPUs in the affinity mask are offline,
+	 * we will migrate the irq from isolate CPU when any housekeeping
+	 * CPU in the mask becomes online.
+	 */
+	if (irqd_affinity_is_managed(data) &&
+			housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) {
+		static DEFINE_RAW_SPINLOCK(prog_mask_lock);
+		static struct cpumask prog_mask;
+		const struct cpumask *hk_mask =
+			housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
+
+		raw_spin_lock(&prog_mask_lock);
+		cpumask_and(&prog_mask, mask, hk_mask);
+		if (!cpumask_intersects(&prog_mask, cpu_online_mask))
+			cpumask_copy(&prog_mask, mask);
+		ret = chip->irq_set_affinity(data, &prog_mask, force);
+		raw_spin_unlock(&prog_mask_lock);
+	} else {
+		ret = chip->irq_set_affinity(data, mask, force);
+	}
 	switch (ret) {
 	case IRQ_SET_MASK_OK:
 	case IRQ_SET_MASK_OK_DONE:
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 9fcb2a695a41..008d6ac2342b 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -163,6 +163,12 @@ static int __init housekeeping_isolcpus_setup(char *str)
 			continue;
 		}
 
+		if (!strncmp(str, "managed_irq,", 12)) {
+			str += 12;
+			flags |= HK_FLAG_MANAGED_IRQ;
+			continue;
+		}
+
 		pr_warn("isolcpus: Error, unknown flag\n");
 		return 0;
 	}
-- 
2.20.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ