lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20241204114229.21452-5-wander@redhat.com>
Date: Wed,  4 Dec 2024 08:42:27 -0300
From: Wander Lairson Costa <wander@...hat.com>
To: Tony Nguyen <anthony.l.nguyen@...el.com>,
	Przemek Kitszel <przemyslaw.kitszel@...el.com>,
	Andrew Lunn <andrew+netdev@...n.ch>,
	"David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>,
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
	Clark Williams <clrkwllms@...nel.org>,
	Steven Rostedt <rostedt@...dmis.org>,
	Auke Kok <auke-jan.h.kok@...el.com>,
	Jeff Garzik <jgarzik@...hat.com>,
	intel-wired-lan@...ts.osuosl.org (moderated list:INTEL ETHERNET DRIVERS),
	netdev@...r.kernel.org (open list:NETWORKING DRIVERS),
	linux-kernel@...r.kernel.org (open list),
	linux-rt-devel@...ts.linux.dev (open list:Real-time Linux (PREEMPT_RT):Keyword:PREEMPT_RT)
Cc: Wander Lairson Costa <wander@...hat.com>,
	Yuying Ma <yuma@...hat.com>
Subject: [PATCH iwl-net 4/4] igb: fix igb_msix_other() handling for PREEMPT_RT

During testing of SR-IOV, Red Hat QE encountered an issue where the
ip link up command intermittently fails for the igbvf interfaces when
using the PREEMPT_RT variant. Investigation revealed that
e1000_write_posted_mbx returns an error due to the lack of an ACK
from e1000_poll_for_ack.

The underlying issue arises from the fact that IRQs are threaded by
default under PREEMPT_RT. While the exact hardware details are not
available, it appears that the IRQ handled by igb_msix_other must
be processed before e1000_poll_for_ack times out. However,
e1000_write_posted_mbx is called with preemption disabled, leading
to a scenario where the IRQ is serviced only after the failure of
e1000_write_posted_mbx.

Commit 338c4d3902fe ("igb: Disable threaded IRQ for igb_msix_other")
forced the ISR to run in a non-threaded context. However, Sebastian
observed that some functions called within the ISR acquire locks that
may sleep.

In the previous two patches, we managed to make igb_msg_mask() safe to
call from an interrupt context.

In this commit, we move most of the ISR handling to an interrupt
context, leaving non IRQ safe code to be called from the thread
context under PREEMPT_RT.

Reproducer:

ipaddr_vlan=3
nic_test=ens14f0
vf=${nic_test}v0 # The main testing steps:
while true; do
    ip link set ${nic_test} mtu 1500
    ip link set ${vf} mtu 1500
    ip link set $vf up
    # 3. set vlan and ip for VF
    ip link set ${nic_test} vf 0 vlan ${ipaddr_vlan}
    ip addr add 172.30.${ipaddr_vlan}.1/24 dev ${vf}
    ip addr add 2021:db8:${ipaddr_vlan}::1/64 dev ${vf}
    # 4. check the link state for VF and PF
    ip link show ${nic_test}
    if ! ip link show $vf | grep 'state UP'; then
        echo 'Error found'
        break
    fi
    ip link set $vf down
done

You can also reproduce it more reliably by setting nr_cpus=1 in the
kernel command line.

Fixes: 9d5c824399de ("igb: PCI-Express 82575 Gigabit Ethernet driver")
Signed-off-by: Wander Lairson Costa <wander@...hat.com>
Reported-by: Yuying Ma <yuma@...hat.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 35 ++++++++++++++++-------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 5828831fd29c2..b2894cebe2c9e 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -131,6 +131,7 @@ static void igb_set_uta(struct igb_adapter *adapter, bool set);
 static irqreturn_t igb_intr(int irq, void *);
 static irqreturn_t igb_intr_msi(int irq, void *);
 static irqreturn_t igb_msix_other(int irq, void *);
+static irqreturn_t igb_msix_other_threaded(int irq, void *);
 static irqreturn_t igb_msix_ring(int irq, void *);
 #ifdef CONFIG_IGB_DCA
 static void igb_update_dca(struct igb_q_vector *);
@@ -151,7 +152,6 @@ static void igb_rar_set_index(struct igb_adapter *, u32);
 static void igb_ping_all_vfs(struct igb_adapter *);
 static void igb_msg_task_irq_safe(struct igb_adapter *adapter);
 static void igb_msg_task_preemptible_safe(struct igb_adapter *adapter);
-static void igb_msg_task(struct igb_adapter *);
 static void igb_vmm_control(struct igb_adapter *);
 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
 static void igb_flush_mac_table(struct igb_adapter *);
@@ -908,8 +908,9 @@ static int igb_request_msix(struct igb_adapter *adapter)
 	struct net_device *netdev = adapter->netdev;
 	int i, err = 0, vector = 0, free_vector = 0;
 
-	err = request_irq(adapter->msix_entries[vector].vector,
-			  igb_msix_other, 0, netdev->name, adapter);
+	err = request_threaded_irq(adapter->msix_entries[vector].vector,
+				   igb_msix_other, igb_msix_other_threaded,
+				   IRQF_NO_THREAD, netdev->name, adapter);
 	if (err)
 		goto err_out;
 
@@ -7113,9 +7114,27 @@ static irqreturn_t igb_msix_other(int irq, void *data)
 		igb_check_wvbr(adapter);
 	}
 
-	/* Check for a mailbox event */
+	/* Check for a mailbox event (interrupt safe part) */
 	if (icr & E1000_ICR_VMMB)
-		igb_msg_task(adapter);
+		igb_msg_task_irq_safe(adapter);
+
+	adapter->test_icr = icr;
+
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		return igb_msix_other_threaded(irq, data);
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t igb_msix_other_threaded(int irq, void *data)
+{
+	struct igb_adapter *adapter = data;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 icr = adapter->test_icr;
+
+	/* Check for a mailbox event (preempible safe part) */
+	if (icr & E1000_ICR_VMMB)
+		igb_msg_task_preemptible_safe(adapter);
 
 	if (icr & E1000_ICR_LSC) {
 		hw->mac.get_link_status = 1;
@@ -8161,12 +8180,6 @@ static void igb_msg_task_preemptible_safe(struct igb_adapter *adapter)
 	vfs_spin_unlock_irqrestore(adapter, flags);
 }
 
-static __always_inline void igb_msg_task(struct igb_adapter *adapter)
-{
-	igb_msg_task_irq_safe(adapter);
-	igb_msg_task_preemptible_safe(adapter);
-}
-
 /**
  *  igb_set_uta - Set unicast filter table address
  *  @adapter: board private structure
-- 
2.47.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ