lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <adfb0005-3283-4138-97d5-b4af3a314d98@gmail.com>
Date: Sat, 11 May 2024 00:06:04 +0200
From: Heiner Kallweit <hkallweit1@...il.com>
To: Ken Milmore <ken.milmore@...il.com>, netdev@...r.kernel.org
Cc: nic_swsd@...ltek.com
Subject: Re: r8169: transmit queue timeouts and IRQ masking

On 10.05.2024 00:24, Ken Milmore wrote:
> On 08/05/2024 22:14, Heiner Kallweit wrote:
>>
>> Re-reading &tp->napi may be racy, and I think the code delivers
>> a wrong result if NAPI_STATE_SCHEDand NAPI_STATE_DISABLE
>> both are set.
>>
>>>  out:
>>>         rtl_ack_events(tp, status);
>>
>> The following uses a modified version of napi_schedule_prep()
>> to avoid re-reading the napi state.
>> We would have to see whether this extension to the net core is
>> acceptable, as r8169 would be the only user for now.
>> For testing it's one patch, for submitting it would need to be
>> splitted.
>>
>> ---
>>  drivers/net/ethernet/realtek/r8169_main.c |  6 ++++--
>>  include/linux/netdevice.h                 |  7 ++++++-
>>  net/core/dev.c                            | 12 ++++++------
>>  3 files changed, 16 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
>> index eb329f0ab..94b97a16d 100644
>> --- a/drivers/net/ethernet/realtek/r8169_main.c
>> +++ b/drivers/net/ethernet/realtek/r8169_main.c
>> @@ -4639,6 +4639,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>>  {
>>  	struct rtl8169_private *tp = dev_instance;
>>  	u32 status = rtl_get_events(tp);
>> +	int ret;
>>  
>>  	if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
>>  		return IRQ_NONE;
>> @@ -4657,10 +4658,11 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>>  		rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
>>  	}
>>  
>> -	if (napi_schedule_prep(&tp->napi)) {
>> +	ret = __napi_schedule_prep(&tp->napi);
>> +	if (ret >= 0)
>>  		rtl_irq_disable(tp);
>> +	if (ret > 0)
>>  		__napi_schedule(&tp->napi);
>> -	}
>>  out:
>>  	rtl_ack_events(tp, status);
>>  
>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>> index 42b9e6dc6..3df560264 100644
>> --- a/include/linux/netdevice.h
>> +++ b/include/linux/netdevice.h
>> @@ -498,7 +498,12 @@ static inline bool napi_is_scheduled(struct napi_struct *n)
>>  	return test_bit(NAPI_STATE_SCHED, &n->state);
>>  }
>>  
>> -bool napi_schedule_prep(struct napi_struct *n);
>> +int __napi_schedule_prep(struct napi_struct *n);
>> +
>> +static inline bool napi_schedule_prep(struct napi_struct *n)
>> +{
>> +	return __napi_schedule_prep(n) > 0;
>> +}
>>  
>>  /**
>>   *	napi_schedule - schedule NAPI poll
>> diff --git a/net/core/dev.c b/net/core/dev.c
>> index 4bf081c5a..126eab121 100644
>> --- a/net/core/dev.c
>> +++ b/net/core/dev.c
>> @@ -6102,21 +6102,21 @@ void __napi_schedule(struct napi_struct *n)
>>  EXPORT_SYMBOL(__napi_schedule);
>>  
>>  /**
>> - *	napi_schedule_prep - check if napi can be scheduled
>> + *	__napi_schedule_prep - check if napi can be scheduled
>>   *	@n: napi context
>>   *
>>   * Test if NAPI routine is already running, and if not mark
>>   * it as running.  This is used as a condition variable to
>> - * insure only one NAPI poll instance runs.  We also make
>> - * sure there is no pending NAPI disable.
>> + * insure only one NAPI poll instance runs. Return -1 if
>> + * there is a pending NAPI disable.
>>   */
>> -bool napi_schedule_prep(struct napi_struct *n)
>> +int __napi_schedule_prep(struct napi_struct *n)
>>  {
>>  	unsigned long new, val = READ_ONCE(n->state);
>>  
>>  	do {
>>  		if (unlikely(val & NAPIF_STATE_DISABLE))
>> -			return false;
>> +			return -1;
>>  		new = val | NAPIF_STATE_SCHED;
>>  
>>  		/* Sets STATE_MISSED bit if STATE_SCHED was already set
>> @@ -6131,7 +6131,7 @@ bool napi_schedule_prep(struct napi_struct *n)
>>  
>>  	return !(val & NAPIF_STATE_SCHED);
>>  }
>> -EXPORT_SYMBOL(napi_schedule_prep);
>> +EXPORT_SYMBOL(__napi_schedule_prep);
>>  
>>  /**
>>   * __napi_schedule_irqoff - schedule for receive
> 
> Here is a possible alternative (albeit expensive), using a flag in the driver:
> 
> diff --git linux-source-6.1~/drivers/net/ethernet/realtek/r8169_main.c linux-source-6.1/drivers/net/ethernet/realtek/r8169_main.c
> index 6e34177..d703af1 100644
> --- linux-source-6.1~/drivers/net/ethernet/realtek/r8169_main.c
> +++ linux-source-6.1/drivers/net/ethernet/realtek/r8169_main.c
> @@ -579,6 +579,7 @@ enum rtl_flag {
>         RTL_FLAG_TASK_RESET_PENDING,
>         RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE,
>         RTL_FLAG_TASK_TX_TIMEOUT,
> +       RTL_FLAG_IRQ_DISABLED,
>         RTL_FLAG_MAX
>  };
>  
> @@ -4609,6 +4610,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  
>         if (napi_schedule_prep(&tp->napi)) {
>                 rtl_irq_disable(tp);
> +               set_bit(RTL_FLAG_IRQ_DISABLED, tp->wk.flags);
>                 __napi_schedule(&tp->napi);
>         }
>  out:
> @@ -4655,12 +4657,17 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
>         struct net_device *dev = tp->dev;
>         int work_done;
>  
> +       if (!test_and_set_bit(RTL_FLAG_IRQ_DISABLED, tp->wk.flags))
> +               rtl_irq_disable(tp);
> +
>         rtl_tx(dev, tp, budget);
>  
>         work_done = rtl_rx(dev, tp, budget);
>  
> -       if (work_done < budget && napi_complete_done(napi, work_done))
> +       if (work_done < budget && napi_complete_done(napi, work_done)) {
> +               clear_bit(RTL_FLAG_IRQ_DISABLED, tp->wk.flags);
>                 rtl_irq_enable(tp);
> +       }
>  
>         return work_done;
>  }
> 
> 
> 
> 

Nice idea. The following is a simplified version.
It's based on the thought that between scheduling NAPI and start of NAPI
polling interrupts don't hurt.

Signed-off-by: Heiner Kallweit <hkallweit1@...il.com>
---
 drivers/net/ethernet/realtek/r8169_main.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index e5ea827a2..7b04dfecc 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -592,6 +592,7 @@ enum rtl_flag {
 	RTL_FLAG_TASK_RESET_PENDING,
 	RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE,
 	RTL_FLAG_TASK_TX_TIMEOUT,
+	RTL_FLAG_IRQ_DISABLED,
 	RTL_FLAG_MAX
 };
 
@@ -4657,10 +4658,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 		rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
 	}
 
-	if (napi_schedule_prep(&tp->napi)) {
-		rtl_irq_disable(tp);
-		__napi_schedule(&tp->napi);
-	}
+	napi_schedule(&tp->napi);
 out:
 	rtl_ack_events(tp, status);
 
@@ -4714,12 +4712,17 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
 	struct net_device *dev = tp->dev;
 	int work_done;
 
+	if (!test_and_set_bit(RTL_FLAG_IRQ_DISABLED, tp->wk.flags))
+		rtl_irq_disable(tp);
+
 	rtl_tx(dev, tp, budget);
 
 	work_done = rtl_rx(dev, tp, budget);
 
-	if (work_done < budget && napi_complete_done(napi, work_done))
+	if (work_done < budget && napi_complete_done(napi, work_done)) {
+		clear_bit(RTL_FLAG_IRQ_DISABLED, tp->wk.flags);
 		rtl_irq_enable(tp);
+	}
 
 	return work_done;
 }
-- 
2.45.0



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ