lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5181a634-fe25-45e7-803e-eb8737990e01@gmail.com>
Date: Thu, 9 May 2024 23:24:42 +0100
From: Ken Milmore <ken.milmore@...il.com>
To: Heiner Kallweit <hkallweit1@...il.com>, netdev@...r.kernel.org
Cc: nic_swsd@...ltek.com
Subject: Re: r8169: transmit queue timeouts and IRQ masking

On 08/05/2024 22:14, Heiner Kallweit wrote:
> 
> Re-reading &tp->napi may be racy, and I think the code delivers
> a wrong result if NAPI_STATE_SCHEDand NAPI_STATE_DISABLE
> both are set.
> 
>>  out:
>>         rtl_ack_events(tp, status);
> 
> The following uses a modified version of napi_schedule_prep()
> to avoid re-reading the napi state.
> We would have to see whether this extension to the net core is
> acceptable, as r8169 would be the only user for now.
> For testing it's one patch, for submitting it would need to be
> splitted.
> 
> ---
>  drivers/net/ethernet/realtek/r8169_main.c |  6 ++++--
>  include/linux/netdevice.h                 |  7 ++++++-
>  net/core/dev.c                            | 12 ++++++------
>  3 files changed, 16 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index eb329f0ab..94b97a16d 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -4639,6 +4639,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  {
>  	struct rtl8169_private *tp = dev_instance;
>  	u32 status = rtl_get_events(tp);
> +	int ret;
>  
>  	if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
>  		return IRQ_NONE;
> @@ -4657,10 +4658,11 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  		rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
>  	}
>  
> -	if (napi_schedule_prep(&tp->napi)) {
> +	ret = __napi_schedule_prep(&tp->napi);
> +	if (ret >= 0)
>  		rtl_irq_disable(tp);
> +	if (ret > 0)
>  		__napi_schedule(&tp->napi);
> -	}
>  out:
>  	rtl_ack_events(tp, status);
>  
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 42b9e6dc6..3df560264 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -498,7 +498,12 @@ static inline bool napi_is_scheduled(struct napi_struct *n)
>  	return test_bit(NAPI_STATE_SCHED, &n->state);
>  }
>  
> -bool napi_schedule_prep(struct napi_struct *n);
> +int __napi_schedule_prep(struct napi_struct *n);
> +
> +static inline bool napi_schedule_prep(struct napi_struct *n)
> +{
> +	return __napi_schedule_prep(n) > 0;
> +}
>  
>  /**
>   *	napi_schedule - schedule NAPI poll
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 4bf081c5a..126eab121 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6102,21 +6102,21 @@ void __napi_schedule(struct napi_struct *n)
>  EXPORT_SYMBOL(__napi_schedule);
>  
>  /**
> - *	napi_schedule_prep - check if napi can be scheduled
> + *	__napi_schedule_prep - check if napi can be scheduled
>   *	@n: napi context
>   *
>   * Test if NAPI routine is already running, and if not mark
>   * it as running.  This is used as a condition variable to
> - * insure only one NAPI poll instance runs.  We also make
> - * sure there is no pending NAPI disable.
> + * insure only one NAPI poll instance runs. Return -1 if
> + * there is a pending NAPI disable.
>   */
> -bool napi_schedule_prep(struct napi_struct *n)
> +int __napi_schedule_prep(struct napi_struct *n)
>  {
>  	unsigned long new, val = READ_ONCE(n->state);
>  
>  	do {
>  		if (unlikely(val & NAPIF_STATE_DISABLE))
> -			return false;
> +			return -1;
>  		new = val | NAPIF_STATE_SCHED;
>  
>  		/* Sets STATE_MISSED bit if STATE_SCHED was already set
> @@ -6131,7 +6131,7 @@ bool napi_schedule_prep(struct napi_struct *n)
>  
>  	return !(val & NAPIF_STATE_SCHED);
>  }
> -EXPORT_SYMBOL(napi_schedule_prep);
> +EXPORT_SYMBOL(__napi_schedule_prep);
>  
>  /**
>   * __napi_schedule_irqoff - schedule for receive

Here is a possible alternative (albeit expensive), using a flag in the driver:

diff --git linux-source-6.1~/drivers/net/ethernet/realtek/r8169_main.c linux-source-6.1/drivers/net/ethernet/realtek/r8169_main.c
index 6e34177..d703af1 100644
--- linux-source-6.1~/drivers/net/ethernet/realtek/r8169_main.c
+++ linux-source-6.1/drivers/net/ethernet/realtek/r8169_main.c
@@ -579,6 +579,7 @@ enum rtl_flag {
        RTL_FLAG_TASK_RESET_PENDING,
        RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE,
        RTL_FLAG_TASK_TX_TIMEOUT,
+       RTL_FLAG_IRQ_DISABLED,
        RTL_FLAG_MAX
 };
 
@@ -4609,6 +4610,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 
        if (napi_schedule_prep(&tp->napi)) {
                rtl_irq_disable(tp);
+               set_bit(RTL_FLAG_IRQ_DISABLED, tp->wk.flags);
                __napi_schedule(&tp->napi);
        }
 out:
@@ -4655,12 +4657,17 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
        struct net_device *dev = tp->dev;
        int work_done;
 
+       if (!test_and_set_bit(RTL_FLAG_IRQ_DISABLED, tp->wk.flags))
+               rtl_irq_disable(tp);
+
        rtl_tx(dev, tp, budget);
 
        work_done = rtl_rx(dev, tp, budget);
 
-       if (work_done < budget && napi_complete_done(napi, work_done))
+       if (work_done < budget && napi_complete_done(napi, work_done)) {
+               clear_bit(RTL_FLAG_IRQ_DISABLED, tp->wk.flags);
                rtl_irq_enable(tp);
+       }
 
        return work_done;
 }





Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ