lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 3 Aug 2021 21:57:27 +0200
From:   Heiner Kallweit <hkallweit1@...il.com>
To:     Kai-Heng Feng <kai.heng.feng@...onical.com>, nic_swsd@...ltek.com
Cc:     "David S. Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>,
        "open list:8169 10/100/1000 GIGABIT ETHERNET DRIVER" 
        <netdev@...r.kernel.org>, open list <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 1/2] r8169: Implement dynamic ASPM mechanism

On 03.08.2021 17:28, Kai-Heng Feng wrote:
> r8169 NICs on some platforms have abysmal speed when ASPM is enabled.
> Same issue can be observed with older vendor drivers.
> 
> The issue is however solved by the latest vendor driver. There's a new

Is there any errata document from Realtek recommending this workaround?
Any proof that it solves the problem in all cases of ASPM issues we've
seen so far?
Also, your heuristic logic seems to be different from the one in r8168.
The vendor driver also considers rx packets.

In addition you use this logic also for chip versions not covered by
r8168, like RTL8125. Any info from Realtek regarding these chip versions?

> mechanism, which disables r8169's internal ASPM when the NIC has
> substantial network traffic, and vice versa.
> 
10 packets per second I wouldn't call substantial traffic.
I'm afraid we may open a can of worms and may be bothered
with bug reports and complaints again.

> So implement the same mechanism here to resolve the issue.
> 
For me this risk is too high to re-enable ASPM for a lot of chip
versions w/o any official errata and workaround information.
I propose you make this change downstream, and if there are no
user complaints after some months I may consider having something
like that in the mainline driver.

> Signed-off-by: Kai-Heng Feng <kai.heng.feng@...onical.com>
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 36 +++++++++++++++++++++++
>  1 file changed, 36 insertions(+)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index c7af5bc3b8af..e257d3cd885e 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -624,6 +624,10 @@ struct rtl8169_private {
>  
>  	unsigned supports_gmii:1;
>  	unsigned aspm_manageable:1;
> +	unsigned aspm_enabled:1;
> +	struct timer_list aspm_timer;
> +	u32 aspm_packet_count;
> +
>  	dma_addr_t counters_phys_addr;
>  	struct rtl8169_counters *counters;
>  	struct rtl8169_tc_offsets tc_offset;
> @@ -2671,6 +2675,8 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
>  		RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
>  	}
>  
> +	tp->aspm_enabled = enable;
> +
>  	udelay(10);
>  }
>  
> @@ -4408,6 +4414,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp,
>  
>  	dirty_tx = tp->dirty_tx;
>  
> +	tp->aspm_packet_count += tp->cur_tx - dirty_tx;
>  	while (READ_ONCE(tp->cur_tx) != dirty_tx) {
>  		unsigned int entry = dirty_tx % NUM_TX_DESC;
>  		u32 status;
> @@ -4552,6 +4559,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		rtl8169_mark_to_asic(desc);
>  	}
>  
> +	tp->aspm_packet_count += count;
> +
>  	return count;
>  }
>  
> @@ -4659,8 +4668,31 @@ static int r8169_phy_connect(struct rtl8169_private *tp)
>  	return 0;
>  }
>  
> +#define ASPM_PACKET_THRESHOLD 10
> +#define ASPM_TIMER_INTERVAL 1000
> +
> +static void rtl8169_aspm_timer(struct timer_list *timer)
> +{
> +	struct rtl8169_private *tp = from_timer(tp, timer, aspm_timer);
> +	bool enable;
> +
> +	enable = tp->aspm_packet_count <= ASPM_PACKET_THRESHOLD;
> +
> +	if (tp->aspm_enabled != enable) {
> +		rtl_unlock_config_regs(tp);
> +		rtl_hw_aspm_clkreq_enable(tp, enable);
> +		rtl_lock_config_regs(tp);

All this in interrupt context w/o locking?

> +	}
> +
> +	tp->aspm_packet_count = 0;
> +
> +	mod_timer(timer, jiffies + msecs_to_jiffies(ASPM_TIMER_INTERVAL));
> +}
> +
>  static void rtl8169_down(struct rtl8169_private *tp)
>  {
> +	del_timer_sync(&tp->aspm_timer);
> +
>  	/* Clear all task flags */
>  	bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);
>  
> @@ -4687,6 +4719,10 @@ static void rtl8169_up(struct rtl8169_private *tp)
>  	rtl_reset_work(tp);
>  
>  	phy_start(tp->phydev);
> +
> +	timer_setup(&tp->aspm_timer, rtl8169_aspm_timer, 0);
> +	mod_timer(&tp->aspm_timer,
> +		  jiffies + msecs_to_jiffies(ASPM_TIMER_INTERVAL));
>  }
>  
>  static int rtl8169_close(struct net_device *dev)
> 

Powered by blists - more mailing lists