[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAAVpQUAXPadkvRa7Rdo-_bQOpH5XRr+GST4cdv6G-be=SQ5sAQ@mail.gmail.com>
Date: Tue, 4 Nov 2025 14:44:15 -0800
From: Kuniyuki Iwashima <kuniyu@...gle.com>
To: Florian Fainelli <florian.fainelli@...adcom.com>
Cc: netdev@...r.kernel.org, bcm-kernel-feedback-list@...adcom.com,
Doug Berger <opendmb@...il.com>, Andrew Lunn <andrew+netdev@...n.ch>,
"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, Simon Horman <horms@...nel.org>,
Stanislav Fomichev <sdf@...ichev.me>, Antoine Tenart <atenart@...nel.org>, Yajun Deng <yajun.deng@...ux.dev>,
open list <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH net-next v2 1/2] net: ethernet: Allow disabling pause on panic
On Tue, Nov 4, 2025 at 2:13 PM Florian Fainelli
<florian.fainelli@...adcom.com> wrote:
>
> Development devices on a lab network might be subject to kernel panics
> and if they have pause frame generation enabled, once the kernel panics,
> the Ethernet controller stops being serviced. This can create a flood of
> pause frames that certain switches are unable to handle, resulting in a
> complete paralysis of the network because they broadcast to other
> stations on that same network segment.
>
> To accommodate such situations, introduce a
> /sys/class/net/<device>/disable_pause_on_panic knob which will disable
> Ethernet pause frame generation upon kernel panic.
>
> Note that device drivers wishing to make use of that feature need to
> implement ethtool_ops::set_pauseparam_panic to specifically deal with
> that atomic context.
>
> Signed-off-by: Florian Fainelli <florian.fainelli@...adcom.com>
> ---
> Documentation/ABI/testing/sysfs-class-net | 16 +++++
> include/linux/ethtool.h | 3 +
> include/linux/netdevice.h | 1 +
> net/core/net-sysfs.c | 34 ++++++++++
> net/ethernet/Makefile | 3 +-
> net/ethernet/pause_panic.c | 81 +++++++++++++++++++++++
> 6 files changed, 137 insertions(+), 1 deletion(-)
> create mode 100644 net/ethernet/pause_panic.c
>
> diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
> index ebf21beba846..f762ce439203 100644
> --- a/Documentation/ABI/testing/sysfs-class-net
> +++ b/Documentation/ABI/testing/sysfs-class-net
> @@ -352,3 +352,19 @@ Description:
> 0 threaded mode disabled for this dev
> 1 threaded mode enabled for this dev
> == ==================================
> +
> +What: /sys/class/net/<iface>/disable_pause_on_panic
> +Date: Nov 2025
> +KernelVersion: 6.20
> +Contact: netdev@...r.kernel.org
> +Description:
> + Boolean value to control whether to disable pause frame
> + generation on panic. This is helpful in environments where
> + the link partner may incorrectly respond to pause frames (e.g.:
> + improperly configured Ethernet switches)
> +
> + Possible values:
> + == ==================================
> + 0 threaded mode disabled for this dev
> + 1 threaded mode enabled for this dev
nit: These two lines were copied from the "threaded" entry above and need to be updated to describe disable_pause_on_panic.
> + == ==================================
> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
> index c2d8b4ec62eb..e014d0f2a5ac 100644
> --- a/include/linux/ethtool.h
> +++ b/include/linux/ethtool.h
> @@ -956,6 +956,8 @@ struct kernel_ethtool_ts_info {
> * @get_pauseparam: Report pause parameters
> * @set_pauseparam: Set pause parameters. Returns a negative error code
> * or zero.
> + * @set_pauseparam_panic: Set pause parameters while in a panic context. This
> + * call is not allowed to sleep. Does not return a value.
> * @self_test: Run specified self-tests
> * @get_strings: Return a set of strings that describe the requested objects
> * @set_phys_id: Identify the physical devices, e.g. by flashing an LED
> @@ -1170,6 +1172,7 @@ struct ethtool_ops {
> struct ethtool_pauseparam*);
> int (*set_pauseparam)(struct net_device *,
> struct ethtool_pauseparam*);
> + void (*set_pauseparam_panic)(struct net_device *);
> void (*self_test)(struct net_device *, struct ethtool_test *, u64 *);
> void (*get_strings)(struct net_device *, u32 stringset, u8 *);
> int (*set_phys_id)(struct net_device *, enum ethtool_phys_id_state);
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index e808071dbb7d..2d4b07693745 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -2441,6 +2441,7 @@ struct net_device {
> bool proto_down;
> bool irq_affinity_auto;
> bool rx_cpu_rmap_auto;
> + bool disable_pause_on_panic;
>
> /* priv_flags_slow, ungrouped to save space */
> unsigned long see_all_hwtstamp_requests:1;
> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
> index ca878525ad7c..c01dc3e200d8 100644
> --- a/net/core/net-sysfs.c
> +++ b/net/core/net-sysfs.c
> @@ -770,6 +770,39 @@ static ssize_t threaded_store(struct device *dev,
> }
> static DEVICE_ATTR_RW(threaded);
>
> +static ssize_t disable_pause_on_panic_show(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + struct net_device *ndev = to_net_dev(dev);
> + ssize_t ret = -EINVAL;
> +
> + rcu_read_lock();
> + if (dev_isalive(ndev))
> + ret = sysfs_emit(buf, fmt_dec, READ_ONCE(ndev->disable_pause_on_panic));
> + rcu_read_unlock();
> +
> + return ret;
> +}
> +
> +static int modify_disable_pause_on_panic(struct net_device *dev, unsigned long val)
> +{
> + if (val != 0 && val != 1)
> + return -EINVAL;
Should we validate !ops->set_pauseparam_panic here rather than in
disable_pause_on_device()? For example:
ops = dev->ethtool_ops;
if (!ops || !ops->set_pauseparam_panic)
return -EOPNOTSUPP;
> +
> + WRITE_ONCE(dev->disable_pause_on_panic, val);
> +
> + return 0;
> +}
> +
> +static ssize_t disable_pause_on_panic_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t len)
> +{
> + return netdev_store(dev, attr, buf, len, modify_disable_pause_on_panic);
> +}
> +static DEVICE_ATTR_RW(disable_pause_on_panic);
> +
> static struct attribute *net_class_attrs[] __ro_after_init = {
> &dev_attr_netdev_group.attr,
> &dev_attr_type.attr,
> @@ -800,6 +833,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
> &dev_attr_carrier_up_count.attr,
> &dev_attr_carrier_down_count.attr,
> &dev_attr_threaded.attr,
> + &dev_attr_disable_pause_on_panic.attr,
> NULL,
> };
> ATTRIBUTE_GROUPS(net_class);
> diff --git a/net/ethernet/Makefile b/net/ethernet/Makefile
> index e03eff94e0db..9b1f3ff8695a 100644
> --- a/net/ethernet/Makefile
> +++ b/net/ethernet/Makefile
> @@ -3,4 +3,5 @@
> # Makefile for the Linux Ethernet layer.
> #
>
> -obj-y += eth.o
> +obj-y += eth.o \
> + pause_panic.o
> diff --git a/net/ethernet/pause_panic.c b/net/ethernet/pause_panic.c
> new file mode 100644
> index 000000000000..8ef61eb768a0
> --- /dev/null
> +++ b/net/ethernet/pause_panic.c
> @@ -0,0 +1,81 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Ethernet pause disable on panic handler
> + *
> + * This module provides per-device control via sysfs to disable Ethernet flow
> + * control (pause frames) on individual Ethernet devices when the kernel panics.
> + * Each device can be configured via /sys/class/net/<device>/disable_pause_on_panic.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/panic_notifier.h>
> +#include <linux/netdevice.h>
> +#include <linux/ethtool.h>
> +#include <linux/notifier.h>
> +#include <linux/if_ether.h>
> +#include <net/net_namespace.h>
> +
> +/*
> + * Disable pause/flow control on a single Ethernet device.
> + */
> +static void disable_pause_on_device(struct net_device *dev)
> +{
> + const struct ethtool_ops *ops;
> +
> + /* Only proceed if this device has the flag enabled */
> + if (!READ_ONCE(dev->disable_pause_on_panic))
> + return;
> +
> + ops = dev->ethtool_ops;
> + if (!ops || !ops->set_pauseparam_panic)
> + return;
> +
> + /*
> + * In panic context, we're in atomic context and cannot sleep.
> + */
> + ops->set_pauseparam_panic(dev);
> +}
> +
> +/*
> + * Panic notifier to disable pause frames on all Ethernet devices.
> + * Called in atomic context during kernel panic.
> + */
> +static int eth_pause_panic_handler(struct notifier_block *this,
> + unsigned long event, void *ptr)
> +{
> + struct net_device *dev;
> +
> + /*
> + * Iterate over all network devices in the init namespace.
> + * In panic context, we cannot acquire locks that might sleep,
> + * so we use RCU iteration.
> + * Each device will check its own disable_pause_on_panic flag.
> + */
> + rcu_read_lock();
> + for_each_netdev_rcu(&init_net, dev) {
> + /* Reference count might not be available in panic */
> + if (!dev)
> + continue;
This seems unnecessary unless while() + next_net_device_rcu()
is used instead of for_each_netdev_rcu().
Or are we assuming that something could overwrite
dev->dev_list.next with NULL and panic?
> +
> + disable_pause_on_device(dev);
> + }
> + rcu_read_unlock();
> +
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block eth_pause_panic_notifier = {
> + .notifier_call = eth_pause_panic_handler,
> + .priority = INT_MAX, /* Highest priority: called first in the notifier chain */
> +};
> +
> +static int __init eth_pause_panic_init(void)
> +{
> + /* Register panic notifier */
> + atomic_notifier_chain_register(&panic_notifier_list,
> + &eth_pause_panic_notifier);
> +
> + return 0;
> +}
> +device_initcall(eth_pause_panic_init);
> --
> 2.34.1
>
Powered by blists - more mailing lists