[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAHS8izMN+_9iWiZnc_FdkMZVDsRRG9FM8JYsz84V8gqDJU_GAA@mail.gmail.com>
Date: Tue, 19 Aug 2025 18:32:52 -0700
From: Mina Almasry <almasrymina@...gle.com>
To: Pavel Begunkov <asml.silence@...il.com>
Cc: Jakub Kicinski <kuba@...nel.org>, netdev@...r.kernel.org,
Eric Dumazet <edumazet@...gle.com>, Willem de Bruijn <willemb@...gle.com>,
Paolo Abeni <pabeni@...hat.com>, andrew+netdev@...n.ch, horms@...nel.org,
davem@...emloft.net, sdf@...ichev.me, dw@...idwei.uk,
michael.chan@...adcom.com, dtatulea@...dia.com, ap420073@...il.com,
linux-kernel@...r.kernel.org, io-uring@...r.kernel.org
Subject: Re: [PATCH net-next v3 12/23] net: allocate per-queue config structs
and pass them thru the queue API
On Tue, Aug 19, 2025 at 2:29 PM Mina Almasry <almasrymina@...gle.com> wrote:
>
> On Mon, Aug 18, 2025 at 6:56 AM Pavel Begunkov <asml.silence@...il.com> wrote:
> >
> > From: Jakub Kicinski <kuba@...nel.org>
> >
> > Create an array of config structs to store per-queue config.
> > Pass these structs in the queue API. Drivers can also retrieve
> > the config for a single queue calling netdev_queue_config()
> > directly.
> >
> > Signed-off-by: Jakub Kicinski <kuba@...nel.org>
> > [pavel: patch up mlx callbacks with unused qcfg]
> > Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
> > ---
> > drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++-
> > drivers/net/ethernet/google/gve/gve_main.c | 9 ++-
> > .../net/ethernet/mellanox/mlx5/core/en_main.c | 9 +--
> > drivers/net/netdevsim/netdev.c | 6 +-
> > include/net/netdev_queues.h | 19 ++++++
> > net/core/dev.h | 3 +
> > net/core/netdev_config.c | 58 +++++++++++++++++++
> > net/core/netdev_rx_queue.c | 11 +++-
> > 8 files changed, 109 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> > index d3d9b72ef313..48ff6f024e07 100644
> > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> > @@ -15824,7 +15824,9 @@ static const struct netdev_stat_ops bnxt_stat_ops = {
> > .get_base_stats = bnxt_get_base_stats,
> > };
> >
> > -static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
> > +static int bnxt_queue_mem_alloc(struct net_device *dev,
> > + struct netdev_queue_config *qcfg,
> > + void *qmem, int idx)
> > {
> > struct bnxt_rx_ring_info *rxr, *clone;
> > struct bnxt *bp = netdev_priv(dev);
> > @@ -15992,7 +15994,9 @@ static void bnxt_copy_rx_ring(struct bnxt *bp,
> > dst->rx_agg_bmap = src->rx_agg_bmap;
> > }
> >
> > -static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
> > +static int bnxt_queue_start(struct net_device *dev,
> > + struct netdev_queue_config *qcfg,
> > + void *qmem, int idx)
> > {
> > struct bnxt *bp = netdev_priv(dev);
> > struct bnxt_rx_ring_info *rxr, *clone;
> > diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
> > index 1f411d7c4373..f40edab616d8 100644
> > --- a/drivers/net/ethernet/google/gve/gve_main.c
> > +++ b/drivers/net/ethernet/google/gve/gve_main.c
> > @@ -2580,8 +2580,9 @@ static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
> > gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
> > }
> >
> > -static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
> > - int idx)
> > +static int gve_rx_queue_mem_alloc(struct net_device *dev,
> > + struct netdev_queue_config *qcfg,
> > + void *per_q_mem, int idx)
> > {
> > struct gve_priv *priv = netdev_priv(dev);
> > struct gve_rx_alloc_rings_cfg cfg = {0};
> > @@ -2602,7 +2603,9 @@ static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
> > return err;
> > }
> >
> > -static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
> > +static int gve_rx_queue_start(struct net_device *dev,
> > + struct netdev_queue_config *qcfg,
> > + void *per_q_mem, int idx)
> > {
> > struct gve_priv *priv = netdev_priv(dev);
> > struct gve_rx_ring *gve_per_q_mem;
> > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> > index 21bb88c5d3dc..83264c17a4f7 100644
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> > @@ -5541,8 +5541,9 @@ struct mlx5_qmgmt_data {
> > struct mlx5e_channel_param cparam;
> > };
> >
> > -static int mlx5e_queue_mem_alloc(struct net_device *dev, void *newq,
> > - int queue_index)
> > +static int mlx5e_queue_mem_alloc(struct net_device *dev,
> > + struct netdev_queue_config *qcfg,
> > + void *newq, int queue_index)
> > {
> > struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq;
> > struct mlx5e_priv *priv = netdev_priv(dev);
> > @@ -5603,8 +5604,8 @@ static int mlx5e_queue_stop(struct net_device *dev, void *oldq, int queue_index)
> > return 0;
> > }
> >
> > -static int mlx5e_queue_start(struct net_device *dev, void *newq,
> > - int queue_index)
> > +static int mlx5e_queue_start(struct net_device *dev, struct netdev_queue_config *qcfg,
> > + void *newq, int queue_index)
> > {
> > struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq;
> > struct mlx5e_priv *priv = netdev_priv(dev);
> > diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
> > index 0178219f0db5..985c3403ec57 100644
> > --- a/drivers/net/netdevsim/netdev.c
> > +++ b/drivers/net/netdevsim/netdev.c
> > @@ -733,7 +733,8 @@ struct nsim_queue_mem {
> > };
> >
> > static int
> > -nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx)
> > +nsim_queue_mem_alloc(struct net_device *dev, struct netdev_queue_config *qcfg,
> > + void *per_queue_mem, int idx)
> > {
> > struct nsim_queue_mem *qmem = per_queue_mem;
> > struct netdevsim *ns = netdev_priv(dev);
> > @@ -782,7 +783,8 @@ static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem)
> > }
> >
> > static int
> > -nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx)
> > +nsim_queue_start(struct net_device *dev, struct netdev_queue_config *qcfg,
> > + void *per_queue_mem, int idx)
> > {
> > struct nsim_queue_mem *qmem = per_queue_mem;
> > struct netdevsim *ns = netdev_priv(dev);
> > diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
> > index d73f9023c96f..b850cff71d12 100644
> > --- a/include/net/netdev_queues.h
> > +++ b/include/net/netdev_queues.h
> > @@ -32,6 +32,13 @@ struct netdev_config {
> > /** @hds_config: HDS enabled (ETHTOOL_A_RINGS_TCP_DATA_SPLIT).
> > */
> > u8 hds_config;
> > +
> > + /** @qcfg: per-queue configuration */
> > + struct netdev_queue_config *qcfg;
> > +};
> > +
> > +/* Same semantics as fields in struct netdev_config */
> > +struct netdev_queue_config {
> > };
>
> I was very confused why this is empty until I looked at patch 18 :-D
>
> >
> > /* See the netdev.yaml spec for definition of each statistic */
> > @@ -136,6 +143,10 @@ void netdev_stat_queue_sum(struct net_device *netdev,
> > *
> > * @ndo_queue_mem_size: Size of the struct that describes a queue's memory.
> > *
> > + * @ndo_queue_cfg_defaults: (Optional) Populate queue config struct with
> > + * defaults. Queue config structs are passed to this
> > + * helper before the user-requested settings are applied.
> > + *
> > * @ndo_queue_mem_alloc: Allocate memory for an RX queue at the specified index.
> > * The new memory is written at the specified address.
> > *
> > @@ -153,12 +164,17 @@ void netdev_stat_queue_sum(struct net_device *netdev,
> > */
> > struct netdev_queue_mgmt_ops {
> > size_t ndo_queue_mem_size;
> > + void (*ndo_queue_cfg_defaults)(struct net_device *dev,
> > + int idx,
> > + struct netdev_queue_config *qcfg);
> > int (*ndo_queue_mem_alloc)(struct net_device *dev,
> > + struct netdev_queue_config *qcfg,
> > void *per_queue_mem,
> > int idx);
> > void (*ndo_queue_mem_free)(struct net_device *dev,
> > void *per_queue_mem);
> > int (*ndo_queue_start)(struct net_device *dev,
> > + struct netdev_queue_config *qcfg,
> > void *per_queue_mem,
> > int idx);
> > int (*ndo_queue_stop)(struct net_device *dev,
> > @@ -166,6 +182,9 @@ struct netdev_queue_mgmt_ops {
> > int idx);
> > };
> >
> > +void netdev_queue_config(struct net_device *dev, int rxq,
> > + struct netdev_queue_config *qcfg);
> > +
> > /**
> > * DOC: Lockless queue stopping / waking helpers.
> > *
> > diff --git a/net/core/dev.h b/net/core/dev.h
> > index 7041c8bd2a0f..a553a0f1f846 100644
> > --- a/net/core/dev.h
> > +++ b/net/core/dev.h
> > @@ -9,6 +9,7 @@
> > #include <net/netdev_lock.h>
> >
> > struct net;
> > +struct netdev_queue_config;
> > struct netlink_ext_ack;
> > struct cpumask;
> >
> > @@ -96,6 +97,8 @@ int netdev_alloc_config(struct net_device *dev);
> > void __netdev_free_config(struct netdev_config *cfg);
> > void netdev_free_config(struct net_device *dev);
> > int netdev_reconfig_start(struct net_device *dev);
> > +void __netdev_queue_config(struct net_device *dev, int rxq,
> > + struct netdev_queue_config *qcfg, bool pending);
> >
> > /* netdev management, shared between various uAPI entry points */
> > struct netdev_name_node {
> > diff --git a/net/core/netdev_config.c b/net/core/netdev_config.c
> > index 270b7f10a192..bad2d53522f0 100644
> > --- a/net/core/netdev_config.c
> > +++ b/net/core/netdev_config.c
> > @@ -8,18 +8,29 @@
> > int netdev_alloc_config(struct net_device *dev)
> > {
> > struct netdev_config *cfg;
> > + unsigned int maxqs;
> >
> > cfg = kzalloc(sizeof(*dev->cfg), GFP_KERNEL_ACCOUNT);
> > if (!cfg)
> > return -ENOMEM;
> >
> > + maxqs = max(dev->num_rx_queues, dev->num_tx_queues);
>
> I honestly did not think about tx queues at all for the queue api thus
> far. The ndos do specify that api applies to rx queues, and maybe the
> driver only implemented them to assume indeed the calls are to rx
> queues. Are you intentionally extending the queue api support for tx
> queues? Or maybe you're allocing configs for the tx queues to be used
> in some future?
>
> Other places in this patch series uses num_rx_queues directly. Feels
> like this should do the same.
>
> > + cfg->qcfg = kcalloc(maxqs, sizeof(*cfg->qcfg), GFP_KERNEL_ACCOUNT);
> > + if (!cfg->qcfg)
> > + goto err_free_cfg;
> > +
> > dev->cfg = cfg;
> > dev->cfg_pending = cfg;
> > return 0;
> > +
> > +err_free_cfg:
> > + kfree(cfg);
> > + return -ENOMEM;
> > }
> >
> > void __netdev_free_config(struct netdev_config *cfg)
> > {
> > + kfree(cfg->qcfg);
> > kfree(cfg);
> > }
> >
> > @@ -32,12 +43,59 @@ void netdev_free_config(struct net_device *dev)
> > int netdev_reconfig_start(struct net_device *dev)
> > {
> > struct netdev_config *cfg;
> > + unsigned int maxqs;
> >
> > WARN_ON(dev->cfg != dev->cfg_pending);
> > cfg = kmemdup(dev->cfg, sizeof(*dev->cfg), GFP_KERNEL_ACCOUNT);
> > if (!cfg)
> > return -ENOMEM;
> >
> > + maxqs = max(dev->num_rx_queues, dev->num_tx_queues);
> > + cfg->qcfg = kmemdup_array(dev->cfg->qcfg, maxqs, sizeof(*cfg->qcfg),
> > + GFP_KERNEL_ACCOUNT);
> > + if (!cfg->qcfg)
> > + goto err_free_cfg;
> > +
> > dev->cfg_pending = cfg;
> > return 0;
> > +
> > +err_free_cfg:
> > + kfree(cfg);
> > + return -ENOMEM;
> > +}
> > +
> > +void __netdev_queue_config(struct net_device *dev, int rxq,
> > + struct netdev_queue_config *qcfg, bool pending)
> > +{
> > + memset(qcfg, 0, sizeof(*qcfg));
> > +
>
> This memset 0 is wrong for queue configs like hds_thresh where 0 is a
> value, not 'restore default'.
>
> Either netdev_queue_config needs to have a comment that says 'only
> values where 0 is restore default is allowed in this struct', or this
> function needs to handle 0-as-value configs correctly.
>
> But I wonder if the memset(0) is wrong in general. Isn't this helper
> trying to grab the _current_ configuration? So qcfg should be seeded
> with appropriate value from dev->qcfgs[rxq]? This function reads like
> it's trying to get the default configuration, but in a way that
> doesn't handle hds_thresh style semantics correctly?
>
Nevermind this comment, a close review of patch 18 answered this
question actually. You are indeed grabbing the configuration from
dev->qcfgs[rxq], you're just not doing it here because
netdev_queue_config is empty.
--
Thanks,
Mina
Powered by blists - more mailing lists