Message-ID: <f731b2e0-f952-4ec3-b05a-7522c73457c7@davidwei.uk>
Date: Tue, 23 Apr 2024 10:55:17 -0700
From: David Wei <dw@...idwei.uk>
To: Shailend Chand <shailend@...gle.com>, netdev@...r.kernel.org
Cc: almasrymina@...gle.com, davem@...emloft.net, edumazet@...gle.com,
 kuba@...nel.org, pabeni@...hat.com, willemb@...gle.com
Subject: Re: [RFC PATCH net-next 9/9] gve: Implement queue api

On 2024-04-19 3:23 pm, Shailend Chand wrote:
> On Thu, Apr 18, 2024 at 12:52 PM Shailend Chand <shailend@...gle.com> wrote:
>>
>> An api enabling the net stack to reset driver queues is implemented for
>> gve.
>>
>> Signed-off-by: Shailend Chand <shailend@...gle.com>
>> ---
>>  drivers/net/ethernet/google/gve/gve.h        |   6 +
>>  drivers/net/ethernet/google/gve/gve_dqo.h    |   6 +
>>  drivers/net/ethernet/google/gve/gve_main.c   | 143 +++++++++++++++++++
>>  drivers/net/ethernet/google/gve/gve_rx.c     |  12 +-
>>  drivers/net/ethernet/google/gve/gve_rx_dqo.c |  12 +-
>>  5 files changed, 167 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
>> index 9f6a897c87cb..d752e525bde7 100644
>> --- a/drivers/net/ethernet/google/gve/gve.h
>> +++ b/drivers/net/ethernet/google/gve/gve.h
>> @@ -1147,6 +1147,12 @@ bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx);
>>  void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx);
>>  int gve_rx_poll(struct gve_notify_block *block, int budget);
>>  bool gve_rx_work_pending(struct gve_rx_ring *rx);
>> +int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
>> +                         struct gve_rx_alloc_rings_cfg *cfg,
>> +                         struct gve_rx_ring *rx,
>> +                         int idx);
>> +void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx,
>> +                         struct gve_rx_alloc_rings_cfg *cfg);
>>  int gve_rx_alloc_rings(struct gve_priv *priv);
>>  int gve_rx_alloc_rings_gqi(struct gve_priv *priv,
>>                            struct gve_rx_alloc_rings_cfg *cfg);
>> diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h
>> index b81584829c40..e83773fb891f 100644
>> --- a/drivers/net/ethernet/google/gve/gve_dqo.h
>> +++ b/drivers/net/ethernet/google/gve/gve_dqo.h
>> @@ -44,6 +44,12 @@ void gve_tx_free_rings_dqo(struct gve_priv *priv,
>>                            struct gve_tx_alloc_rings_cfg *cfg);
>>  void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx);
>>  void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx);
>> +int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
>> +                         struct gve_rx_alloc_rings_cfg *cfg,
>> +                         struct gve_rx_ring *rx,
>> +                         int idx);
>> +void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
>> +                         struct gve_rx_alloc_rings_cfg *cfg);
>>  int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
>>                            struct gve_rx_alloc_rings_cfg *cfg);
>>  void gve_rx_free_rings_dqo(struct gve_priv *priv,
>> diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
>> index c348dff7cca6..5e652958f10f 100644
>> --- a/drivers/net/ethernet/google/gve/gve_main.c
>> +++ b/drivers/net/ethernet/google/gve/gve_main.c
>> @@ -17,6 +17,7 @@
>>  #include <linux/workqueue.h>
>>  #include <linux/utsname.h>
>>  #include <linux/version.h>
>> +#include <net/netdev_queues.h>
>>  #include <net/sch_generic.h>
>>  #include <net/xdp_sock_drv.h>
>>  #include "gve.h"
>> @@ -2070,6 +2071,15 @@ static void gve_turnup(struct gve_priv *priv)
>>         gve_set_napi_enabled(priv);
>>  }
>>
>> +static void gve_turnup_and_check_status(struct gve_priv *priv)
>> +{
>> +       u32 status;
>> +
>> +       gve_turnup(priv);
>> +       status = ioread32be(&priv->reg_bar0->device_status);
>> +       gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
>> +}
>> +
>>  static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
>>  {
>>         struct gve_notify_block *block;
>> @@ -2530,6 +2540,138 @@ static void gve_write_version(u8 __iomem *driver_version_register)
>>         writeb('\n', driver_version_register);
>>  }
>>
>> +static int gve_rx_queue_stop(struct net_device *dev, int idx,
>> +                            void **out_per_q_mem)
>> +{
>> +       struct gve_priv *priv = netdev_priv(dev);
>> +       struct gve_rx_ring *rx;
>> +       int err;
>> +
>> +       if (!priv->rx)
>> +               return -EAGAIN;
>> +       if (idx < 0 || idx >= priv->rx_cfg.max_queues)
>> +               return -ERANGE;
>> +
>> +       /* Destroying queue 0 while other queues exist is not supported in DQO */
>> +       if (!gve_is_gqi(priv) && idx == 0)
>> +               return -ERANGE;
>> +
>> +       rx = kvzalloc(sizeof(*rx), GFP_KERNEL);
>> +       if (!rx)
>> +               return -ENOMEM;
>> +       *rx = priv->rx[idx];
>> +
>> +       /* Single-queue destruction requires quiescence on all queues */
>> +       gve_turndown(priv);
>> +
>> +       /* This failure will trigger a reset - no need to clean up */
>> +       err = gve_adminq_destroy_single_rx_queue(priv, idx);
>> +       if (err) {
>> +               kvfree(rx);
>> +               return err;
>> +       }
>> +
>> +       if (gve_is_gqi(priv))
>> +               gve_rx_stop_ring_gqi(priv, idx);
>> +       else
>> +               gve_rx_stop_ring_dqo(priv, idx);
>> +
>> +       /* Turn the unstopped queues back up */
>> +       gve_turnup_and_check_status(priv);
>> +
>> +       *out_per_q_mem = rx;
>> +       return 0;
>> +}
>> +
>> +static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
>> +{
>> +       struct gve_priv *priv = netdev_priv(dev);
>> +       struct gve_rx_alloc_rings_cfg cfg = {0};
>> +       struct gve_rx_ring *rx;
>> +
>> +       gve_rx_get_curr_alloc_cfg(priv, &cfg);
>> +       rx = (struct gve_rx_ring *)per_q_mem;
>> +       if (!rx)
>> +               return;
>> +
>> +       if (gve_is_gqi(priv))
>> +               gve_rx_free_ring_gqi(priv, rx, &cfg);
>> +       else
>> +               gve_rx_free_ring_dqo(priv, rx, &cfg);
>> +
>> +       kvfree(per_q_mem);
>> +}
>> +
>> +static void *gve_rx_queue_mem_alloc(struct net_device *dev, int idx)
>> +{
>> +       struct gve_priv *priv = netdev_priv(dev);
>> +       struct gve_rx_alloc_rings_cfg cfg = {0};
>> +       struct gve_rx_ring *rx;
>> +       int err;
>> +
>> +       gve_rx_get_curr_alloc_cfg(priv, &cfg);
>> +       if (idx < 0 || idx >= cfg.qcfg->max_queues)
>> +               return NULL;
>> +
>> +       rx = kvzalloc(sizeof(*rx), GFP_KERNEL);
>> +       if (!rx)
>> +               return NULL;
>> +
>> +       if (gve_is_gqi(priv))
>> +               err = gve_rx_alloc_ring_gqi(priv, &cfg, rx, idx);
>> +       else
>> +               err = gve_rx_alloc_ring_dqo(priv, &cfg, rx, idx);
>> +
>> +       if (err) {
>> +               kvfree(rx);
>> +               return NULL;
>> +       }
>> +       return rx;
>> +}
>> +
>> +static int gve_rx_queue_start(struct net_device *dev, int idx, void *per_q_mem)
>> +{
>> +       struct gve_priv *priv = netdev_priv(dev);
>> +       struct gve_rx_ring *rx;
>> +       int err;
>> +
>> +       if (!priv->rx)
>> +               return -EAGAIN;
>> +       if (idx < 0 || idx >= priv->rx_cfg.max_queues)
>> +               return -ERANGE;
>> +       rx = (struct gve_rx_ring *)per_q_mem;
>> +       priv->rx[idx] = *rx;
>> +
>> +       /* Single-queue creation requires quiescence on all queues */
>> +       gve_turndown(priv);
>> +
>> +       if (gve_is_gqi(priv))
>> +               gve_rx_start_ring_gqi(priv, idx);
>> +       else
>> +               gve_rx_start_ring_dqo(priv, idx);
>> +
>> +       /* This failure will trigger a reset - no need to clean up */
>> +       err = gve_adminq_create_single_rx_queue(priv, idx);
>> +       if (err)
>> +               return err;
>> +
>> +       if (gve_is_gqi(priv))
>> +               gve_rx_write_doorbell(priv, &priv->rx[idx]);
>> +       else
>> +               gve_rx_post_buffers_dqo(&priv->rx[idx]);
>> +
>> +       /* Turn the unstopped queues back up */
>> +       gve_turnup_and_check_status(priv);
>> +       return 0;
>> +}
> 
> I realized there is a leak: the passed-in `per_q_mem` is never
> kvfree'd. The alloc and stop hooks kvzalloc a temporary ring struct,
> so to keep symmetry and avoid the leak, the start and free hooks
> ought to kvfree it. The free hook does this, but I forgot to do it
> in the start hook.
> 
> If we go down the route of making the core aware of the ring struct
> size, then none of the four hooks needs to worry about the temporary
> struct: the core can just alloc and free it for both the old and new
> queue.
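
For reference, a minimal sketch of that fix in gve_rx_queue_start()
(untested; it just frees the temporary wrapper once its contents have
been copied into priv->rx[idx]) could look like:

	rx = (struct gve_rx_ring *)per_q_mem;
	priv->rx[idx] = *rx;
	/* The wrapper struct was kvzalloc'd by ndo_queue_mem_alloc; its
	 * contents now live in priv->rx[idx], so free only the wrapper
	 * here, mirroring the kvfree at the end of the free hook.
	 */
	kvfree(per_q_mem);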

Having the core own qmem is the direction to move towards. I have
patches that do this, so we could merge what you have so far (i.e. have
the ndos alloc/free qmem), and then I'll build on top of it for
FBNIC/bnxt.

What might be missing in this patchset is a user of
netdev_queue_mgmt_ops. I know Mina has a restart_rx_queue() somewhere,
and perhaps that should be included.
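
To make that concrete, a core-side restart helper built on the four
hooks (purely hypothetical -- the name and the error handling are
assumptions on my part, not an existing API) might look roughly like:

	/* Hypothetical core helper: swap freshly allocated queue memory
	 * into RX queue @idx via the driver's queue_mgmt_ops.
	 */
	static int netdev_restart_rx_queue(struct net_device *dev, int idx)
	{
		const struct netdev_queue_mgmt_ops *ops = dev->queue_mgmt_ops;
		void *new_mem, *old_mem;
		int err;

		if (!ops)
			return -EOPNOTSUPP;

		new_mem = ops->ndo_queue_mem_alloc(dev, idx);
		if (!new_mem)
			return -ENOMEM;

		err = ops->ndo_queue_stop(dev, idx, &old_mem);
		if (err) {
			ops->ndo_queue_mem_free(dev, new_mem);
			return err;
		}

		err = ops->ndo_queue_start(dev, idx, new_mem);
		if (err) {
			/* A failed start triggers a device reset in gve, so
			 * just release both copies of the queue memory here.
			 */
			ops->ndo_queue_mem_free(dev, new_mem);
			ops->ndo_queue_mem_free(dev, old_mem);
			return err;
		}

		ops->ndo_queue_mem_free(dev, old_mem);
		return 0;
	}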

> 
>> +
>> +static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
>> +       .ndo_queue_mem_alloc    =       gve_rx_queue_mem_alloc,
>> +       .ndo_queue_mem_free     =       gve_rx_queue_mem_free,
>> +       .ndo_queue_start        =       gve_rx_queue_start,
>> +       .ndo_queue_stop         =       gve_rx_queue_stop,
>> +};
>> +
>>  static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>>  {
>>         int max_tx_queues, max_rx_queues;
>> @@ -2584,6 +2726,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>>         pci_set_drvdata(pdev, dev);
>>         dev->ethtool_ops = &gve_ethtool_ops;
>>         dev->netdev_ops = &gve_netdev_ops;
>> +       dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
>>
>>         /* Set default and supported features.
>>          *
>> diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
>> index 1d235caab4c5..307bf97d4778 100644
>> --- a/drivers/net/ethernet/google/gve/gve_rx.c
>> +++ b/drivers/net/ethernet/google/gve/gve_rx.c
>> @@ -101,8 +101,8 @@ void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx)
>>         gve_rx_reset_ring_gqi(priv, idx);
>>  }
>>
>> -static void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx,
>> -                                struct gve_rx_alloc_rings_cfg *cfg)
>> +void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx,
>> +                         struct gve_rx_alloc_rings_cfg *cfg)
>>  {
>>         struct device *dev = &priv->pdev->dev;
>>         u32 slots = rx->mask + 1;
>> @@ -270,10 +270,10 @@ void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx)
>>         gve_add_napi(priv, ntfy_idx, gve_napi_poll);
>>  }
>>
>> -static int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
>> -                                struct gve_rx_alloc_rings_cfg *cfg,
>> -                                struct gve_rx_ring *rx,
>> -                                int idx)
>> +int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
>> +                         struct gve_rx_alloc_rings_cfg *cfg,
>> +                         struct gve_rx_ring *rx,
>> +                         int idx)
>>  {
>>         struct device *hdev = &priv->pdev->dev;
>>         u32 slots = cfg->ring_size;
>> diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
>> index dc2c6bd92e82..dcbc37118870 100644
>> --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
>> +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
>> @@ -299,8 +299,8 @@ void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
>>         gve_rx_reset_ring_dqo(priv, idx);
>>  }
>>
>> -static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
>> -                                struct gve_rx_alloc_rings_cfg *cfg)
>> +void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
>> +                         struct gve_rx_alloc_rings_cfg *cfg)
>>  {
>>         struct device *hdev = &priv->pdev->dev;
>>         size_t completion_queue_slots;
>> @@ -373,10 +373,10 @@ void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
>>         gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
>>  }
>>
>> -static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
>> -                                struct gve_rx_alloc_rings_cfg *cfg,
>> -                                struct gve_rx_ring *rx,
>> -                                int idx)
>> +int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
>> +                         struct gve_rx_alloc_rings_cfg *cfg,
>> +                         struct gve_rx_ring *rx,
>> +                         int idx)
>>  {
>>         struct device *hdev = &priv->pdev->dev;
>>         size_t size;
>> --
>> 2.44.0.769.g3c40516874-goog
>>
