[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CADEbmW1YeugjaPOUKyVLw+vDh6ahK+XTJjY7RF9qN2r=B8XoMg@mail.gmail.com>
Date: Fri, 8 Mar 2024 15:18:16 +0100
From: Michal Schmidt <mschmidt@...hat.com>
To: Przemek Kitszel <przemyslaw.kitszel@...el.com>
Cc: intel-wired-lan@...ts.osuosl.org, Jiri Pirko <jiri@...nulli.us>,
netdev@...r.kernel.org, Arkadiusz Kubalewski <arkadiusz.kubalewski@...el.com>,
Karol Kolacinski <karol.kolacinski@...el.com>, Jacob Keller <jacob.e.keller@...el.com>,
Jakub Kicinski <kuba@...nel.org>, "Temerkhanov, Sergey" <sergey.temerkhanov@...el.com>
Subject: Re: [Intel-wired-lan] [PATCH net-next v3 1/3] ice: add ice_adapter
for shared data across PFs on the same NIC
On Fri, Mar 8, 2024 at 1:17 PM Przemek Kitszel
<przemyslaw.kitszel@...el.com> wrote:
> On 3/7/24 23:25, Michal Schmidt wrote:
> > There is a need for synchronization between ice PFs on the same physical
> > adapter.
> >
> > Add a "struct ice_adapter" for holding data shared between PFs of the
> > same multifunction PCI device. The struct is refcounted - each ice_pf
> > holds a reference to it.
> >
> > Its first use will be for PTP. I expect it will be useful also to
> > improve the ugliness that is ice_prot_id_tbl.
> >
> > Signed-off-by: Michal Schmidt <mschmidt@...hat.com>
>
> Thank you very much for this series, we have spotted the need for
> something like that very recently, I have already pinged our PTP folks
> to take a look. (+CC Sergey)
>
> Why not wipe ice_ptp_lock() entirely?
> (could be left for Intel folks though)
I am doing this too, just not yet in this series I posted, because I
did not want to expand the scope of the series between reviews. See my
gitlab branch I linked in the cover letter, specifically this patch:
https://gitlab.com/mschmidt2/linux/-/commit/89a1bd2904ac8b0614bcfc2fce464bf5f60b0f0c
> please find the usual code related feedback inline
> (again, I really appreciate and I am grateful for this series)
>
> > ---
> > drivers/net/ethernet/intel/ice/Makefile | 3 +-
> > drivers/net/ethernet/intel/ice/ice.h | 2 +
> > drivers/net/ethernet/intel/ice/ice_adapter.c | 107 +++++++++++++++++++
> > drivers/net/ethernet/intel/ice/ice_adapter.h | 22 ++++
> > drivers/net/ethernet/intel/ice/ice_main.c | 8 ++
> > 5 files changed, 141 insertions(+), 1 deletion(-)
> > create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.c
> > create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.h
> >
> > diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
> > index cddd82d4ca0f..4fa09c321440 100644
> > --- a/drivers/net/ethernet/intel/ice/Makefile
> > +++ b/drivers/net/ethernet/intel/ice/Makefile
> > @@ -36,7 +36,8 @@ ice-y := ice_main.o \
> > ice_repr.o \
> > ice_tc_lib.o \
> > ice_fwlog.o \
> > - ice_debugfs.o
> > + ice_debugfs.o \
> > + ice_adapter.o
> > ice-$(CONFIG_PCI_IOV) += \
> > ice_sriov.o \
> > ice_virtchnl.o \
> > diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
> > index 365c03d1c462..1ffecbdd361a 100644
> > --- a/drivers/net/ethernet/intel/ice/ice.h
> > +++ b/drivers/net/ethernet/intel/ice/ice.h
> > @@ -77,6 +77,7 @@
> > #include "ice_gnss.h"
> > #include "ice_irq.h"
> > #include "ice_dpll.h"
> > +#include "ice_adapter.h"
> >
> > #define ICE_BAR0 0
> > #define ICE_REQ_DESC_MULTIPLE 32
> > @@ -544,6 +545,7 @@ struct ice_agg_node {
> >
> > struct ice_pf {
> > struct pci_dev *pdev;
> > + struct ice_adapter *adapter;
> >
> > struct devlink_region *nvm_region;
> > struct devlink_region *sram_region;
> > diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
> > new file mode 100644
> > index 000000000000..6b9eeba6edf7
> > --- /dev/null
> > +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
> > @@ -0,0 +1,107 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +// SPDX-FileCopyrightText: Copyright Red Hat
> > +
> > +#include <linux/cleanup.h>
> > +#include <linux/mutex.h>
> > +#include <linux/pci.h>
> > +#include <linux/slab.h>
> > +#include <linux/xarray.h>
> > +#include "ice_adapter.h"
> > +
> > +static DEFINE_XARRAY(ice_adapters);
> > +
> > +static unsigned long ice_adapter_index(const struct pci_dev *pdev)
> > +{
> > + unsigned int domain = pci_domain_nr(pdev->bus);
> > +
> > + WARN_ON((unsigned long)domain >> (BITS_PER_LONG - 13));
> > + return ((unsigned long)domain << 13) |
> > + ((unsigned long)pdev->bus->number << 5) |
> > + PCI_SLOT(pdev->devfn);
>
> xarray is free to use non-0-based indices, so this whole function could
> be simplified as:
>
> /* note the PCI_SLOT() call to clear function from devfn */
> return PCI_DEVID(pci_domain_nr(pdev->bus), PCI_SLOT(pdev->devfn));
This is not equivalent. My function encodes three PCI numbers into the
index: domain, bus, slot.
Your version would have only: domain, slot. The bus number would be
lost. And also, higher-than-16 bits of the domain would be lost
unnecessarily.
> > +}
> > +
> > +static struct ice_adapter *ice_adapter_new(void)
> > +{
> > + struct ice_adapter *adapter;
> > +
> > + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
> > + if (!adapter)
> > + return NULL;
> > +
> > + refcount_set(&adapter->refcount, 1);
> > +
> > + return adapter;
> > +}
> > +
> > +static void ice_adapter_free(struct ice_adapter *adapter)
> > +{
> > + kfree(adapter);
> > +}
>
> I would say that this is too thin wrapper for "kernel interface" (memory
> ptr) to warrant it, IOW just place kfree in place of ice_adapter_free,
> that will also free us from the need to use DEFINE_FREE()
I am anticipating the need for struct members to destroy here. Eg. in
order to replace ice_ptp_lock entirely, I will add a mutex, which will
require mutex_destroy to be called in ice_adapter_free.
>
> > +
> > +DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T))
> > +
> > +/**
> > + * ice_adapter_get - Get a shared ice_adapter structure.
> > + * @pdev: Pointer to the pci_dev whose driver is getting the ice_adapter.
> > + *
> > + * Gets a pointer to a shared ice_adapter structure. Physical functions (PFs)
> > + * of the same multi-function PCI device share one ice_adapter structure.
> > + * The ice_adapter is reference-counted. The PF driver must use ice_adapter_put
> > + * to release its reference.
> > + *
> > + * Context: Process, may sleep.
> > + * Return: Pointer to ice_adapter on success.
> > + * ERR_PTR() on error. -ENOMEM is the only possible error.
>
> that's inconvenient, to the point that it will be better to have a dummy
> static entry used for this purpose, but I see that this is something
> broader that this particular use case, so - feel free to ignore
Perhaps I don't get what you mean by a static entry. Maybe a static
singleton instance of struct ice_adapter? I don't want that, because I
can have multiple E810 NICs in the system and I don't want them to
share anything unnecessarily.
Besides, this allocates only a small amount of memory and the
allocation is GFP_KERNEL. It won't fail under any realistic scenario
(afaik, the "too small to fail" rule still holds in the kernel). This
is called only from ice_probe, which surely allocates much more
memory. I am not calling this every time I need to access the
ice_adapter. I'm keeping a pointer in ice_pf.
> > + */
> > +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
> > +{
> > + struct ice_adapter *ret, __free(ice_adapter_free) *adapter = NULL;
> > + unsigned long index = ice_adapter_index(pdev);
> > +
> > + adapter = ice_adapter_new();
> > + if (!adapter)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + xa_lock(&ice_adapters);
> > + ret = __xa_cmpxchg(&ice_adapters, index, NULL, adapter, GFP_KERNEL);
> > + if (xa_is_err(ret)) {
> > + ret = ERR_PTR(xa_err(ret));
> > + goto unlock;
> > + }
> > + if (ret) {
> > + refcount_inc(&ret->refcount);
> > + goto unlock;
> > + }
> > + ret = no_free_ptr(adapter);
>
> nice solution, but this is an idiom that we want across the kernel
> instead of opting out of auto management in such cases as this one?
> (esp that you have open-coded locking anyway)
I will follow up by changing the xa_lock usage to a guard if my
recently proposed patch ("xarray: add guard definitions for xa_lock")
gets accepted:
https://lore.kernel.org/lkml/20240228135352.14444-1-mschmidt@redhat.com/
> I would expect to have explicit two stores (first to ensure index is
> present, second to overwrite entry if null) easier than cmpxchg
> + unneeded allocation (that could cause whole function to fail!)
For reasons mentioned above, I am not worried about the allocation failing.
I am afraid moving away from the cmpxchg approach would force me to either:
- Re-add the additional mutex I had in v1 of this series and that
Jiri Pirko asked me to remove and rely on xa_lock; or
- Allocate ice_adapter under xa_lock, i.e. with GFP_ATOMIC. That
would only make running into ENOMEM more likely.
> > +unlock:
> > + xa_unlock(&ice_adapters);
> > + return ret;
> > +}
> > +
> > +/**
> > + * ice_adapter_put - Release a reference to the shared ice_adapter structure.
> > + * @pdev: Pointer to the pci_dev whose driver is releasing the ice_adapter.
> > + *
> > + * Releases the reference to ice_adapter previously obtained with
> > + * ice_adapter_get.
> > + *
> > + * Context: Any.
> > + */
> > +void ice_adapter_put(const struct pci_dev *pdev)
> > +{
> > + unsigned long index = ice_adapter_index(pdev);
> > + struct ice_adapter *adapter;
> > +
> > + xa_lock(&ice_adapters);
> > + adapter = xa_load(&ice_adapters, index);
> > + if (WARN_ON(!adapter))
> > + goto unlock;
> > +
> > + if (!refcount_dec_and_test(&adapter->refcount))
> > + goto unlock;
> > +
> > + WARN_ON(__xa_erase(&ice_adapters, index) != adapter);
> > + ice_adapter_free(adapter);
> > +unlock:
> > + xa_unlock(&ice_adapters);
> > +}
> > diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h
> > new file mode 100644
> > index 000000000000..cb5a02eb24c1
> > --- /dev/null
> > +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h
> > @@ -0,0 +1,22 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/* SPDX-FileCopyrightText: Copyright Red Hat */
> > +
> > +#ifndef _ICE_ADAPTER_H_
> > +#define _ICE_ADAPTER_H_
> > +
> > +#include <linux/refcount_types.h>
> > +
> > +struct pci_dev;
> > +
> > +/**
> > + * struct ice_adapter - PCI adapter resources shared across PFs
> > + * @refcount: Reference count. struct ice_pf objects hold the references.
> > + */
> > +struct ice_adapter {
> > + refcount_t refcount;
>
> this is refcounted always under a lock, so could be plain "int",
> but not a big deal
Yes, I know, but the over/under-flow checks provided by refcount_t
keep me warm and fuzzy :)
> > +};
> > +
> > +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev);
> > +void ice_adapter_put(const struct pci_dev *pdev);
> > +
> > +#endif /* _ICE_ADAPTER_H */
> > diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> > index 8f73ba77e835..a3c545e56256 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_main.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> > @@ -5093,6 +5093,7 @@ static int
> > ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
> > {
> > struct device *dev = &pdev->dev;
> > + struct ice_adapter *adapter;
> > struct ice_pf *pf;
> > struct ice_hw *hw;
> > int err;
> > @@ -5145,7 +5146,12 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
> >
> > pci_set_master(pdev);
> >
> > + adapter = ice_adapter_get(pdev);
> > + if (IS_ERR(adapter))
> > + return PTR_ERR(adapter);
> > +
> > pf->pdev = pdev;
> > + pf->adapter = adapter;
> > pci_set_drvdata(pdev, pf);
> > set_bit(ICE_DOWN, pf->state);
> > /* Disable service task until DOWN bit is cleared */
> > @@ -5196,6 +5202,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
> > err_load:
> > ice_deinit(pf);
> > err_init:
> > + ice_adapter_put(pdev);
> > pci_disable_device(pdev);
> > return err;
> > }
> > @@ -5302,6 +5309,7 @@ static void ice_remove(struct pci_dev *pdev)
> > ice_setup_mc_magic_wake(pf);
> > ice_set_wake(pf);
> >
> > + ice_adapter_put(pdev);
> > pci_disable_device(pdev);
> > }
> >
>
Powered by blists - more mailing lists