lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CO1PR11MB5089BDFAF1B498FE9FDDA3FCD6DE2@CO1PR11MB5089.namprd11.prod.outlook.com>
Date: Tue, 18 Mar 2025 22:05:23 +0000
From: "Keller, Jacob E" <jacob.e.keller@...el.com>
To: Jiri Pirko <jiri@...nulli.us>, "netdev@...r.kernel.org"
	<netdev@...r.kernel.org>
CC: "davem@...emloft.net" <davem@...emloft.net>, "Dumazet, Eric"
	<edumazet@...gle.com>, "kuba@...nel.org" <kuba@...nel.org>,
	"pabeni@...hat.com" <pabeni@...hat.com>, "saeedm@...dia.com"
	<saeedm@...dia.com>, "leon@...nel.org" <leon@...nel.org>, "tariqt@...dia.com"
	<tariqt@...dia.com>, "andrew+netdev@...n.ch" <andrew+netdev@...n.ch>,
	"dakr@...nel.org" <dakr@...nel.org>, "rafael@...nel.org" <rafael@...nel.org>,
	"gregkh@...uxfoundation.org" <gregkh@...uxfoundation.org>, "Kitszel,
 Przemyslaw" <przemyslaw.kitszel@...el.com>, "Nguyen, Anthony L"
	<anthony.l.nguyen@...el.com>, "cratiu@...dia.com" <cratiu@...dia.com>,
	"Knitter, Konrad" <konrad.knitter@...el.com>, "cjubran@...dia.com"
	<cjubran@...dia.com>
Subject: RE: [PATCH net-next RFC 2/3] net/mlx5: Introduce shared devlink
 instance for PFs on same chip



> -----Original Message-----
> From: Jiri Pirko <jiri@...nulli.us>
> Sent: Tuesday, March 18, 2025 5:47 AM
> To: netdev@...r.kernel.org
> Cc: davem@...emloft.net; Dumazet, Eric <edumazet@...gle.com>;
> kuba@...nel.org; pabeni@...hat.com; saeedm@...dia.com; leon@...nel.org;
> tariqt@...dia.com; andrew+netdev@...n.ch; dakr@...nel.org;
> rafael@...nel.org; gregkh@...uxfoundation.org; Kitszel, Przemyslaw
> <przemyslaw.kitszel@...el.com>; Nguyen, Anthony L
> <anthony.l.nguyen@...el.com>; cratiu@...dia.com; Keller, Jacob E
> <jacob.e.keller@...el.com>; Knitter, Konrad <konrad.knitter@...el.com>;
> cjubran@...dia.com
> Subject: [PATCH net-next RFC 2/3] net/mlx5: Introduce shared devlink instance for
> PFs on same chip
> 
> From: Jiri Pirko <jiri@...dia.com>
> 
> Multiple PFS may reside on the same physical chip, running a single
> firmware. Some of the resources and configurations may be shared among
> these PFs. Currently, there is not good object to pin the configuration
> knobs on.
> 
> Introduce a shared devlink, instantiated upon probe of the first PF,
> removed during remove of the last PF. Back this shared devlink instance
> by faux device, as there is no PCI device related to it.
> 
> Make the PF devlink instances nested in this shared devlink instance.
> 
> Example:
> 
> $ devlink dev
> pci/0000:08:00.0:
>   nested_devlink:
>     auxiliary/mlx5_core.eth.0
> faux/mlx5_core_83013c12b77faa1a30000c82a1045c91:
>   nested_devlink:
>     pci/0000:08:00.0
>     pci/0000:08:00.1
> auxiliary/mlx5_core.eth.0
> pci/0000:08:00.1:
>   nested_devlink:
>     auxiliary/mlx5_core.eth.1
> auxiliary/mlx5_core.eth.1
> 
> Signed-off-by: Jiri Pirko <jiri@...dia.com>
> ---
>  .../net/ethernet/mellanox/mlx5/core/Makefile  |   4 +-
>  .../net/ethernet/mellanox/mlx5/core/main.c    |  18 +++
>  .../ethernet/mellanox/mlx5/core/sh_devlink.c  | 150 ++++++++++++++++++
>  .../ethernet/mellanox/mlx5/core/sh_devlink.h  |  10 ++
>  include/linux/mlx5/driver.h                   |   5 +
>  5 files changed, 185 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> index 568bbe5f83f5..510850b6e6e2 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> @@ -16,8 +16,8 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o
> pagealloc.o \
>  		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
>  		fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o
> events.o wq.o lib/gid.o \
>  		lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o
> diag/fs_tracepoint.o \
> -		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o
> diag/reporter_vnic.o \
> -		fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o
> +		diag/fw_tracer.o diag/crdump.o devlink.o sh_devlink.o
> diag/rsc_dump.o \
> +		diag/reporter_vnic.o fw_reset.o qos.o lib/tout.o lib/aso.o wc.o
> fs_pool.o
> 
>  #
>  # Netdev basic
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c
> b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> index 710633d5fdbe..e1217a8bf4db 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> @@ -74,6 +74,7 @@
>  #include "mlx5_irq.h"
>  #include "hwmon.h"
>  #include "lag/lag.h"
> +#include "sh_devlink.h"
> 
>  MODULE_AUTHOR("Eli Cohen <eli@...lanox.com>");
>  MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX
> series) core driver");
> @@ -1554,10 +1555,17 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
>  	int err;
> 
>  	devl_lock(devlink);
> +	if (dev->shd) {
> +		err = devl_nested_devlink_set(priv_to_devlink(dev->shd),
> +					      devlink);
> +		if (err)
> +			goto unlock;
> +	}
>  	devl_register(devlink);
>  	err = mlx5_init_one_devl_locked(dev);
>  	if (err)
>  		devl_unregister(devlink);
> +unlock:
>  	devl_unlock(devlink);
>  	return err;
>  }
> @@ -1998,6 +2006,13 @@ static int probe_one(struct pci_dev *pdev, const struct
> pci_device_id *id)
>  		goto pci_init_err;
>  	}
> 
> +	err = mlx5_shd_init(dev);
> +	if (err) {
> +		mlx5_core_err(dev, "mlx5_shd_init failed with error code %d\n",
> +			      err);
> +		goto shd_init_err;
> +	}
> +
>  	err = mlx5_init_one(dev);
>  	if (err) {
>  		mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n",
> @@ -2009,6 +2024,8 @@ static int probe_one(struct pci_dev *pdev, const struct
> pci_device_id *id)
>  	return 0;
> 
>  err_init_one:
> +	mlx5_shd_uninit(dev);
> +shd_init_err:
>  	mlx5_pci_close(dev);
>  pci_init_err:
>  	mlx5_mdev_uninit(dev);
> @@ -2030,6 +2047,7 @@ static void remove_one(struct pci_dev *pdev)
>  	mlx5_drain_health_wq(dev);
>  	mlx5_sriov_disable(pdev, false);
>  	mlx5_uninit_one(dev);
> +	mlx5_shd_uninit(dev);
>  	mlx5_pci_close(dev);
>  	mlx5_mdev_uninit(dev);
>  	mlx5_adev_idx_free(dev->priv.adev_idx);
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
> b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
> new file mode 100644
> index 000000000000..671a3442525b
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
> @@ -0,0 +1,150 @@
> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> +/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
> +
> +#include <linux/device/faux.h>
> +#include <linux/mlx5/driver.h>
> +#include <linux/mlx5/vport.h>
> +
> +#include "sh_devlink.h"
> +
> +static LIST_HEAD(shd_list);
> +static DEFINE_MUTEX(shd_mutex); /* Protects shd_list and shd->list */
> +
> +/* This structure represents a shared devlink instance,
> + * there is one created for PF group of the same chip.
> + */
> +struct mlx5_shd {
> +	/* Node in shd list */
> +	struct list_head list;
> +	/* Serial number of the chip */
> +	const char *sn;
> +	/* List of per-PF dev instances. */
> +	struct list_head dev_list;
> +	/* Related faux device */
> +	struct faux_device *faux_dev;
> +};
> +

For ice, the equivalent of this would essentially replace ice_adapter I imagine.

> +static const struct devlink_ops mlx5_shd_ops = {
> +};
> +
> +static int mlx5_shd_faux_probe(struct faux_device *faux_dev)
> +{
> +	struct devlink *devlink;
> +	struct mlx5_shd *shd;
> +
> +	devlink = devlink_alloc(&mlx5_shd_ops, sizeof(struct mlx5_shd),
> &faux_dev->dev);
> +	if (!devlink)
> +		return -ENOMEM;
> +	shd = devlink_priv(devlink);
> +	faux_device_set_drvdata(faux_dev, shd);
> +
> +	devl_lock(devlink);
> +	devl_register(devlink);
> +	devl_unlock(devlink);
> +	return 0;
> +}
> +
> +static void mlx5_shd_faux_remove(struct faux_device *faux_dev)
> +{
> +	struct mlx5_shd *shd = faux_device_get_drvdata(faux_dev);
> +	struct devlink *devlink = priv_to_devlink(shd);
> +
> +	devl_lock(devlink);
> +	devl_unregister(devlink);
> +	devl_unlock(devlink);
> +	devlink_free(devlink);
> +}
> +
> +static const struct faux_device_ops mlx5_shd_faux_ops = {
> +	.probe = mlx5_shd_faux_probe,
> +	.remove = mlx5_shd_faux_remove,
> +};
> +
> +static struct mlx5_shd *mlx5_shd_create(const char *sn)
> +{
> +	struct faux_device *faux_dev;
> +	struct mlx5_shd *shd;
> +
> +	faux_dev = faux_device_create(THIS_MODULE, sn, NULL,
> &mlx5_shd_faux_ops);
> +	if (!faux_dev)
> +		return NULL;
> +	shd = faux_device_get_drvdata(faux_dev);
> +	if (!shd)
> +		return NULL;
> +	list_add_tail(&shd->list, &shd_list);
> +	shd->sn = sn;
> +	INIT_LIST_HEAD(&shd->dev_list);
> +	shd->faux_dev = faux_dev;
> +	return shd;
> +}
> +
> +static void mlx5_shd_destroy(struct mlx5_shd *shd)
> +{
> +	list_del(&shd->list);
> +	kfree(shd->sn);
> +	faux_device_destroy(shd->faux_dev);
> +}
> +
> +int mlx5_shd_init(struct mlx5_core_dev *dev)
> +{
> +	u8 *vpd_data __free(kfree) = NULL;
> +	struct pci_dev *pdev = dev->pdev;
> +	unsigned int vpd_size, kw_len;
> +	struct mlx5_shd *shd;
> +	const char *sn;
> +	char *end;
> +	int start;
> +	int err;
> +
> +	if (!mlx5_core_is_pf(dev))
> +		return 0;
> +
> +	vpd_data = pci_vpd_alloc(pdev, &vpd_size);
> +	if (IS_ERR(vpd_data)) {
> +		err = PTR_ERR(vpd_data);
> +		return err == -ENODEV ? 0 : err;
> +	}
> +	start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "V3",
> &kw_len);
> +	if (start < 0) {
> +		/* Fall-back to SN for older devices. */
> +		start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
> +
> PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len);
> +		if (start < 0)
> +			return -ENOENT;
> +	}
> +	sn = kstrndup(vpd_data + start, kw_len, GFP_KERNEL);
> +	if (!sn)
> +		return -ENOMEM;
> +	end = strchrnul(sn, ' ');
> +	*end = '\0';
> +
> +	guard(mutex)(&shd_mutex);
> +	list_for_each_entry(shd, &shd_list, list) {
> +		if (!strcmp(shd->sn, sn)) {
> +			kfree(sn);
> +			goto found;
> +		}
> +	}
> +	shd = mlx5_shd_create(sn);
> +	if (!shd) {
> +		kfree(sn);
> +		return -ENOMEM;
> +	}

How is the faux device kept in memory? I guess its reference counted somewhere? But I don't see that reference being incremented in the list_for_each.

> +found:
> +	list_add_tail(&dev->shd_list, &shd->dev_list);
> +	dev->shd = shd;
> +	return 0;
> +}
> +
> +void mlx5_shd_uninit(struct mlx5_core_dev *dev)
> +{
> +	struct mlx5_shd *shd = dev->shd;
> +
> +	if (!dev->shd)
> +		return;
> +
> +	guard(mutex)(&shd_mutex);
> +	list_del(&dev->shd_list);
> +	if (list_empty(&shd->dev_list))
> +		mlx5_shd_destroy(shd);
> +}
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
> b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
> new file mode 100644
> index 000000000000..67df03e3c72e
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
> +/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
> +
> +#ifndef __MLX5_SH_DEVLINK_H__
> +#define __MLX5_SH_DEVLINK_H__
> +
> +int mlx5_shd_init(struct mlx5_core_dev *dev);
> +void mlx5_shd_uninit(struct mlx5_core_dev *dev);
> +
> +#endif /* __MLX5_SH_DEVLINK_H__ */
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index 46bd7550adf8..78f1f034568f 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -721,6 +721,8 @@ enum mlx5_wc_state {
>  	MLX5_WC_STATE_SUPPORTED,
>  };
> 
> +struct mlx5_shd;
> +
>  struct mlx5_core_dev {
>  	struct device *device;
>  	enum mlx5_coredev_type coredev_type;
> @@ -783,6 +785,9 @@ struct mlx5_core_dev {
>  	enum mlx5_wc_state wc_state;
>  	/* sync write combining state */
>  	struct mutex wc_state_lock;
> +	/* node in shared devlink list */
> +	struct list_head shd_list;
> +	struct mlx5_shd *shd;
>  };
> 
>  struct mlx5_db {
> --
> 2.48.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ