lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e6ioea475mj73ijqrjsuxausfekawf3xgav6xtbutr64ab4vb4@dlyjvq4pdef3>
Date: Wed, 19 Mar 2025 12:44:40 +0100
From: Jiri Pirko <jiri@...nulli.us>
To: "Keller, Jacob E" <jacob.e.keller@...el.com>
Cc: "netdev@...r.kernel.org" <netdev@...r.kernel.org>, 
	"davem@...emloft.net" <davem@...emloft.net>, "Dumazet, Eric" <edumazet@...gle.com>, 
	"kuba@...nel.org" <kuba@...nel.org>, "pabeni@...hat.com" <pabeni@...hat.com>, 
	"saeedm@...dia.com" <saeedm@...dia.com>, "leon@...nel.org" <leon@...nel.org>, 
	"tariqt@...dia.com" <tariqt@...dia.com>, "andrew+netdev@...n.ch" <andrew+netdev@...n.ch>, 
	"dakr@...nel.org" <dakr@...nel.org>, "rafael@...nel.org" <rafael@...nel.org>, 
	"gregkh@...uxfoundation.org" <gregkh@...uxfoundation.org>, "Kitszel, Przemyslaw" <przemyslaw.kitszel@...el.com>, 
	"Nguyen, Anthony L" <anthony.l.nguyen@...el.com>, "cratiu@...dia.com" <cratiu@...dia.com>, 
	"Knitter, Konrad" <konrad.knitter@...el.com>, "cjubran@...dia.com" <cjubran@...dia.com>
Subject: Re: [PATCH net-next RFC 2/3] net/mlx5: Introduce shared devlink
 instance for PFs on same chip

Tue, Mar 18, 2025 at 11:05:23PM +0100, jacob.e.keller@...el.com wrote:
>
>
>> -----Original Message-----
>> From: Jiri Pirko <jiri@...nulli.us>
>> Sent: Tuesday, March 18, 2025 5:47 AM
>> To: netdev@...r.kernel.org
>> Cc: davem@...emloft.net; Dumazet, Eric <edumazet@...gle.com>;
>> kuba@...nel.org; pabeni@...hat.com; saeedm@...dia.com; leon@...nel.org;
>> tariqt@...dia.com; andrew+netdev@...n.ch; dakr@...nel.org;
>> rafael@...nel.org; gregkh@...uxfoundation.org; Kitszel, Przemyslaw
>> <przemyslaw.kitszel@...el.com>; Nguyen, Anthony L
>> <anthony.l.nguyen@...el.com>; cratiu@...dia.com; Keller, Jacob E
>> <jacob.e.keller@...el.com>; Knitter, Konrad <konrad.knitter@...el.com>;
>> cjubran@...dia.com
>> Subject: [PATCH net-next RFC 2/3] net/mlx5: Introduce shared devlink instance for
>> PFs on same chip
>> 
>> From: Jiri Pirko <jiri@...dia.com>
>> 
>> Multiple PFS may reside on the same physical chip, running a single
>> firmware. Some of the resources and configurations may be shared among
>> these PFs. Currently, there is not good object to pin the configuration
>> knobs on.
>> 
>> Introduce a shared devlink, instantiated upon probe of the first PF,
>> removed during remove of the last PF. Back this shared devlink instance
>> by faux device, as there is no PCI device related to it.
>> 
>> Make the PF devlink instances nested in this shared devlink instance.
>> 
>> Example:
>> 
>> $ devlink dev
>> pci/0000:08:00.0:
>>   nested_devlink:
>>     auxiliary/mlx5_core.eth.0
>> faux/mlx5_core_83013c12b77faa1a30000c82a1045c91:
>>   nested_devlink:
>>     pci/0000:08:00.0
>>     pci/0000:08:00.1
>> auxiliary/mlx5_core.eth.0
>> pci/0000:08:00.1:
>>   nested_devlink:
>>     auxiliary/mlx5_core.eth.1
>> auxiliary/mlx5_core.eth.1
>> 
>> Signed-off-by: Jiri Pirko <jiri@...dia.com>
>> ---
>>  .../net/ethernet/mellanox/mlx5/core/Makefile  |   4 +-
>>  .../net/ethernet/mellanox/mlx5/core/main.c    |  18 +++
>>  .../ethernet/mellanox/mlx5/core/sh_devlink.c  | 150 ++++++++++++++++++
>>  .../ethernet/mellanox/mlx5/core/sh_devlink.h  |  10 ++
>>  include/linux/mlx5/driver.h                   |   5 +
>>  5 files changed, 185 insertions(+), 2 deletions(-)
>>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
>>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
>> 
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> index 568bbe5f83f5..510850b6e6e2 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> @@ -16,8 +16,8 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o
>> pagealloc.o \
>>  		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
>>  		fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o
>> events.o wq.o lib/gid.o \
>>  		lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o
>> diag/fs_tracepoint.o \
>> -		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o
>> diag/reporter_vnic.o \
>> -		fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o
>> +		diag/fw_tracer.o diag/crdump.o devlink.o sh_devlink.o
>> diag/rsc_dump.o \
>> +		diag/reporter_vnic.o fw_reset.o qos.o lib/tout.o lib/aso.o wc.o
>> fs_pool.o
>> 
>>  #
>>  # Netdev basic
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> b/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> index 710633d5fdbe..e1217a8bf4db 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> @@ -74,6 +74,7 @@
>>  #include "mlx5_irq.h"
>>  #include "hwmon.h"
>>  #include "lag/lag.h"
>> +#include "sh_devlink.h"
>> 
>>  MODULE_AUTHOR("Eli Cohen <eli@...lanox.com>");
>>  MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX
>> series) core driver");
>> @@ -1554,10 +1555,17 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
>>  	int err;
>> 
>>  	devl_lock(devlink);
>> +	if (dev->shd) {
>> +		err = devl_nested_devlink_set(priv_to_devlink(dev->shd),
>> +					      devlink);
>> +		if (err)
>> +			goto unlock;
>> +	}
>>  	devl_register(devlink);
>>  	err = mlx5_init_one_devl_locked(dev);
>>  	if (err)
>>  		devl_unregister(devlink);
>> +unlock:
>>  	devl_unlock(devlink);
>>  	return err;
>>  }
>> @@ -1998,6 +2006,13 @@ static int probe_one(struct pci_dev *pdev, const struct
>> pci_device_id *id)
>>  		goto pci_init_err;
>>  	}
>> 
>> +	err = mlx5_shd_init(dev);
>> +	if (err) {
>> +		mlx5_core_err(dev, "mlx5_shd_init failed with error code %d\n",
>> +			      err);
>> +		goto shd_init_err;
>> +	}
>> +
>>  	err = mlx5_init_one(dev);
>>  	if (err) {
>>  		mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n",
>> @@ -2009,6 +2024,8 @@ static int probe_one(struct pci_dev *pdev, const struct
>> pci_device_id *id)
>>  	return 0;
>> 
>>  err_init_one:
>> +	mlx5_shd_uninit(dev);
>> +shd_init_err:
>>  	mlx5_pci_close(dev);
>>  pci_init_err:
>>  	mlx5_mdev_uninit(dev);
>> @@ -2030,6 +2047,7 @@ static void remove_one(struct pci_dev *pdev)
>>  	mlx5_drain_health_wq(dev);
>>  	mlx5_sriov_disable(pdev, false);
>>  	mlx5_uninit_one(dev);
>> +	mlx5_shd_uninit(dev);
>>  	mlx5_pci_close(dev);
>>  	mlx5_mdev_uninit(dev);
>>  	mlx5_adev_idx_free(dev->priv.adev_idx);
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
>> b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
>> new file mode 100644
>> index 000000000000..671a3442525b
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
>> @@ -0,0 +1,150 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>> +/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
>> +
>> +#include <linux/device/faux.h>
>> +#include <linux/mlx5/driver.h>
>> +#include <linux/mlx5/vport.h>
>> +
>> +#include "sh_devlink.h"
>> +
>> +static LIST_HEAD(shd_list);
>> +static DEFINE_MUTEX(shd_mutex); /* Protects shd_list and shd->list */
>> +
>> +/* This structure represents a shared devlink instance,
>> + * there is one created for PF group of the same chip.
>> + */
>> +struct mlx5_shd {
>> +	/* Node in shd list */
>> +	struct list_head list;
>> +	/* Serial number of the chip */
>> +	const char *sn;
>> +	/* List of per-PF dev instances. */
>> +	struct list_head dev_list;
>> +	/* Related faux device */
>> +	struct faux_device *faux_dev;
>> +};
>> +
>
>For ice, the equivalent of this would essentially replace ice_adapter I imagine.

Yep.


>
>> +static const struct devlink_ops mlx5_shd_ops = {
>> +};
>> +
>> +static int mlx5_shd_faux_probe(struct faux_device *faux_dev)
>> +{
>> +	struct devlink *devlink;
>> +	struct mlx5_shd *shd;
>> +
>> +	devlink = devlink_alloc(&mlx5_shd_ops, sizeof(struct mlx5_shd),
>> &faux_dev->dev);
>> +	if (!devlink)
>> +		return -ENOMEM;
>> +	shd = devlink_priv(devlink);
>> +	faux_device_set_drvdata(faux_dev, shd);
>> +
>> +	devl_lock(devlink);
>> +	devl_register(devlink);
>> +	devl_unlock(devlink);
>> +	return 0;
>> +}
>> +
>> +static void mlx5_shd_faux_remove(struct faux_device *faux_dev)
>> +{
>> +	struct mlx5_shd *shd = faux_device_get_drvdata(faux_dev);
>> +	struct devlink *devlink = priv_to_devlink(shd);
>> +
>> +	devl_lock(devlink);
>> +	devl_unregister(devlink);
>> +	devl_unlock(devlink);
>> +	devlink_free(devlink);
>> +}
>> +
>> +static const struct faux_device_ops mlx5_shd_faux_ops = {
>> +	.probe = mlx5_shd_faux_probe,
>> +	.remove = mlx5_shd_faux_remove,
>> +};
>> +
>> +static struct mlx5_shd *mlx5_shd_create(const char *sn)
>> +{
>> +	struct faux_device *faux_dev;
>> +	struct mlx5_shd *shd;
>> +
>> +	faux_dev = faux_device_create(THIS_MODULE, sn, NULL,
>> &mlx5_shd_faux_ops);
>> +	if (!faux_dev)
>> +		return NULL;
>> +	shd = faux_device_get_drvdata(faux_dev);
>> +	if (!shd)
>> +		return NULL;
>> +	list_add_tail(&shd->list, &shd_list);
>> +	shd->sn = sn;
>> +	INIT_LIST_HEAD(&shd->dev_list);
>> +	shd->faux_dev = faux_dev;
>> +	return shd;
>> +}
>> +
>> +static void mlx5_shd_destroy(struct mlx5_shd *shd)
>> +{
>> +	list_del(&shd->list);
>> +	kfree(shd->sn);
>> +	faux_device_destroy(shd->faux_dev);
>> +}
>> +
>> +int mlx5_shd_init(struct mlx5_core_dev *dev)
>> +{
>> +	u8 *vpd_data __free(kfree) = NULL;
>> +	struct pci_dev *pdev = dev->pdev;
>> +	unsigned int vpd_size, kw_len;
>> +	struct mlx5_shd *shd;
>> +	const char *sn;
>> +	char *end;
>> +	int start;
>> +	int err;
>> +
>> +	if (!mlx5_core_is_pf(dev))
>> +		return 0;
>> +
>> +	vpd_data = pci_vpd_alloc(pdev, &vpd_size);
>> +	if (IS_ERR(vpd_data)) {
>> +		err = PTR_ERR(vpd_data);
>> +		return err == -ENODEV ? 0 : err;
>> +	}
>> +	start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "V3",
>> &kw_len);
>> +	if (start < 0) {
>> +		/* Fall-back to SN for older devices. */
>> +		start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
>> +
>> PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len);
>> +		if (start < 0)
>> +			return -ENOENT;
>> +	}
>> +	sn = kstrndup(vpd_data + start, kw_len, GFP_KERNEL);
>> +	if (!sn)
>> +		return -ENOMEM;
>> +	end = strchrnul(sn, ' ');
>> +	*end = '\0';
>> +
>> +	guard(mutex)(&shd_mutex);
>> +	list_for_each_entry(shd, &shd_list, list) {
>> +		if (!strcmp(shd->sn, sn)) {
>> +			kfree(sn);
>> +			goto found;
>> +		}
>> +	}
>> +	shd = mlx5_shd_create(sn);
>> +	if (!shd) {
>> +		kfree(sn);
>> +		return -ENOMEM;
>> +	}
>
>How is the faux device kept in memory? I guess its reference counted somewhere? But I don't see that reference being incremented in the list_for_each.

There is one faux created in mlx5_shd_create() for one shd instance.
Reference counting is done by &shd->dev_list. See mlx5_shd_uninit()
where mlx5_shd_destroy() is called.



>
>> +found:
>> +	list_add_tail(&dev->shd_list, &shd->dev_list);
>> +	dev->shd = shd;
>> +	return 0;
>> +}
>> +
>> +void mlx5_shd_uninit(struct mlx5_core_dev *dev)
>> +{
>> +	struct mlx5_shd *shd = dev->shd;
>> +
>> +	if (!dev->shd)
>> +		return;
>> +
>> +	guard(mutex)(&shd_mutex);
>> +	list_del(&dev->shd_list);
>> +	if (list_empty(&shd->dev_list))
>> +		mlx5_shd_destroy(shd);
>> +}
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
>> b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
>> new file mode 100644
>> index 000000000000..67df03e3c72e
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
>> @@ -0,0 +1,10 @@
>> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
>> +/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
>> +
>> +#ifndef __MLX5_SH_DEVLINK_H__
>> +#define __MLX5_SH_DEVLINK_H__
>> +
>> +int mlx5_shd_init(struct mlx5_core_dev *dev);
>> +void mlx5_shd_uninit(struct mlx5_core_dev *dev);
>> +
>> +#endif /* __MLX5_SH_DEVLINK_H__ */
>> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
>> index 46bd7550adf8..78f1f034568f 100644
>> --- a/include/linux/mlx5/driver.h
>> +++ b/include/linux/mlx5/driver.h
>> @@ -721,6 +721,8 @@ enum mlx5_wc_state {
>>  	MLX5_WC_STATE_SUPPORTED,
>>  };
>> 
>> +struct mlx5_shd;
>> +
>>  struct mlx5_core_dev {
>>  	struct device *device;
>>  	enum mlx5_coredev_type coredev_type;
>> @@ -783,6 +785,9 @@ struct mlx5_core_dev {
>>  	enum mlx5_wc_state wc_state;
>>  	/* sync write combining state */
>>  	struct mutex wc_state_lock;
>> +	/* node in shared devlink list */
>> +	struct list_head shd_list;
>> +	struct mlx5_shd *shd;
>>  };
>> 
>>  struct mlx5_db {
>> --
>> 2.48.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ