lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5f7eba6a-442c-4b26-d83f-de7cf633ce65@redhat.com>
Date:   Tue, 22 Nov 2022 11:53:44 +0800
From:   Jason Wang <jasowang@...hat.com>
To:     Shannon Nelson <snelson@...sando.io>, netdev@...r.kernel.org,
        davem@...emloft.net, kuba@...nel.org, mst@...hat.com,
        virtualization@...ts.linux-foundation.org
Cc:     drivers@...sando.io
Subject: Re: [RFC PATCH net-next 14/19] pds_vdpa: Add new PCI VF device for
 PDS vDPA services


在 2022/11/19 06:56, Shannon Nelson 写道:
> This is the initial PCI driver framework for the new pds_vdpa VF
> device driver, an auxiliary_bus client of the pds_core driver.
> This does the very basics of registering for the new PCI
> device 1dd8:100b, setting up debugfs entries, and registering
> with devlink.
>
> The new PCI device id has not made it to the official PCI ID Repository
> yet, but will soon be registered there.
>
> Signed-off-by: Shannon Nelson <snelson@...sando.io>
> ---
>   drivers/vdpa/pds/Makefile       |   7 +
>   drivers/vdpa/pds/debugfs.c      |  44 +++++++
>   drivers/vdpa/pds/debugfs.h      |  22 ++++
>   drivers/vdpa/pds/pci_drv.c      | 143 +++++++++++++++++++++
>   drivers/vdpa/pds/pci_drv.h      |  46 +++++++
>   include/linux/pds/pds_core_if.h |   1 +
>   include/linux/pds/pds_vdpa.h    | 219 ++++++++++++++++++++++++++++++++
>   7 files changed, 482 insertions(+)
>   create mode 100644 drivers/vdpa/pds/Makefile
>   create mode 100644 drivers/vdpa/pds/debugfs.c
>   create mode 100644 drivers/vdpa/pds/debugfs.h
>   create mode 100644 drivers/vdpa/pds/pci_drv.c
>   create mode 100644 drivers/vdpa/pds/pci_drv.h
>   create mode 100644 include/linux/pds/pds_vdpa.h
>
> diff --git a/drivers/vdpa/pds/Makefile b/drivers/vdpa/pds/Makefile
> new file mode 100644
> index 000000000000..3ba28a875574
> --- /dev/null
> +++ b/drivers/vdpa/pds/Makefile
> @@ -0,0 +1,7 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +# Copyright(c) 2022 Pensando Systems, Inc
> +
> +obj-$(CONFIG_PDS_VDPA) := pds_vdpa.o
> +
> +pds_vdpa-y := pci_drv.o	\
> +	      debugfs.o
> diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c
> new file mode 100644
> index 000000000000..f5b6654ae89b
> --- /dev/null
> +++ b/drivers/vdpa/pds/debugfs.c
> @@ -0,0 +1,44 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright(c) 2022 Pensando Systems, Inc */
> +
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <linux/types.h>
> +
> +#include <linux/pds/pds_core_if.h>
> +#include <linux/pds/pds_vdpa.h>
> +
> +#include "pci_drv.h"
> +#include "debugfs.h"
> +
> +#ifdef CONFIG_DEBUG_FS
> +
> +static struct dentry *dbfs_dir;
> +
> +void
> +pds_vdpa_debugfs_create(void)
> +{
> +	dbfs_dir = debugfs_create_dir(PDS_VDPA_DRV_NAME, NULL);
> +}
> +
> +void
> +pds_vdpa_debugfs_destroy(void)
> +{
> +	debugfs_remove_recursive(dbfs_dir);
> +	dbfs_dir = NULL;
> +}
> +
> +void
> +pds_vdpa_debugfs_add_pcidev(struct pds_vdpa_pci_device *vdpa_pdev)
> +{
> +	vdpa_pdev->dentry = debugfs_create_dir(pci_name(vdpa_pdev->pdev), dbfs_dir);
> +}
> +
> +void
> +pds_vdpa_debugfs_del_pcidev(struct pds_vdpa_pci_device *vdpa_pdev)
> +{
> +	debugfs_remove_recursive(vdpa_pdev->dentry);
> +	vdpa_pdev->dentry = NULL;
> +}
> +
> +#endif /* CONFIG_DEBUG_FS */
> diff --git a/drivers/vdpa/pds/debugfs.h b/drivers/vdpa/pds/debugfs.h
> new file mode 100644
> index 000000000000..ac31ab47746b
> --- /dev/null
> +++ b/drivers/vdpa/pds/debugfs.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* Copyright(c) 2022 Pensando Systems, Inc */
> +
> +#ifndef _PDS_VDPA_DEBUGFS_H_
> +#define _PDS_VDPA_DEBUGFS_H_
> +
> +#include <linux/debugfs.h>
> +
> +#ifdef CONFIG_DEBUG_FS
> +
> +void pds_vdpa_debugfs_create(void);
> +void pds_vdpa_debugfs_destroy(void);
> +void pds_vdpa_debugfs_add_pcidev(struct pds_vdpa_pci_device *vdpa_pdev);
> +void pds_vdpa_debugfs_del_pcidev(struct pds_vdpa_pci_device *vdpa_pdev);
> +#else
> +static inline void pds_vdpa_debugfs_create(void) { }
> +static inline void pds_vdpa_debugfs_destroy(void) { }
> +static inline void pds_vdpa_debugfs_add_pcidev(struct pds_vdpa_pci_device *vdpa_pdev) { }
> +static inline void pds_vdpa_debugfs_del_pcidev(struct pds_vdpa_pci_device *vdpa_pdev) { }
> +#endif
> +
> +#endif /* _PDS_VDPA_DEBUGFS_H_ */
> diff --git a/drivers/vdpa/pds/pci_drv.c b/drivers/vdpa/pds/pci_drv.c
> new file mode 100644
> index 000000000000..369e11153f21
> --- /dev/null
> +++ b/drivers/vdpa/pds/pci_drv.c
> @@ -0,0 +1,143 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright(c) 2022 Pensando Systems, Inc */
> +
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <linux/aer.h>
> +#include <linux/types.h>
> +#include <linux/vdpa.h>
> +
> +#include <linux/pds/pds_core_if.h>
> +#include <linux/pds/pds_vdpa.h>
> +
> +#include "pci_drv.h"
> +#include "debugfs.h"
> +
> +static void
> +pds_vdpa_dma_action(void *data)
> +{
> +	pci_free_irq_vectors((struct pci_dev *)data);
> +}


Nit: since we're release irq vectors, it might be better to use 
"pds_vdpa_irq_action"


> +
> +static int
> +pds_vdpa_pci_probe(struct pci_dev *pdev,
> +		   const struct pci_device_id *id)
> +{
> +	struct pds_vdpa_pci_device *vdpa_pdev;
> +	struct device *dev = &pdev->dev;
> +	int err;
> +
> +	vdpa_pdev = kzalloc(sizeof(*vdpa_pdev), GFP_KERNEL);
> +	if (!vdpa_pdev)
> +		return -ENOMEM;
> +	pci_set_drvdata(pdev, vdpa_pdev);
> +
> +	vdpa_pdev->pdev = pdev;
> +	vdpa_pdev->vf_id = pci_iov_vf_id(pdev);
> +	vdpa_pdev->pci_id = PCI_DEVID(pdev->bus->number, pdev->devfn);
> +
> +	/* Query system for DMA addressing limitation for the device. */
> +	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(PDS_CORE_ADDR_LEN));
> +	if (err) {
> +		dev_err(dev, "Unable to obtain 64-bit DMA for consistent allocations, aborting. %pe\n",
> +			ERR_PTR(err));
> +		goto err_out_free_mem;
> +	}
> +
> +	pci_enable_pcie_error_reporting(pdev);
> +
> +	/* Use devres management */
> +	err = pcim_enable_device(pdev);
> +	if (err) {
> +		dev_err(dev, "Cannot enable PCI device: %pe\n", ERR_PTR(err));
> +		goto err_out_free_mem;
> +	}
> +
> +	err = devm_add_action_or_reset(dev, pds_vdpa_dma_action, pdev);
> +	if (err) {
> +		dev_err(dev, "Failed adding devres for freeing irq vectors: %pe\n",
> +			ERR_PTR(err));
> +		goto err_out_pci_release_device;
> +	}
> +
> +	pci_set_master(pdev);
> +
> +	pds_vdpa_debugfs_add_pcidev(vdpa_pdev);
> +
> +	dev_info(dev, "%s: PF %#04x VF %#04x (%d) vf_id %d domain %d vdpa_aux %p vdpa_pdev %p\n",
> +		 __func__, pci_dev_id(vdpa_pdev->pdev->physfn),
> +		 vdpa_pdev->pci_id, vdpa_pdev->pci_id, vdpa_pdev->vf_id,
> +		 pci_domain_nr(pdev->bus), vdpa_pdev->vdpa_aux, vdpa_pdev);
> +
> +	return 0;
> +
> +err_out_pci_release_device:
> +	pci_disable_device(pdev);


Do we still need to care about this consider we use 
devres/pcim_enable_device()?


> +err_out_free_mem:
> +	pci_disable_pcie_error_reporting(pdev);
> +	kfree(vdpa_pdev);
> +	return err;
> +}
> +
> +static void
> +pds_vdpa_pci_remove(struct pci_dev *pdev)
> +{
> +	struct pds_vdpa_pci_device *vdpa_pdev = pci_get_drvdata(pdev);
> +
> +	pds_vdpa_debugfs_del_pcidev(vdpa_pdev);
> +	pci_clear_master(pdev);
> +	pci_disable_pcie_error_reporting(pdev);
> +	pci_disable_device(pdev);
> +	kfree(vdpa_pdev);
> +
> +	dev_info(&pdev->dev, "Removed\n");
> +}
> +
> +static const struct pci_device_id
> +pds_vdpa_pci_table[] = {
> +	{ PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_VDPA_VF) },
> +	{ 0, }
> +};
> +MODULE_DEVICE_TABLE(pci, pds_vdpa_pci_table);
> +
> +static struct pci_driver
> +pds_vdpa_pci_driver = {
> +	.name = PDS_VDPA_DRV_NAME,
> +	.id_table = pds_vdpa_pci_table,
> +	.probe = pds_vdpa_pci_probe,
> +	.remove = pds_vdpa_pci_remove
> +};
> +
> +static void __exit
> +pds_vdpa_pci_cleanup(void)
> +{
> +	pci_unregister_driver(&pds_vdpa_pci_driver);
> +
> +	pds_vdpa_debugfs_destroy();
> +}
> +module_exit(pds_vdpa_pci_cleanup);
> +
> +static int __init
> +pds_vdpa_pci_init(void)
> +{
> +	int err;
> +
> +	pds_vdpa_debugfs_create();
> +
> +	err = pci_register_driver(&pds_vdpa_pci_driver);
> +	if (err) {
> +		pr_err("%s: pci driver register failed: %pe\n", __func__, ERR_PTR(err));
> +		goto err_pci;
> +	}
> +
> +	return 0;
> +
> +err_pci:
> +	pds_vdpa_debugfs_destroy();
> +	return err;
> +}
> +module_init(pds_vdpa_pci_init);
> +
> +MODULE_DESCRIPTION(PDS_VDPA_DRV_DESCRIPTION);
> +MODULE_AUTHOR("Pensando Systems, Inc");
> +MODULE_LICENSE("GPL");
> diff --git a/drivers/vdpa/pds/pci_drv.h b/drivers/vdpa/pds/pci_drv.h
> new file mode 100644
> index 000000000000..747809e0df9e
> --- /dev/null
> +++ b/drivers/vdpa/pds/pci_drv.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* Copyright(c) 2022 Pensando Systems, Inc */
> +
> +#ifndef _PCI_DRV_H
> +#define _PCI_DRV_H
> +
> +#include <linux/pci.h>
> +#include <linux/virtio_pci_modern.h>
> +
> +#define PDS_VDPA_DRV_NAME           "pds_vdpa"
> +#define PDS_VDPA_DRV_DESCRIPTION    "Pensando vDPA VF Device Driver"
> +
> +#define PDS_VDPA_BAR_BASE	0
> +#define PDS_VDPA_BAR_INTR	2
> +#define PDS_VDPA_BAR_DBELL	4
> +
> +struct pds_dev_bar {
> +	int           index;
> +	void __iomem  *vaddr;
> +	phys_addr_t   pa;
> +	unsigned long len;
> +};
> +
> +struct pds_vdpa_intr_info {
> +	int index;
> +	int irq;
> +	int qid;
> +	char name[32];
> +};
> +
> +struct pds_vdpa_pci_device {
> +	struct pci_dev *pdev;
> +	struct pds_vdpa_aux *vdpa_aux;
> +
> +	int vf_id;
> +	int pci_id;
> +
> +	int nintrs;
> +	struct pds_vdpa_intr_info *intrs;
> +
> +	struct dentry *dentry;
> +
> +	struct virtio_pci_modern_device vd_mdev;
> +};
> +
> +#endif /* _PCI_DRV_H */
> diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h
> index 6333ec351e14..6e92697657e4 100644
> --- a/include/linux/pds/pds_core_if.h
> +++ b/include/linux/pds/pds_core_if.h
> @@ -8,6 +8,7 @@
>   
>   #define PCI_VENDOR_ID_PENSANDO			0x1dd8
>   #define PCI_DEVICE_ID_PENSANDO_CORE_PF		0x100c
> +#define PCI_DEVICE_ID_PENSANDO_VDPA_VF          0x100b
>   
>   #define PDS_CORE_BARS_MAX			4
>   #define PDS_CORE_PCI_BAR_DBELL			1
> diff --git a/include/linux/pds/pds_vdpa.h b/include/linux/pds/pds_vdpa.h
> new file mode 100644
> index 000000000000..7ecef890f175
> --- /dev/null
> +++ b/include/linux/pds/pds_vdpa.h
> @@ -0,0 +1,219 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* Copyright(c) 2022 Pensando Systems, Inc */
> +
> +#ifndef _PDS_VDPA_IF_H_
> +#define _PDS_VDPA_IF_H_
> +
> +#include <linux/pds/pds_common.h>
> +
> +#define PDS_DEV_TYPE_VDPA_STR	"vDPA"
> +#define PDS_VDPA_DEV_NAME	PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR
> +
> +/*
> + * enum pds_vdpa_cmd_opcode - vDPA Device commands
> + */
> +enum pds_vdpa_cmd_opcode {
> +	PDS_VDPA_CMD_INIT		= 48,
> +	PDS_VDPA_CMD_IDENT		= 49,
> +	PDS_VDPA_CMD_RESET		= 51,
> +	PDS_VDPA_CMD_VQ_RESET		= 52,
> +	PDS_VDPA_CMD_VQ_INIT		= 53,
> +	PDS_VDPA_CMD_STATUS_UPDATE	= 54,
> +	PDS_VDPA_CMD_SET_FEATURES	= 55,
> +	PDS_VDPA_CMD_SET_ATTR		= 56,
> +};
> +
> +/**
> + * struct pds_vdpa_cmd - generic command
> + * @opcode:	Opcode
> + * @vdpa_index:	Index for vdpa subdevice
> + * @vf_id:	VF id
> + */
> +struct pds_vdpa_cmd {
> +	u8     opcode;
> +	u8     vdpa_index;
> +	__le16 vf_id;
> +};
> +
> +/**
> + * struct pds_vdpa_comp - generic command completion
> + * @status:	Status of the command (enum pds_core_status_code)
> + * @rsvd:	Word boundary padding
> + * @color:	Color bit
> + */
> +struct pds_vdpa_comp {
> +	u8 status;
> +	u8 rsvd[14];
> +	u8 color;
> +};
> +
> +/**
> + * struct pds_vdpa_init_cmd - INIT command
> + * @opcode:	Opcode PDS_VDPA_CMD_INIT
> + * @vdpa_index: Index for vdpa subdevice
> + * @vf_id:	VF id
> + * @len:	length of config info DMA space
> + * @config_pa:	address for DMA of virtio_net_config struct


Looks like the structure is not specific to net, if yes, we may tweak 
the above comment to say it's the address of the device configuration space.


> + */
> +struct pds_vdpa_init_cmd {
> +	u8     opcode;
> +	u8     vdpa_index;
> +	__le16 vf_id;
> +	__le32 len;
> +	__le64 config_pa;
> +};
> +
> +/**
> + * struct pds_vdpa_ident - vDPA identification data
> + * @hw_features:	vDPA features supported by device
> + * @max_vqs:		max queues available (2 queues for a single queuepair)
> + * @max_qlen:		log(2) of maximum number of descriptors
> + * @min_qlen:		log(2) of minimum number of descriptors


Note that is you have the plan to support packed virtqueue, the qlen is 
not necessarily the power of 2.


> + *
> + * This struct is used in a DMA block that is set up for the PDS_VDPA_CMD_IDENT
> + * transaction.  Set up the DMA block and send the address in the IDENT cmd
> + * data, the DSC will write the ident information, then we can remove the DMA
> + * block after reading the answer.  If the completion status is 0, then there
> + * is valid information, else there was an error and the data should be invalid.
> + */
> +struct pds_vdpa_ident {
> +	__le64 hw_features;
> +	__le16 max_vqs;
> +	__le16 max_qlen;
> +	__le16 min_qlen;
> +};
> +
> +/**
> + * struct pds_vdpa_ident_cmd - IDENT command
> + * @opcode:	Opcode PDS_VDPA_CMD_IDENT
> + * @rsvd:       Word boundary padding
> + * @vf_id:	VF id
> + * @len:	length of ident info DMA space
> + * @ident_pa:	address for DMA of ident info (struct pds_vdpa_ident)
> + *			only used for this transaction, then forgotten by DSC
> + */
> +struct pds_vdpa_ident_cmd {
> +	u8     opcode;
> +	u8     rsvd;
> +	__le16 vf_id;
> +	__le32 len;
> +	__le64 ident_pa;
> +};
> +
> +/**
> + * struct pds_vdpa_status_cmd - STATUS_UPDATE command
> + * @opcode:	Opcode PDS_VDPA_CMD_STATUS_UPDATE
> + * @vdpa_index: Index for vdpa subdevice
> + * @vf_id:	VF id
> + * @status:	new status bits
> + */
> +struct pds_vdpa_status_cmd {
> +	u8     opcode;
> +	u8     vdpa_index;
> +	__le16 vf_id;
> +	u8     status;
> +};
> +
> +/**
> + * enum pds_vdpa_attr - List of VDPA device attributes
> + * @PDS_VDPA_ATTR_MAC:          MAC address
> + * @PDS_VDPA_ATTR_MAX_VQ_PAIRS: Max virtqueue pairs
> + */
> +enum pds_vdpa_attr {
> +	PDS_VDPA_ATTR_MAC          = 1,
> +	PDS_VDPA_ATTR_MAX_VQ_PAIRS = 2,
> +};
> +
> +/**
> + * struct pds_vdpa_setattr_cmd - SET_ATTR command
> + * @opcode:		Opcode PDS_VDPA_CMD_SET_ATTR
> + * @vdpa_index:		Index for vdpa subdevice
> + * @vf_id:		VF id
> + * @attr:		attribute to be changed (enum pds_vdpa_attr)
> + * @pad:		Word boundary padding
> + * @mac:		new mac address to be assigned as vdpa device address
> + * @max_vq_pairs:	new limit of virtqueue pairs
> + */
> +struct pds_vdpa_setattr_cmd {
> +	u8     opcode;
> +	u8     vdpa_index;
> +	__le16 vf_id;
> +	u8     attr;
> +	u8     pad[3];
> +	union {
> +		u8 mac[6];
> +		__le16 max_vq_pairs;


So does this mean if we want to set both mac and max_vq_paris, we need 
two commands? The seems to be less efficient, since the mgmt layer could 
provision more attributes here. Can we pack all attributes into a single 
command?


> +	} __packed;
> +};
> +
> +/**
> + * struct pds_vdpa_vq_init_cmd - queue init command
> + * @opcode: Opcode PDS_VDPA_CMD_VQ_INIT
> + * @vdpa_index:	Index for vdpa subdevice
> + * @vf_id:	VF id
> + * @qid:	Queue id (bit0 clear = rx, bit0 set = tx, qid=N is ctrlq)


I wonder any reason we need to design it like this, it would be better 
to make it general to be used by other type of virtio devices.


> + * @len:	log(2) of max descriptor count
> + * @desc_addr:	DMA address of descriptor area
> + * @avail_addr:	DMA address of available descriptors (aka driver area)
> + * @used_addr:	DMA address of used descriptors (aka device area)
> + * @intr_index:	interrupt index


Is this something like MSI-X vector?


> + */
> +struct pds_vdpa_vq_init_cmd {
> +	u8     opcode;
> +	u8     vdpa_index;
> +	__le16 vf_id;
> +	__le16 qid;
> +	__le16 len;
> +	__le64 desc_addr;
> +	__le64 avail_addr;
> +	__le64 used_addr;
> +	__le16 intr_index;
> +};
> +
> +/**
> + * struct pds_vdpa_vq_init_comp - queue init completion
> + * @status:	Status of the command (enum pds_core_status_code)
> + * @hw_qtype:	HW queue type, used in doorbell selection
> + * @hw_qindex:	HW queue index, used in doorbell selection
> + * @rsvd:	Word boundary padding
> + * @color:	Color bit


More comment is needed to know the how to use this color bit.


> + */
> +struct pds_vdpa_vq_init_comp {
> +	u8     status;
> +	u8     hw_qtype;
> +	__le16 hw_qindex;
> +	u8     rsvd[11];
> +	u8     color;
> +};
> +
> +/**
> + * struct pds_vdpa_vq_reset_cmd - queue reset command
> + * @opcode:	Opcode PDS_VDPA_CMD_VQ_RESET


Is there a chance that we could have more type of opcode here?

Thanks


> + * @vdpa_index:	Index for vdpa subdevice
> + * @vf_id:	VF id
> + * @qid:	Queue id
> + */
> +struct pds_vdpa_vq_reset_cmd {
> +	u8     opcode;
> +	u8     vdpa_index;
> +	__le16 vf_id;
> +	__le16 qid;
> +};
> +
> +/**
> + * struct pds_vdpa_set_features_cmd - set hw features
> + * @opcode: Opcode PDS_VDPA_CMD_SET_FEATURES
> + * @vdpa_index:	Index for vdpa subdevice
> + * @vf_id:	VF id
> + * @rsvd:       Word boundary padding
> + * @features:	Feature bit mask
> + */
> +struct pds_vdpa_set_features_cmd {
> +	u8     opcode;
> +	u8     vdpa_index;
> +	__le16 vf_id;
> +	__le32 rsvd;
> +	__le64 features;
> +};
> +
> +#endif /* _PDS_VDPA_IF_H_ */

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ