netdev - Re: [PATCH net-next] eea: Add basic driver framework for Alibaba Elastic Ethernet Adaptor

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7b957110-c675-438a-b0c2-ebc161a5d8e7@lunn.ch>
Date: Thu, 10 Jul 2025 15:45:38 +0200
From: Andrew Lunn <andrew@...n.ch>
To: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>
Cc: netdev@...r.kernel.org, Andrew Lunn <andrew+netdev@...n.ch>,
	"David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
	Wen Gu <guwen@...ux.alibaba.com>,
	Philo Lu <lulie@...ux.alibaba.com>,
	Lorenzo Bianconi <lorenzo@...nel.org>,
	Lukas Bulwahn <lukas.bulwahn@...hat.com>,
	Parthiban Veerasooran <Parthiban.Veerasooran@...rochip.com>,
	Geert Uytterhoeven <geert+renesas@...der.be>,
	Alexander Duyck <alexanderduyck@...com>,
	Dust Li <dust.li@...ux.alibaba.com>
Subject: Re: [PATCH net-next] eea: Add basic driver framework for Alibaba
 Elastic Ethernet Adaptor

> +module_param(aq_timeout, uint, 0644);

No module params please.

> +struct eea_aq_host_info_cfg {
> +#ifndef EEA_OS_DISTRO
> +#define EEA_OS_DISTRO		0
> +#endif
> +
> +#ifndef EEA_DRV_TYPE
> +#define EEA_DRV_TYPE		0
> +#endif
> +
> +#define EEA_OS_LINUX		1
> +#define EEA_SPEC_VER_MAJOR	1
> +#define EEA_SPEC_VER_MINOR	0
> +	__le16	os_type;        /* Linux, Win.. */
> +	__le16	os_dist;
> +	__le16	drv_type;
> +
> +	__le16	kern_ver_major;
> +	__le16	kern_ver_minor;
> +	__le16	kern_ver_sub_minor;
> +
> +	__le16	drv_ver_major;
> +	__le16	drv_ver_minor;
> +	__le16	drv_ver_sub_minor;
> +
> +	__le16	spec_ver_major;
> +	__le16	spec_ver_minor;
> +	__le16	pci_bdf;
> +	__le32	pci_domain;
> +
> +	u8      os_ver_str[64];
> +	u8      isa_str[64];

Why does it care about the OS, kernel version etc?

> +#define DMA_FLAGS (GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO)

Don't hide GFP_ flags behind a #define. It makes the code harder to
review.

> +	start = get_jiffies_64();
> +	while (!(cdesc = ering_cq_get_desc(enet->adminq.ring))) {
> +		cond_resched();
> +		cpu_relax();
> +
> +		timeout = secs_to_jiffies(READ_ONCE(aq_timeout));
> +		if (time_after64(get_jiffies_64(), start + timeout)) {
> +			netdev_err(enet->netdev, "admin queue timeout. timeout %d\n",
> +				   READ_ONCE(aq_timeout));
> +			return -1;
> +		}
> +	}

See if you can one of the macros from iopoll.h

> +static void eea_get_drvinfo(struct net_device *netdev,
> +			    struct ethtool_drvinfo *info)
> +{
> +	struct eea_net *enet = netdev_priv(netdev);
> +	struct eea_device *edev = enet->edev;
> +
> +	strscpy(info->driver,   KBUILD_MODNAME,     sizeof(info->driver));
> +	strscpy(info->bus_info, eea_pci_name(edev), sizeof(info->bus_info));
> +	snprintf(info->version, sizeof(info->version), "%d.%d.%d",
> +		 EEA_VER_MAJOR, EEA_VER_MINOR, EEA_VER_SUB_MINOR);

A hard coded version is pointless, because it never changes, yet the
kernel around the driver changes every week. Don't set version, and
the core will fill in the git hash, which is useful.

> +static int eea_get_link_ksettings(struct net_device *netdev,
> +				  struct ethtool_link_ksettings *cmd)
> +{
> +	struct eea_net *enet = netdev_priv(netdev);
> +
> +	cmd->base.speed  = enet->speed;
> +	cmd->base.duplex = enet->duplex;
> +	cmd->base.port   = PORT_OTHER;
> +
> +	return 0;
> +}
> +
> +static int eea_set_link_ksettings(struct net_device *netdev,
> +				  const struct ethtool_link_ksettings *cmd)
> +{
> +	return 0;

-EOPNOTSUPP. 

> +module_param(split_hdr_size, int, 0644);

No module params please.

> +static int eea_netdev_init_features(struct net_device *netdev,
> +				    struct eea_net *enet,
> +				    struct eea_device *edev)
> +{
> +	struct eea_aq_cfg *cfg __free(kfree) = NULL;
> +	int err;
> +	u32 mtu;
> +
> +	cfg = kmalloc(sizeof(*cfg), GFP_KERNEL);
> +
> +	err = eea_adminq_query_cfg(enet, cfg);
> +
> +	if (err)
> +		return err;
> +
> +	eea_update_cfg(enet, edev, cfg);
> +
> +	netdev->priv_flags |= IFF_UNICAST_FLT;
> +	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
> +
> +	netdev->hw_features |= NETIF_F_HW_CSUM;
> +	netdev->hw_features |= NETIF_F_GRO_HW;
> +	netdev->hw_features |= NETIF_F_SG;
> +	netdev->hw_features |= NETIF_F_TSO;
> +	netdev->hw_features |= NETIF_F_TSO_ECN;
> +	netdev->hw_features |= NETIF_F_TSO6;
> +
> +	netdev->features |= NETIF_F_HIGHDMA;
> +	netdev->features |= NETIF_F_HW_CSUM;
> +	netdev->features |= NETIF_F_SG;
> +	netdev->features |= NETIF_F_GSO_ROBUST;
> +	netdev->features |= netdev->hw_features & NETIF_F_ALL_TSO;
> +	netdev->features |= NETIF_F_RXCSUM;
> +	netdev->features |= NETIF_F_GRO_HW;
> +
> +	netdev->vlan_features = netdev->features;
> +
> +	eth_hw_addr_set(netdev, cfg->mac);
> +
> +	enet->speed = SPEED_UNKNOWN;
> +	enet->duplex = DUPLEX_UNKNOWN;
> +
> +	netdev->min_mtu = ETH_MIN_MTU;
> +
> +	mtu = le16_to_cpu(cfg->mtu);
> +	if (mtu < netdev->min_mtu) {
> +		dev_err(edev->dma_dev, "device MTU too small. %d < %d", mtu, netdev->min_mtu);
> +		return -EINVAL;
> +	}
> +
> +	netdev->mtu = mtu;
> +	netdev->max_mtu = mtu;

Setting mtu the same as max_mtu is unusual? Are you defaulting to jumbo? 

> +	netif_carrier_on(netdev);

Does the firmware give you no idea about carrier? You seem to have it
hard coded in a number of places.

> +static struct eea_net *eea_netdev_alloc(struct eea_device *edev, u32 pairs)
> +{
> +	struct net_device *netdev;
> +	struct eea_net *enet;
> +
> +	netdev = alloc_etherdev_mq(sizeof(struct eea_net), pairs);
> +	if (!netdev) {
> +		dev_warn(edev->dma_dev, "alloc_etherdev_mq failed with pairs %d\n", pairs);

dev_warn()? That is fatal, so dev_err(). Please only use dev_warn()
for something which you can recover from.

> +int eea_net_probe(struct eea_device *edev)
> +{
> +	struct eea_net *enet;
> +	int err = -ENOMEM;
> +
> +	if (edev->ha_reset)
> +		return eea_net_reprobe(edev);
> +
> +	enet = eea_netdev_alloc(edev, edev->rx_num);
> +	if (!enet)
> +		return -ENOMEM;
> +
> +	err = eea_create_adminq(enet, edev->rx_num + edev->tx_num);
> +	if (err)
> +		goto err_adminq;
> +
> +	err = eea_adminq_config_host_info(enet);
> +	if (err)
> +		goto err_hinfo;
> +
> +	err = eea_netdev_init_features(enet->netdev, enet, edev);
> +	if (err)
> +		goto err_feature;
> +
> +	err = register_netdev(enet->netdev);
> +	if (err)
> +		goto err_ready;
> +
> +	eea_update_ts_off(edev, enet);
> +	netif_carrier_off(enet->netdev);
> +
> +	netdev_info(enet->netdev, "eea probe success.\n");

netdev_dbg() or nothing. Don't spam the kernel log.

> +#define cfg_write64(reg, item, val) { \
> +	void *_r = reg; \
> +	iowrite64_twopart(val, \
> +			  cfg_pointer(_r, item ## _lo), \
> +			  cfg_pointer(_r, item ## _hi)); \
> +}

This might be better as a function, so you get better type checking.

> +static inline void iowrite64_twopart(u64 val, __le32 __iomem *lo,
> +				     __le32 __iomem *hi)

No inline functions in .c files. Let the compiler decide.

> +void eea_device_reset(struct eea_device *edev)
> +{
> +	struct eea_pci_device *ep_dev = edev->ep_dev;
> +	int i;
> +
> +	eea_pci_io_set_status(edev, 0);
> +
> +	while (eea_pci_io_get_status(edev))
> +		msleep(20);

No endless loops. iopoll.h

> +void __force *eea_pci_db_addr(struct eea_device *edev, u32 off)
> +{
> +	return (void __force *)edev->ep_dev->db_base + off;
> +}

When i see __force i start to wounder if the types are wrong. This
probably needs a comment.

> +/* ha handle code */
> +static void eea_ha_handle_work(struct work_struct *work)
> +{
> +	struct eea_pci_device *ep_dev;
> +	struct eea_device *edev;
> +	struct pci_dev *pci_dev;
> +	u16 reset;
> +
> +	ep_dev = container_of(work, struct eea_pci_device, ha_handle_work);
> +	edev = &ep_dev->edev;
> +
> +	dev_warn(&ep_dev->pci_dev->dev, "recv ha interrupt.\n");

What does a ha interrupt mean? Why is a dev_warn() needed? Some
comments would be good.

> +
> +	if (ep_dev->reset_pos) {
> +		pci_read_config_word(ep_dev->pci_dev, ep_dev->reset_pos, &reset);
> +		/* clear bit */
> +		pci_write_config_word(ep_dev->pci_dev, ep_dev->reset_pos, 0xFFFF);
> +
> +		if (reset & EEA_PCI_CAP_RESET_FLAG) {
> +			dev_warn(&ep_dev->pci_dev->dev, "recv device reset request.\n");
> +
> +			pci_dev = ep_dev->pci_dev;
> +
> +			if (mutex_trylock(&edev->ha_lock)) {

Maybe add a comment why you use trylock(). Who else might be holding
the lock, and why cannot you wait for it to be release?

> +				edev->ha_reset = true;
> +
> +				__eea_pci_remove(pci_dev, false);
> +				__eea_pci_probe(pci_dev, ep_dev);
> +
> +				edev->ha_reset = false;
> +				mutex_unlock(&edev->ha_lock);
> +			} else {
> +				dev_warn(&ep_dev->pci_dev->dev,
> +					 "ha device reset: trylock failed.\n");
> +			}
> +			return;
> +		}
> +	}
> +
> +	eea_queues_check_and_reset(&ep_dev->edev);
> +}
> +
> +static irqreturn_t eea_pci_ha_handle(int irq, void *data)
> +{
> +	struct eea_device *edev = data;
> +
> +	schedule_work(&edev->ep_dev->ha_handle_work);
> +
> +	return IRQ_HANDLED;

Maybe just use a threaded interrupt? What will make the code simpler.

> +static int eea_pci_ha_init(struct eea_device *edev, struct pci_dev *pci_dev)
> +{
> +	u8 pos, cfg_type_off, type, cfg_drv_off, cfg_dev_off;
> +	struct eea_pci_device *ep_dev = edev->ep_dev;
> +	int irq;
> +
> +	cfg_type_off = offsetof(struct eea_pci_cap, cfg_type);
> +	cfg_drv_off = offsetof(struct eea_pci_reset_reg, driver);
> +	cfg_dev_off = offsetof(struct eea_pci_reset_reg, device);
> +
> +	for (pos = pci_find_capability(pci_dev, PCI_CAP_ID_VNDR);
> +	     pos > 0;
> +	     pos = pci_find_next_capability(pci_dev, pos, PCI_CAP_ID_VNDR)) {
> +		pci_read_config_byte(pci_dev, pos + cfg_type_off, &type);
> +
> +		if (type == EEA_PCI_CAP_RESET_DEVICE) {
> +			/* notify device, driver support this feature. */
> +			pci_write_config_word(pci_dev, pos + cfg_drv_off, EEA_PCI_CAP_RESET_FLAG);
> +			pci_write_config_word(pci_dev, pos + cfg_dev_off, 0xFFFF);
> +
> +			edev->ep_dev->reset_pos = pos + cfg_dev_off;
> +			goto found;
> +		}
> +	}
> +
> +	dev_warn(&edev->ep_dev->pci_dev->dev, "Not Found reset cap.\n");
> +

Should there be an return -ENODEV; here?

> +found:
> +	snprintf(ep_dev->ha_irq_name, sizeof(ep_dev->ha_irq_name), "eea-ha@%s",
> +		 pci_name(ep_dev->pci_dev));
> +
> +	irq = pci_irq_vector(ep_dev->pci_dev, 0);
> +
> +	INIT_WORK(&ep_dev->ha_handle_work, eea_ha_handle_work);
> +
> +	return request_irq(irq, eea_pci_ha_handle, 0, ep_dev->ha_irq_name, edev);
> +}
> +static inline bool ering_irq_unactive(struct ering *ering)
> +{
> +	union {
> +		u64 data;
> +		struct db db;
> +	} val;
> +
> +	if (ering->mask == EEA_IRQ_MASK)
> +		return true;
> +
> +	ering->mask = EEA_IRQ_MASK;
> +
> +	val.db.kick_flags = EEA_IRQ_MASK;
> +
> +	writeq(val.data, (void __iomem *)ering->db);
> +
> +	return true;
> +}
> +
> +static inline bool ering_irq_active(struct ering *ering, struct ering *tx_ering)
> +{
> +	union {
> +		u64 data;
> +		struct db db;
> +	} val;
> +
> +	if (ering->mask == EEA_IRQ_UNMASK)
> +		return true;
> +
> +	ering->mask = EEA_IRQ_UNMASK;
> +
> +	val.db.kick_flags = EEA_IRQ_UNMASK;
> +
> +	val.db.tx_cq_head = cpu_to_le16(tx_ering->cq.hw_idx);
> +	val.db.rx_cq_head = cpu_to_le16(ering->cq.hw_idx);
> +
> +	writeq(val.data, (void __iomem *)ering->db);
> +
> +	return true;
> +}
> +
> +static inline void *ering_cq_get_desc(const struct ering *ering)
> +{
> +	u8 phase;
> +	u8 *desc;
> +
> +	desc = ering->cq.desc + (ering->cq.head << ering->cq.desc_size_shift);
> +
> +	phase = *(u8 *)(desc + ering->cq.desc_size - 1);
> +
> +	if ((phase & ERING_DESC_F_CQ_PHASE)  == ering->cq.phase) {
> +		dma_rmb();
> +		return desc;
> +	}
> +
> +	return NULL;
> +}

These three should be in a .c file. They are too big for inline in a
header file.

	Andrew