linux-kernel - Re: [PATCH v4] NET: Add ezchip ethernet driver

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <557DE348.2030105@gmail.com>
Date:	Sun, 14 Jun 2015 13:25:44 -0700
From:	Florian Fainelli <f.fainelli@...il.com>
To:	Noam Camus <noamc@...hip.com>, netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org
CC:	Alexey.Brodkin@...opsys.com, vgupta@...opsys.com,
	giladb@...hip.com, cmetcalf@...hip.com,
	Tal Zilcer <talz@...hip.com>
Subject: Re: [PATCH v4] NET: Add ezchip ethernet driver

Le 06/13/15 23:26, Noam Camus a écrit :
> From: Noam Camus <noamc@...hip.com>
> 
> Simple LAN device for debug or management purposes.
> Device supports interrupts for RX and TX(completion).
> Device does not have DMA ability.
> 
> Signed-off-by: Noam Camus <noamc@...hip.com>
> Signed-off-by: Tal Zilcer <talz@...hip.com>
> Acked-by: Alexey Brodkin <abrodkin@...opsys.com>
> ---
> +	/* copy last bytes (if any) */
> +	if (last) {
> +		u32 buf = nps_enet_reg_get(priv, NPS_ENET_REG_RX_BUF);
> +
> +		memcpy(reg, &buf, last);

memcpy_fromio() for this entire function?

> +	}
> +}
> +
> +static void nps_enet_rx_handler(struct net_device *ndev)
> +{
> +	struct nps_enet_priv *priv = netdev_priv(ndev);
> +	struct sk_buff *skb = priv->irq_data.skb;
> +	struct nps_enet_rx_ctl rx_ctrl;
> +	u32 frame_len;
> +
> +	rx_ctrl.value = priv->irq_data.rx_ctrl.value;
> +	frame_len = rx_ctrl.nr;
> +	/* Check if we got RX */
> +	if (!rx_ctrl.cr)
> +		return;
> +
> +	ndev->stats.rx_packets++;
> +	ndev->stats.rx_bytes += frame_len;
> +
> +	netif_receive_skb(skb);

Not sure why nps_enet_rx_handler here does not do most of the processing
of what nps_enet_rx_prep() does, but see below:

> +}
> +
> +static s32 nps_enet_rx_prep(struct net_device *ndev)
> +{
> +	struct nps_enet_priv *priv = netdev_priv(ndev);
> +	struct sk_buff *skb;
> +	struct nps_enet_rx_ctl rx_ctrl;
> +	u32 frame_len, err = 0;
> +	s32 ret = 0;

You do not seem to be propagating anything different than 1 or 0, does
that need to be signed?

> +
> +	rx_ctrl.value = nps_enet_reg_get(priv, NPS_ENET_REG_RX_CTL);
> +	frame_len = rx_ctrl.nr;
> +
> +	/* Check if we got RX */
> +	if (!rx_ctrl.cr) {
> +		priv->irq_data.rx_ctrl.value = 0;
> +		return ret;
> +	}
> +
> +	/* Check Rx error */
> +	if (rx_ctrl.er) {
> +		ndev->stats.rx_errors++;
> +		err = 1;
> +	}
> +
> +	/* Check Rx CRC error */
> +	if (rx_ctrl.crc) {
> +		ndev->stats.rx_crc_errors++;
> +		ndev->stats.rx_dropped++;
> +		err = 1;
> +	}
> +
> +	/* Check Frame length Min 64b */
> +	if (unlikely(frame_len < ETH_ZLEN)) {
> +		ndev->stats.rx_length_errors++;
> +		ndev->stats.rx_dropped++;
> +		err = 1;
> +	}
> +
> +	if (err)
> +		goto rx_irq_clean;

Ok, so ret = 0 means errors and ret = 1 means success, that is pretty
counter intuititive to what other parts of the kernel do.

> +
> +	/* Skb allocation */
> +	skb = netdev_alloc_skb_ip_align(ndev, frame_len);
> +	if (unlikely(!skb)) {
> +		ndev->stats.rx_errors++;
> +		ndev->stats.rx_dropped++;
> +		goto rx_irq_clean;
> +	}
> +
> +	/* Copy frame from Rx fifo into the skb */
> +	nps_enet_read_rx_fifo(ndev, skb->data, frame_len);
> +
> +	skb_put(skb, frame_len);
> +	skb->dev = ndev;

eth_type_trans does that assignement for you already.

> +	skb->protocol = eth_type_trans(skb, ndev);
> +	skb->ip_summed = CHECKSUM_UNNECESSARY;
> +
> +	priv->irq_data.skb = skb;
> +	priv->irq_data.rx_ctrl.value = rx_ctrl.value;

So you are retaining the SKB here, called from hard interrupt context
and only processing it in nps_enet_rx_handler(), is there a particular
reason why you are not storing the rx_ctrl status word in top-half and
processing the SKB in bottom-half entirely instead? This needs to be
documented in your driver design.

> +	ret = 1;
> +	goto rx_irq_frame_done;
> +
> +rx_irq_clean:
> +	/* Clean Rx fifo */
> +	nps_enet_clean_rx_fifo(ndev, frame_len);
> +	priv->irq_data.rx_ctrl.value = 0;
> +
> +rx_irq_frame_done:
> +	/* Ack Rx ctrl register */
> +	nps_enet_reg_set(priv, NPS_ENET_REG_RX_CTL, 0);
> +
> +	return ret;
> +}
> +
> +static void nps_enet_tx_handler(struct net_device *ndev)
> +{
> +	struct nps_enet_priv *priv = netdev_priv(ndev);
> +	struct nps_enet_tx_ctl tx_ctrl;
> +
> +	tx_ctrl.value = priv->irq_data.tx_ctrl.value;
> +	/* Check if we got TX */
> +	if (!priv->tx_packet_sent || tx_ctrl.ct)
> +		return;
> +
> +	/* Check Tx transmit error */
> +	if (unlikely(tx_ctrl.et)) {
> +		ndev->stats.tx_errors++;
> +	} else {
> +		ndev->stats.tx_packets++;
> +		ndev->stats.tx_bytes += tx_ctrl.nt;
> +	}
> +
> +	/* In nps_enet_start_xmit we disabled sending frames*/
> +	netif_wake_queue(ndev);

You might want to explain why this is safe to do here, presumably since
this seems to be a FIFO with a single entry (is that the case?) you can
do that as soon as you get the TX completion interrupt, but this needs
to be documented.

> +
> +	priv->tx_packet_sent = false;
> +}
> +
> +static s32 nps_enet_tx_prep(struct net_device *ndev)
> +{
> +	struct nps_enet_priv *priv = netdev_priv(ndev);
> +	struct nps_enet_tx_ctl tx_ctrl;
> +	s32 ret = 0;
> +
> +	tx_ctrl.value = nps_enet_reg_get(priv, NPS_ENET_REG_TX_CTL);
> +	if (!tx_ctrl.ct && priv->tx_packet_sent) {
> +		/* Ack Tx ctrl register */
> +		nps_enet_reg_set(priv, NPS_ENET_REG_TX_CTL, 0);
> +		priv->irq_data.tx_ctrl.value = tx_ctrl.value;
> +		ret = 1;
> +	} else {
> +		priv->irq_data.tx_ctrl.value = 0;
> +	}
> +
> +	return ret;
> +}
> +
> +static s32 nps_enet_prep(struct net_device *ndev)
> +{
> +	s32 ret = 0;
> +
> +	ret |= nps_enet_tx_prep(ndev);
> +	ret |= nps_enet_rx_prep(ndev);

Ok, now it is a little clearer why these two functions return 0 for
error/nothing than 1, these should probably be named something like
nps_enet_tx_has_work() or something like that.

> +
> +	return ret;
> +}
> +
> +/**
> + * nps_enet_poll - NAPI poll handler.
> + * @napi:       Pointer to napi_struct structure.
> + * @budget:     How many frames to process on 1 call.
> + *
> + * returns:     Number of processed frames
> + */
> +static int nps_enet_poll(struct napi_struct *napi, int budget)
> +{
> +	struct net_device *ndev = napi->dev;
> +	struct nps_enet_priv *priv = netdev_priv(ndev);
> +	struct nps_enet_buf_int_enable buf_int_enable = {
> +		.rx_rdy = NPS_ENET_ENABLE,
> +		.tx_done = NPS_ENET_ENABLE,};
> +
> +	nps_enet_tx_handler(ndev);
> +	nps_enet_rx_handler(ndev);
> +	napi_complete(napi);

That is not so simple, tx_handler should not be bounded to a particular
budget, however your RX handlers should not exceed bugdet. If you
process less than budget, you can go ahead and call napi_complete to
exit ->poll() if not, then you need to poll again the RX queue for more
packets.

If you only have a capacity of one oustanding packet in your FIFO, you
would want to adjust your budget accordingly.

> +	nps_enet_reg_set(priv, NPS_ENET_REG_BUF_INT_ENABLE,
> +			 buf_int_enable.value);
> +
> +	return budget;
> +}
> +

[snip]

> +/**
> + * nps_enet_start_xmit - Starts the data transmission.
> + * @skb:        sk_buff pointer that contains data to be Transmitted.
> + * @ndev:       Pointer to net_device structure.
> + *
> + * returns: NETDEV_TX_OK, on success
> + *              NETDEV_TX_BUSY, if any of the descriptors are not free.
> + *
> + * This function is invoked from upper layers to initiate transmission.
> + */
> +static netdev_tx_t nps_enet_start_xmit(struct sk_buff *skb,
> +				       struct net_device *ndev)
> +{
> +	/* This driver handles one frame at a time  */
> +	netif_stop_queue(ndev);
> +
> +	nps_enet_send_frame(ndev, skb);
> +
> +	dev_kfree_skb(skb);

You do have a TX completion handler, so you should not be freeing the
SKB just now, instead you should retain a reference to it, limit to only
how many outstanding transmits as allowed by your hardware, and call
dev_kfree_skb() in your tx_handler() instead. Here you are lying to the
networking stack by telling it earlier than you should that the packet
has been transmitted.

> +
> +	return NETDEV_TX_OK;
> +}
> +
> +#ifdef CONFIG_NET_POLL_CONTROLLER
> +static void nps_enet_poll_controller(struct net_device *ndev)
> +{
> +	disable_irq(ndev->irq);
> +	nps_enet_board_irq_handler(ndev->irq, ndev);
> +	enable_irq(ndev->irq);
> +}
> +#endif
> +
> +static const struct net_device_ops nps_netdev_ops = {
> +	.ndo_open		= nps_enet_open,
> +	.ndo_stop		= nps_enet_stop,
> +	.ndo_start_xmit		= nps_enet_start_xmit,
> +	.ndo_set_mac_address	= nps_enet_set_mac_address,
> +#ifdef CONFIG_NET_POLL_CONTROLLER
> +	.ndo_poll_controller	= nps_enet_poll_controller,
> +#endif

No set_rx_mode callback implemented at all?

> +};
> +
> +static s32 nps_enet_probe(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct net_device *ndev;
> +	struct nps_enet_priv *priv;
> +	s32 err = 0;
> +	const char *mac_addr;
> +	struct resource res_regs;
> +
> +	if (!dev->of_node)
> +		return -ENODEV;
> +
> +	ndev = alloc_etherdev(sizeof(struct nps_enet_priv));
> +	if (!ndev)
> +		return -ENOMEM;
> +
> +	platform_set_drvdata(pdev, ndev);
> +	SET_NETDEV_DEV(ndev, dev);
> +	priv = netdev_priv(ndev);
> +
> +	/* Get ENET registers base address from device tree */
> +	err = of_address_to_resource(dev->of_node, 0, &res_regs);
> +	if (err) {
> +		dev_err(dev, "failed to retrieve registers base from device tree\n");
> +		err = -ENODEV;
> +		goto out_netdev;
> +	}

You could do platform_get_resource() here and save a check against this,
since devm_ioremap_resource() already checks for the resource pointer
correctness.

> +
> +	/* The EZ NET specific entries in the device structure. */
> +	ndev->netdev_ops = &nps_netdev_ops;
> +	ndev->watchdog_timeo = (400 * HZ / 1000);
> +	ndev->flags &= ~IFF_MULTICAST;

Since you are disabling multicast at the hardware level, it still seems
like this is something that could be fixed in the future by properly
adding support for this, even though that means filtering mutlicast
groups entirely in software.

> +
> +	priv->regs_base = devm_ioremap_resource(dev, &res_regs);
> +	if (IS_ERR(priv->regs_base)) {
> +		err = PTR_ERR(priv->regs_base);
> +		goto out_netdev;
> +	}
> +	dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs_base);
> +
> +	/* set kernel MAC address to dev */
> +	mac_addr = of_get_mac_address(dev->of_node);
> +	if (mac_addr)
> +		ether_addr_copy(ndev->dev_addr, mac_addr);
> +	else
> +		eth_hw_addr_random(ndev);
> +
> +	/* Get IRQ number from device tree */
> +	priv->irq = irq_of_parse_and_map(dev->of_node, 0);
> +	if (!priv->irq) {
> +		dev_err(dev, "failed to retrieve <irq Rx-Tx> value from device tree\n");
> +		err = -ENODEV;
> +		goto out_netdev;
> +	}

platform_get_irq() is a little shorter and friendlier towards non-DT
platforms (just in case).

> +
> +	netif_napi_add(ndev, &priv->napi, nps_enet_poll, NAPI_POLL_WEIGHT);
> +
> +	/* Register the driver. Should be the last thing in probe */
> +	err = register_netdev(ndev);
> +	if (err) {
> +		dev_err(dev, "Failed to register ndev for %s, err = 0x%08x\n",
> +			ndev->name, (s32)err);
> +		err = -ENODEV;
> +		goto out_netif_api;
> +	}
> +
> +	dev_info(dev, "(rx/tx=%d)\n", priv->irq);

You are still taking an error path here, you probably want to return
here with 0.

> +
> +out_netif_api:
> +	netif_napi_del(&priv->napi);
> +out_netdev:
> +	if (err)
> +		free_netdev(ndev);
> +
> +	return 0;
> +}

-- 
Florian
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/