linux-kernel - Re: [PATCH v4] lpc32xx: Added ethernet driver

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1330987524.2538.61.camel@bwh-desktop>
Date:	Mon, 5 Mar 2012 22:45:24 +0000
From:	Ben Hutchings <bhutchings@...arflare.com>
To:	Roland Stigge <stigge@...com.de>
CC:	<davem@...emloft.net>, <jeffrey.t.kirsher@...el.com>,
	<alexander.h.duyck@...el.com>, <eilong@...adcom.com>,
	<ian.campbell@...rix.com>, <netdev@...r.kernel.org>,
	<w.sang@...gutronix.de>, <linux-kernel@...r.kernel.org>,
	<kevin.wells@....com>, <linux-arm-kernel@...ts.infradead.org>,
	<arnd@...db.de>, <baruch@...s.co.il>, <joe@...ches.com>
Subject: Re: [PATCH v4] lpc32xx: Added ethernet driver

On Mon, 2012-03-05 at 22:40 +0100, Roland Stigge wrote:
[...]
> --- /dev/null
> +++ linux-2.6/drivers/net/ethernet/nxp/lpc_eth.c
[...]
> +/*
> + * Transmit timeout, default 2.5 seconds.
> + */
> +static int watchdog = 2500;
> +module_param(watchdog, int, 0400);
> +MODULE_PARM_DESC(watchdog, "transmit timeout in milliseconds");

Why does this need to be a parameter?

> +/*
> + * Structure of a TX/RX descriptors and RX status
> + */
> +struct txrx_desc_t {
> +	volatile u32 packet;
> +	volatile u32 control;
> +};
> +struct rx_status_t {
> +	volatile u32 statusinfo;
> +	volatile u32 statushashcrc;
> +};

Use of volatile here is probably wrong; see
Documentation/volatile-considered-harmful.txt.

> +/*
> + * Device driver data structure
> + */
> +struct netdata_local {
> +	struct platform_device	*pdev;
> +	struct net_device	*ndev;
> +	spinlock_t		lock;
> +	void __iomem		*net_base;
> +	u32			msg_enable;
> +	struct sk_buff		*skb[ENET_TX_DESC];
> +	unsigned int		last_tx_idx;
> +	unsigned int		num_used_tx_buffs;
> +	struct mii_bus		*mii_bus;
> +	struct phy_device	*phy_dev;
> +	struct clk		*clk;
> +	dma_addr_t		dma_buff_base_p;
> +	void			*dma_buff_base_v;
> +	size_t			dma_buff_size;
> +	struct txrx_desc_t	*tx_desc_v[ENET_TX_DESC];
> +	u32			*tx_stat_v[ENET_TX_DESC];
> +	void			*tx_buff_v[ENET_TX_DESC];
> +	struct txrx_desc_t	*rx_desc_v[ENET_RX_DESC];
> +	struct rx_status_t	*rx_stat_v[ENET_RX_DESC];
> +	void			*rx_buff_v[ENET_RX_DESC];

Why are these 6 members defined as arrays of pointers and not single
pointers to arrays?  It appears that each array element is initialised
to point into an array and then never changed; for example
tx_desc_v[i] == tx_desc_v[0] + i.

[...]
> +static void __lpc_eth_init(struct netdata_local *pldat)
> +{
[...]
> +	/* Clear and enable interrupts */
> +	writel(0xFFFF, LPC_ENET_INTCLEAR(pldat->net_base));
> +	lpc_eth_enable_int(pldat->net_base);
> +
> +	/* Get the next TX buffer output index */
> +	pldat->num_used_tx_buffs = 0;
> +	pldat->last_tx_idx =
> +		readl(LPC_ENET_TXCONSUMEINDEX(pldat->net_base));

Doesn't this need to be done *before* enabling interrupts?  Also, I
think you need an smp_wmb() so that the interrupt handler is guaranteed
to see all these writes.

[...]
> +static void lpc_handle_link_change(struct net_device *ndev)
> +{
> +	struct netdata_local *pldat = netdev_priv(ndev);
> +	struct phy_device *phydev = pldat->phy_dev;
> +	unsigned long flags;
> +
> +	int status_change = 0;

This is logically a boolean, so use bool/true/false.

[...]
> +static void __lpc_handle_xmit(struct net_device *ndev)
> +{
[...]
> +		/* Any errors occurred? */
> +		if (txstat & TXSTATUS_ERROR) {
> +			if (txstat & TXSTATUS_UNDERRUN) {
> +				/* FIFO underrun */
> +				ndev->stats.tx_fifo_errors++;
> +			}
> +			if (txstat & TXSTATUS_LATECOLL) {
> +				/* Late collision */
> +				ndev->stats.tx_aborted_errors++;
> +			}
> +			if (txstat & TXSTATUS_EXCESSCOLL) {
> +				/* Excessive collision */
> +				ndev->stats.tx_aborted_errors++;
> +			}
> +			if (txstat & TXSTATUS_EXCESSDEFER) {
> +				/* Defer limit */
> +				ndev->stats.tx_aborted_errors++;
> +			}
> +			ndev->stats.tx_errors++;
> +
> +			/* Buffer transmit failed, requeue it */
> +			lpc_eth_hard_start_xmit(skb, ndev);

It is not the responsibility of an Ethernet network driver to retry
transmission.  Also, this will deadlock on an SMP system since
lpc_eth_hard_start_xmit() will try to acquire pldat->lock which is
already held.  I suggest you test with lockdep in case there are any
other such bugs which are being masked by running on a UP system.

[...]
> +static int __lpc_handle_recv(struct net_device *ndev, int budget)
> +{
[...]
> +			/* Packet is good */
> +			skb = dev_alloc_skb(len + 8);
> +			if (!skb)
> +				ndev->stats.rx_dropped++;
> +			else {
> +				skb_reserve(skb, 8);

What are you reserving this space for?

> +				prdbuf = skb_put(skb, (len - 0));

len - 0??

> +				/* Copy packet from buffer */
> +				memcpy(prdbuf,
> +					pldat->rx_buff_v[rxconsidx], len);
> +
> +				/* Pass to upper layer */
> +				skb->protocol = eth_type_trans(skb, ndev);
> +				netif_rx(skb);
> +				ndev->last_rx = jiffies;

Drivers don't need to set last_rx any more.

[...]
> +static int lpc_eth_poll(struct napi_struct *napi, int budget)
> +{
> +	struct netdata_local *pldat = container_of(napi,
> +			struct netdata_local, napi);
> +	struct net_device *ndev = pldat->ndev;
> +	unsigned long flags;
> +	int rx_done = 0;
> +
> +	spin_lock_irqsave(&pldat->lock, flags);
> +
> +	__lpc_handle_xmit(ndev);
> +	rx_done = __lpc_handle_recv(ndev, budget);
> +
> +	if (rx_done < budget) {
> +		napi_complete(napi);
> +		lpc_eth_enable_int(pldat->net_base);
> +	}
> +
> +	spin_unlock_irqrestore(&pldat->lock, flags);

This is really sad.  You implement NAPI but then take away most of the
benefits of that by disabling interrupts.

It looks like you could safely unlock pldat->lock before calling
__lpc_handle_recv - nothing else manipulates RX queue state so no lock
is required.

As for the TX side, you can probably use the TX queue lock
(__netif_tx_lock, __netif_tx_unlock) to serialise with
lpc_eth_hard_start_xmit() and avoid taking pldat->lock in either
__lpc_handle_xmit() or here.

[...]
> +	return rx_done;
> +}
> +
> +static irqreturn_t __lpc_eth_interrupt(int irq, void *dev_id)
> +{
> +	struct net_device *ndev = dev_id;
> +	struct netdata_local *pldat = netdev_priv(ndev);
> +	u32 tmp;
> +
> +	spin_lock(&pldat->lock);
> +
> +	tmp = readl(LPC_ENET_INTSTATUS(pldat->net_base));
> +	/* Clear interrupts */
> +	writel(tmp, LPC_ENET_INTCLEAR(pldat->net_base));
> +
> +	if (likely(napi_schedule_prep(&pldat->napi))) {
> +		lpc_eth_disable_int(pldat->net_base);
> +		__napi_schedule(&pldat->napi);
> +	}

I think you need to mask the interrupt unconditionally here.  See my
next comment.

> +	spin_unlock(&pldat->lock);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static int lpc_eth_close(struct net_device *ndev)
> +{
> +	unsigned long flags;
> +	struct netdata_local *pldat = netdev_priv(ndev);
> +
> +	if (netif_msg_ifdown(pldat))
> +		dev_dbg(&pldat->pdev->dev, "shutting down %s\n", ndev->name);
> +
> +	napi_disable(&pldat->napi);

If an interrupt is received after this returns, napi_schedule_prep()
will return false and the interrupt will not be masked.

There is a similar problem in lpc_eth_open() where you enable interrupts
before NAPI.

> +	netif_stop_queue(ndev);
> +
> +	if (pldat->phy_dev)
> +		phy_stop(pldat->phy_dev);
> +
> +	spin_lock_irqsave(&pldat->lock, flags);
> +	__lpc_eth_reset(pldat);
> +	netif_carrier_off(ndev);
> +	writel(0, LPC_ENET_MAC1(pldat->net_base));
> +	writel(0, LPC_ENET_MAC2(pldat->net_base));
> +	spin_unlock_irqrestore(&pldat->lock, flags);
> +
> +	__lpc_eth_clock_enable(pldat, 0);
> +
> +	return 0;
> +}
[...]
> +static int lpc_eth_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> +{
> +	struct netdata_local *pldat = netdev_priv(ndev);
> +	u32 len, txidx;
> +	u32 *ptxstat;
> +	struct txrx_desc_t *ptxrxdesc;
> +
> +	len = skb->len;
> +
> +	spin_lock_irq(&pldat->lock);
> +
> +	if (pldat->num_used_tx_buffs >= (ENET_TX_DESC - 1)) {
> +		/* This function should never be called when there are no
> +		   buffers, log the error */

So use WARN to make the error really obvious.

> +		netif_stop_queue(ndev);
> +		spin_unlock_irq(&pldat->lock);
> +		dev_err(&pldat->pdev->dev,
> +			"BUG! TX request when no free TX buffers!\n");
> +		return 1;

This is not a valid return value; you need to return NETDEV_TX_BUSY.

> +	}
> +
> +	/* Get the next TX descriptor index */
> +	txidx = readl(LPC_ENET_TXPRODUCEINDEX(pldat->net_base));
> +
> +	/* Setup control for the transfer */
> +	ptxstat = pldat->tx_stat_v[txidx];
> +	*ptxstat = 0;
> +	ptxrxdesc = pldat->tx_desc_v[txidx];
> +	ptxrxdesc->control =
> +		(len - 1) | TXDESC_CONTROL_LAST | TXDESC_CONTROL_INT;
> +
> +	/* Copy data to the DMA buffer */
> +	memcpy(pldat->tx_buff_v[txidx], skb->data, len);
> +
> +	/* Save the buffer and increment the buffer counter */
> +	pldat->skb[txidx] = skb;
> +	pldat->num_used_tx_buffs++;
> +
> +	/* Start transmit */
> +	txidx++;
> +	if (txidx >= ENET_TX_DESC)
> +		txidx = 0;
> +	writel(txidx, LPC_ENET_TXPRODUCEINDEX(pldat->net_base));
> +
> +	/* Stop queue if no more TX buffers */
> +	if (pldat->num_used_tx_buffs >= (ENET_TX_DESC - 1))
> +		netif_stop_queue(ndev);
> +
> +	spin_unlock_irq(&pldat->lock);
> +	ndev->trans_start = jiffies;

Drivers don't need to set trans_start any more.

> +	return 0;

NETDEV_TX_OK

> +}
[...]
> +static void lpc_eth_timeout(struct net_device *ndev)
> +{
> +	struct netdata_local *pldat = netdev_priv(ndev);
> +
> +	/* This should never happen and indicates a problem */
> +	dev_err(&pldat->pdev->dev, "BUG! TX timeout occurred!\n");
> +}

The networking core will print a big fat warning before calling this
function, so this message is redundant.  Also, a timeout isn't
necessarily a bug; it can be caused by a misconfigured switch.

[...]
> +static int lpc_eth_ethtool_getsettings(struct net_device *ndev,
> +	struct ethtool_cmd *cmd)
> +{
> +	struct netdata_local *pldat = netdev_priv(ndev);
> +	struct phy_device *phydev = pldat->phy_dev;
> +
> +	if (!phydev)
> +		return -ENODEV;

-EOPNOTSUPP

> +	return phy_ethtool_gset(phydev, cmd);
> +}
> +
> +static int lpc_eth_ethtool_setsettings(struct net_device *ndev,
> +	struct ethtool_cmd *cmd)
> +{
> +	struct netdata_local *pldat = netdev_priv(ndev);
> +	struct phy_device *phydev = pldat->phy_dev;
> +
> +	if (!phydev)
> +		return -ENODEV;
[...]

-EOPNOTSUPP

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/