Date:	Wed, 29 Nov 2006 23:37:05 +0100
From:	"Eric Lemoine" <eric.lemoine@...il.com>
To:	"David Miller" <davem@...emloft.net>
Cc:	netdev@...r.kernel.org, benh@...nel.crashing.org
Subject: Re: [patch sungem] improved locking

On 11/29/06, Eric Lemoine <eric.lemoine@...il.com> wrote:
> I think the following code for gem_interrupt should do the trick:
>
> static irqreturn_t gem_interrupt(int irq, void *dev_id)
> {
>     struct net_device *dev = dev_id;
>     struct gem *gp = dev->priv;
>     unsigned int handled = 1;
>
>     if (!gp->running)
>         goto out;
>
>     if (netif_rx_schedule_prep(dev)) {
>         u32 gem_status = gem_read_status(gp);
>         gem_disable_ints(gp);
>
>         if (unlikely(!gem_status))
>             handled = 0;
>
>         if (gem_irq_sync(gp)) {
>             netif_poll_enable(dev);
>             goto out;
>         }
>
>         gp->status = gem_status;
>         __netif_rx_schedule(dev);
>     }
>
> out:
>     return IRQ_RETVAL(handled);
> }
>
> The important thing is: we __netif_rx_schedule even if gem_status is 0
> (shared irq case) because we don't want to miss events should the
> following scenario occur:
>
> CPU0                                  CPU1
> gem interrupt
> prepare rx schedule
>                                       gem_interrupt
>                                       rx is already scheduled
> shared irq -> (we can miss NIC events
> if we don't __netif_rx_schedule before
> returning)

This new patch implements the above, and fixes a bug that was in the
previous patch: it made the BUG_ON() in gem_irq_quiesce() trigger when
the link was put down while the device was closed.
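
For reviewers, here is a minimal sketch of the irq_sync handshake the
patch relies on (illustration only, not patch code; the helper name
quiesce_and_reconfigure() is made up, the calls are the patch's own):

static void quiesce_and_reconfigure(struct gem *gp)
{
	/* Shut the interrupt handler out: gem_full_lock(gp, 1) takes
	 * gp->lock with BHs disabled, then gem_irq_quiesce() sets
	 * irq_sync, does smp_mb() and synchronize_irq(). */
	gem_full_lock(gp, 1);
	gem_disable_ints(gp);

	/* Safe to touch the chip here: a concurrent gem_interrupt()
	 * sees gem_irq_sync() != 0 and backs off (re-enabling poll)
	 * instead of scheduling NAPI. */

	/* gem_enable_ints() clears irq_sync (with a wmb()) before
	 * unmasking; leaving irq_sync set behind is exactly what the
	 * BUG_ON() in gem_irq_quiesce() is there to catch. */
	gem_enable_ints(gp);
	gem_full_unlock(gp);
}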

Patch applies to current git net-2.6 (2.6.19-rc6). Dave, do you still
use that tree, or is net-2.6.20 your only working tree at this time?

Thanks,

--
Eric


Signed-off-by: Eric Lemoine <eric.lemoine@...il.com>

diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 253e96e..1085c3d 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -9,26 +9,6 @@
  *
  * NAPI and NETPOLL support
  * (C) 2004 by Eric Lemoine (eric.lemoine@...il.com)
- *
- * TODO:
- *  - Now that the driver was significantly simplified, I need to rework
- *    the locking. I'm sure we don't need _2_ spinlocks, and we probably
- *    can avoid taking most of them for so long period of time (and schedule
- *    instead). The main issues at this point are caused by the netdev layer
- *    though:
- *
- *    gem_change_mtu() and gem_set_multicast() are called with a read_lock()
- *    help by net/core/dev.c, thus they can't schedule. That means they can't
- *    call netif_poll_disable() neither, thus force gem_poll() to keep a spinlock
- *    where it could have been dropped. change_mtu especially would love also to
- *    be able to msleep instead of horrid locked delays when resetting the HW,
- *    but that read_lock() makes it impossible, unless I defer it's action to
- *    the reset task, which means it'll be asynchronous (won't take effect until
- *    the system schedules a bit).
- *
- *    Also, it would probably be possible to also remove most of the long-life
- *    locking in open/resume code path (gem_reinit_chip) by beeing more careful
- *    about when we can start taking interrupts or get xmit() called...
  */

 #include <linux/module.h>
@@ -206,18 +186,66 @@ static inline void phy_write(struct gem
 	__phy_write(gp, gp->mii_phy_addr, reg, val);
 }

-static inline void gem_enable_ints(struct gem *gp)
+static inline void __gem_enable_ints(struct gem *gp)
 {
-	/* Enable all interrupts but TXDONE */
+	/* Enable all interrupts, but TXDONE */
 	writel(GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }

+static inline void gem_enable_ints(struct gem *gp)
+{
+	gp->irq_sync = 0;
+	wmb();
+	__gem_enable_ints(gp);
+}
+
 static inline void gem_disable_ints(struct gem *gp)
 {
 	/* Disable all interrupts, including TXDONE */
 	writel(GREG_STAT_NAPI | GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
 }

+static inline u32 gem_read_status(struct gem *gp)
+{
+	return readl(gp->regs + GREG_STAT);
+}
+
+static inline void gem_netif_stop(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	dev->trans_start = jiffies; /* prevent tx timeout */
+	netif_poll_disable(dev);
+	netif_tx_disable(dev);
+}
+
+static inline void gem_netif_start(struct gem *gp)
+{
+	struct net_device *dev = gp->dev;
+
+	/* Unconditional netif_wake_queue is ok so long as caller has
+	 * freed tx slots, which is done in gem_reinit_chip().
+	 */
+	netif_wake_queue(dev);
+	netif_poll_enable(dev);
+}
+
+static inline void gem_schedule_reset_task(struct gem *gp)
+{
+	gp->reset_task_pending = 1;
+	smp_mb();
+	schedule_work(&gp->reset_task);
+}
+
+static inline void gem_wait_reset_task(struct gem *gp)
+{
+	mb();
+	while (gp->reset_task_pending) {
+		yield();
+		mb();
+	}
+}
+
 static void gem_get_cell(struct gem *gp)
 {
 	BUG_ON(gp->cell_enabled < 0);
@@ -658,12 +686,20 @@ static int gem_abnormal_irq(struct net_d
 	return 0;

 do_reset:
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
+	gem_schedule_reset_task(gp);

 	return 1;
 }

+static inline u32 gem_tx_avail(struct gem *gp)
+{
+	smp_mb();
+	return (gp->tx_old <= gp->tx_new) ?
+		gp->tx_old + (TX_RING_SIZE - 1) - gp->tx_new :
+		gp->tx_old - gp->tx_new - 1;
+}
+
+
 static __inline__ void gem_tx(struct net_device *dev, struct gem *gp, u32 gem_status)
 {
 	int entry, limit;
@@ -717,11 +753,20 @@ static __inline__ void gem_tx(struct net
 		gp->net_stats.tx_packets++;
 		dev_kfree_skb_irq(skb);
 	}
+
 	gp->tx_old = entry;

-	if (netif_queue_stopped(dev) &&
-	    TX_BUFFS_AVAIL(gp) > (MAX_SKB_FRAGS + 1))
-		netif_wake_queue(dev);
+	/* Need to make tx_old update visible to gem_start_xmit() */
+	smp_mb();
+
+	if (unlikely(netif_queue_stopped(dev) &&
+	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))) {
+		netif_tx_lock(dev);
+		if (netif_queue_stopped(dev) &&
+	    	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+		netif_tx_unlock(dev);
+	}
 }

 static __inline__ void gem_post_rxds(struct gem *gp, int limit)
@@ -882,12 +927,6 @@ static int gem_rx(struct gem *gp, int wo
 static int gem_poll(struct net_device *dev, int *budget)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	/*
-	 * NAPI locking nightmare: See comment at head of driver
-	 */
-	spin_lock_irqsave(&gp->lock, flags);

 	do {
 		int work_to_do, work_done;
@@ -899,19 +938,10 @@ static int gem_poll(struct net_device *d
 		}

 		/* Run TX completion thread */
-		spin_lock(&gp->tx_lock);
 		gem_tx(dev, gp, gp->status);
-		spin_unlock(&gp->tx_lock);

-		spin_unlock_irqrestore(&gp->lock, flags);
-
-		/* Run RX thread. We don't use any locking here,
-		 * code willing to do bad things - like cleaning the
-		 * rx ring - must call netif_poll_disable(), which
-		 * schedule_timeout()'s if polling is already disabled.
-		 */
+		/* Run RX thread */
 		work_to_do = min(*budget, dev->quota);
-
 		work_done = gem_rx(gp, work_to_do);

 		*budget -= work_done;
@@ -920,53 +950,90 @@ static int gem_poll(struct net_device *d
 		if (work_done >= work_to_do)
 			return 1;

-		spin_lock_irqsave(&gp->lock, flags);
-
-		gp->status = readl(gp->regs + GREG_STAT);
+		gp->status = gem_read_status(gp);
 	} while (gp->status & GREG_STAT_NAPI);

 	__netif_rx_complete(dev);
-	gem_enable_ints(gp);
+	__gem_enable_ints(gp);

-	spin_unlock_irqrestore(&gp->lock, flags);
 	return 0;
 }

+static void gem_irq_quiesce(struct gem *gp)
+{
+	BUG_ON(gp->irq_sync);
+
+	gp->irq_sync = 1;
+	smp_mb();
+
+	synchronize_irq(gp->pdev->irq);
+}
+
+static inline int gem_irq_sync(struct gem *gp)
+{
+	return gp->irq_sync;
+}
+
+static inline void gem_full_lock(struct gem *gp, int irq_sync)
+{
+	spin_lock_bh(&gp->lock);
+	if (irq_sync)
+		gem_irq_quiesce(gp);
+}
+
+static inline void gem_full_unlock(struct gem *gp)
+{
+	spin_unlock_bh(&gp->lock);
+}
+
 static irqreturn_t gem_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
 	struct gem *gp = dev->priv;
-	unsigned long flags;
+	unsigned int handled = 1;

 	/* Swallow interrupts when shutting the chip down, though
 	 * that shouldn't happen, we should have done free_irq() at
 	 * this point...
 	 */
 	if (!gp->running)
-		return IRQ_HANDLED;
-
-	spin_lock_irqsave(&gp->lock, flags);
+		goto out;

 	if (netif_rx_schedule_prep(dev)) {
-		u32 gem_status = readl(gp->regs + GREG_STAT);
+		u32 gem_status;
+
+		gem_status = gem_read_status(gp);
+		if (unlikely(!gem_status)) {
+			/* Shared IRQ. We still want to __netif_rx_schedule
+			 * here, because we don't want to miss NIC events
+			 * should the following scenario occur:
+			 *
+			 * CPU0				CPU1
+			 * interrupt
+			 * prepare rx schedule
+			 * 				interrupt
+			 * 				rx is already scheduled
+			 * gem_status is 0 (we can
+			 * miss NIC events if we
+			 * don't __netif_rx_schedule
+			 * and return right away here)
+			 */
+			handled = 0;
+		}
+
+		gem_disable_ints(gp);

-		if (gem_status == 0) {
+		if (gem_irq_sync(gp)) {
 			netif_poll_enable(dev);
-			spin_unlock_irqrestore(&gp->lock, flags);
-			return IRQ_NONE;
+			goto out;
 		}
+
 		gp->status = gem_status;
-		gem_disable_ints(gp);
 		__netif_rx_schedule(dev);
 	}

-	spin_unlock_irqrestore(&gp->lock, flags);
-
-	/* If polling was disabled at the time we received that
-	 * interrupt, we may return IRQ_HANDLED here while we
-	 * should return IRQ_NONE. No big deal...
-	 */
-	return IRQ_HANDLED;
+out:
+	return IRQ_RETVAL(handled);
 }

 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -999,14 +1066,7 @@ static void gem_tx_timeout(struct net_de
 	       readl(gp->regs + MAC_RXSTAT),
 	       readl(gp->regs + MAC_RXCFG));

-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
-
-	gp->reset_task_pending = 1;
-	schedule_work(&gp->reset_task);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_schedule_reset_task(gp);
 }

 static __inline__ int gem_intme(int entry)
@@ -1023,7 +1083,6 @@ static int gem_start_xmit(struct sk_buff
 	struct gem *gp = dev->priv;
 	int entry;
 	u64 ctrl;
-	unsigned long flags;

 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1037,24 +1096,13 @@ static int gem_start_xmit(struct sk_buff
 			(csum_stuff_off << 21));
 	}

-	local_irq_save(flags);
-	if (!spin_trylock(&gp->tx_lock)) {
-		/* Tell upper layer to requeue */
-		local_irq_restore(flags);
-		return NETDEV_TX_LOCKED;
-	}
-	/* We raced with gem_do_stop() */
-	if (!gp->running) {
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		return NETDEV_TX_BUSY;
-	}
-
 	/* This is a hard error, log it. */
-	if (TX_BUFFS_AVAIL(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
-		netif_stop_queue(dev);
-		spin_unlock_irqrestore(&gp->tx_lock, flags);
-		printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
-		       dev->name);
+	if (gem_tx_avail(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+		if (!netif_queue_stopped(dev)) {
+			netif_stop_queue(dev);
+			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when "
+			       "queue awake!\n", dev->name);
+		}
 		return NETDEV_TX_BUSY;
 	}

@@ -1131,15 +1179,17 @@ static int gem_start_xmit(struct sk_buff
 	}

 	gp->tx_new = entry;
-	if (TX_BUFFS_AVAIL(gp) <= (MAX_SKB_FRAGS + 1))
+	if (unlikely(gem_tx_avail(gp) <= (MAX_SKB_FRAGS + 1))) {
 		netif_stop_queue(dev);
+		if (gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(dev);
+	}

 	if (netif_msg_tx_queued(gp))
 		printk(KERN_DEBUG "%s: tx queued, slot %d, skblen %d\n",
 		       dev->name, entry, skb->len);
 	mb();
 	writel(gp->tx_new, gp->regs + TXDMA_KICK);
-	spin_unlock_irqrestore(&gp->tx_lock, flags);

 	dev->trans_start = jiffies;

@@ -1148,7 +1198,6 @@ static int gem_start_xmit(struct sk_buff

 #define STOP_TRIES 32

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reset(struct gem *gp)
 {
 	int limit;
@@ -1174,7 +1223,6 @@ static void gem_reset(struct gem *gp)
 		printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_start_dma(struct gem *gp)
 {
 	u32 val;
@@ -1197,9 +1245,7 @@ static void gem_start_dma(struct gem *gp
 	writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. DMA won't be
- * actually stopped before about 4ms tho ...
- */
+/* DMA won't be actually stopped before about 4ms tho ... */
 static void gem_stop_dma(struct gem *gp)
 {
 	u32 val;
@@ -1220,8 +1266,7 @@ static void gem_stop_dma(struct gem *gp)
 }


-/* Must be invoked under gp->lock and gp->tx_lock. */
-// XXX dbl check what that function should do when called on PCS PHY
+/* XXX dbl check what that function should do when called on PCS PHY */
 static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
 {
 	u32 advertise, features;
@@ -1306,8 +1351,6 @@ non_mii:

 /* A link-up condition has occurred, initialize and enable the
  * rest of the chip.
- *
- * Must be invoked under gp->lock and gp->tx_lock.
  */
 static int gem_set_link_modes(struct gem *gp)
 {
@@ -1417,7 +1460,6 @@ static int gem_set_link_modes(struct gem
 	return 0;
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static int gem_mdio_link_not_up(struct gem *gp)
 {
 	switch (gp->lstate) {
@@ -1474,13 +1516,13 @@ static void gem_link_timer(unsigned long
 	if (gp->asleep)
 		return;

-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);

 	/* If the reset task is still pending, we just
 	 * reschedule the link timer
 	 */
+	smp_mb();
 	if (gp->reset_task_pending)
 		goto restart;

@@ -1528,8 +1570,7 @@ static void gem_link_timer(unsigned long
 				printk(KERN_INFO "%s: Link down\n",
 					gp->dev->name);
 			netif_carrier_off(gp->dev);
-			gp->reset_task_pending = 1;
-			schedule_work(&gp->reset_task);
+			gem_schedule_reset_task(gp);
 			restart_aneg = 1;
 		} else if (++gp->timer_ticks > 10) {
 			if (found_mii_phy(gp))
@@ -1546,11 +1587,9 @@ restart:
 	mod_timer(&gp->link_timer, jiffies + ((12 * HZ) / 10));
 out_unlock:
 	gem_put_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_clean_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1601,7 +1640,6 @@ static void gem_clean_rings(struct gem *
 	}
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_rings(struct gem *gp)
 {
 	struct gem_init_block *gb = gp->init_block;
@@ -1775,12 +1813,9 @@ #endif
 	netif_carrier_off(gp->dev);

 	/* Can I advertise gigabit here ? I'd need BCM PHY docs... */
-	spin_lock_irq(&gp->lock);
 	gem_begin_auto_negotiation(gp, NULL);
-	spin_unlock_irq(&gp->lock);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_dma(struct gem *gp)
 {
 	u64 desc_dma = (u64) gp->gblock_dvma;
@@ -1818,7 +1853,6 @@ static void gem_init_dma(struct gem *gp)
 		       gp->regs + RXDMA_BLANK);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static u32 gem_setup_multicast(struct gem *gp)
 {
 	u32 rxcfg = 0;
@@ -1860,7 +1894,6 @@ static u32 gem_setup_multicast(struct ge
 	return rxcfg;
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_mac(struct gem *gp)
 {
 	unsigned char *e = &gp->dev->dev_addr[0];
@@ -1943,7 +1976,6 @@ #endif
 		writel(0, gp->regs + WOL_WAKECSR);
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_init_pause_thresholds(struct gem *gp)
 {
        	u32 cfg;
@@ -2096,7 +2128,6 @@ static int gem_check_invariants(struct g
 	return 0;
 }

-/* Must be invoked under gp->lock and gp->tx_lock. */
 static void gem_reinit_chip(struct gem *gp)
 {
 	/* Reset the chip */
@@ -2116,12 +2147,10 @@ static void gem_reinit_chip(struct gem *
 	gem_init_mac(gp);
 }

-
 /* Must be invoked with no lock held. */
 static void gem_stop_phy(struct gem *gp, int wol)
 {
 	u32 mifcfg;
-	unsigned long flags;

 	/* Let the chip settle down a bit, it seems that helps
 	 * for sleep mode on some models
@@ -2167,13 +2196,13 @@ static void gem_stop_phy(struct gem *gp,
 	writel(0, gp->regs + RXDMA_CFG);

 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);
+
 		gem_reset(gp);
 		writel(MAC_TXRST_CMD, gp->regs + MAC_TXRST);
 		writel(MAC_RXRST_CMD, gp->regs + MAC_RXRST);
-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+
+		gem_full_unlock(gp);

 		/* No need to take the lock here */

@@ -2192,14 +2221,11 @@ static void gem_stop_phy(struct gem *gp,
 	}
 }

-
 static int gem_do_start(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;

-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);

 	/* Enable the cell */
 	gem_get_cell(gp);
@@ -2211,28 +2237,26 @@ static int gem_do_start(struct net_devic

 	if (gp->lstate == link_up) {
 		netif_carrier_on(gp->dev);
+		/* gem_set_link_modes starts DMA and enables ints */
 		gem_set_link_modes(gp);
 	}

-	netif_wake_queue(gp->dev);
-
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gp->irq_sync = 0;
+	gem_full_unlock(gp);

 	if (request_irq(gp->pdev->irq, gem_interrupt,
-				   IRQF_SHARED, dev->name, (void *)dev)) {
+		        IRQF_SHARED, dev->name, (void *)dev)) {
+
 		printk(KERN_ERR "%s: failed to request irq !\n", gp->dev->name);

-		spin_lock_irqsave(&gp->lock, flags);
-		spin_lock(&gp->tx_lock);
+		gem_full_lock(gp, 0);

 		gp->running =  0;
 		gem_reset(gp);
 		gem_clean_rings(gp);
 		gem_put_cell(gp);

-		spin_unlock(&gp->tx_lock);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);

 		return -EAGAIN;
 	}
@@ -2243,22 +2267,14 @@ static int gem_do_start(struct net_devic
 static void gem_do_stop(struct net_device *dev, int wol)
 {
 	struct gem *gp = dev->priv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
-
-	gp->running = 0;

-	/* Stop netif queue */
-	netif_stop_queue(dev);
-
-	/* Make sure ints are disabled */
+	gem_full_lock(gp, 1);
 	gem_disable_ints(gp);
+
+	gp->running = 0;

-	/* We can drop the lock now */
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	/* We can drop the lock now that running is set to 0 */
+	gem_full_unlock(gp);

 	/* If we are going to sleep with WOL */
 	gem_stop_dma(gp);
@@ -2275,9 +2291,9 @@ static void gem_do_stop(struct net_devic

 	/* Cell not needed neither if no WOL */
 	if (!wol) {
-		spin_lock_irqsave(&gp->lock, flags);
+		gem_full_lock(gp, 0);
 		gem_put_cell(gp);
-		spin_unlock_irqrestore(&gp->lock, flags);
+		gem_full_unlock(gp);
 	}
 }

@@ -2287,31 +2303,26 @@ static void gem_reset_task(void *data)

 	mutex_lock(&gp->pm_mutex);

-	netif_poll_disable(gp->dev);
-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
-
 	if (gp->running == 0)
 		goto not_running;

-	if (gp->running) {
-		netif_stop_queue(gp->dev);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);

-		/* Reset the chip & rings */
-		gem_reinit_chip(gp);
-		if (gp->lstate == link_up)
-			gem_set_link_modes(gp);
-		netif_wake_queue(gp->dev);
-	}
- not_running:
-	gp->reset_task_pending = 0;
+	gem_netif_stop(gp);
+
+	/* Reset the chip & rings */
+	gem_reinit_chip(gp);
+	if (gp->lstate == link_up)
+		gem_set_link_modes(gp);

-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_netif_start(gp);

-	netif_poll_enable(gp->dev);
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);

+not_running:
+	gp->reset_task_pending = 0;
 	mutex_unlock(&gp->pm_mutex);
 }

@@ -2324,8 +2335,12 @@ static int gem_open(struct net_device *d
 	mutex_lock(&gp->pm_mutex);

 	/* We need the cell enabled */
-	if (!gp->asleep)
+	if (!gp->asleep) {
 		rc = gem_do_start(dev);
+		if (rc == 0)
+			netif_start_queue(dev);
+	}
+
 	gp->opened = (rc == 0);

 	mutex_unlock(&gp->pm_mutex);
@@ -2337,15 +2352,14 @@ static int gem_close(struct net_device *
 {
 	struct gem *gp = dev->priv;

-	/* Note: we don't need to call netif_poll_disable() here because
-	 * our caller (dev_close) already did it for us
-	 */
-
 	mutex_lock(&gp->pm_mutex);

 	gp->opened = 0;
-	if (!gp->asleep)
+	if (!gp->asleep) {
+		/* Upper layer took care of disabling polling */
+		netif_stop_queue(dev);
 		gem_do_stop(dev, 0);
+	}

 	mutex_unlock(&gp->pm_mutex);

@@ -2357,22 +2371,17 @@ static int gem_suspend(struct pci_dev *p
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;

 	mutex_lock(&gp->pm_mutex);

-	netif_poll_disable(dev);
-
 	printk(KERN_INFO "%s: suspending, WakeOnLan %s\n",
 	       dev->name,
 	       (gp->wake_on_lan && gp->opened) ? "enabled" : "disabled");

 	/* Keep the cell enabled during the entire operation */
-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	/* If the driver is opened, we stop the MAC */
 	if (gp->opened) {
@@ -2381,6 +2390,7 @@ static int gem_suspend(struct pci_dev *p

 		/* Switch off MAC, remember WOL setting */
 		gp->asleep_wol = gp->wake_on_lan;
+		gem_netif_stop(gp);
 		gem_do_stop(dev, gp->asleep_wol);
 	} else
 		gp->asleep_wol = 0;
@@ -2399,8 +2409,7 @@ static int gem_suspend(struct pci_dev *p
 	mutex_unlock(&gp->pm_mutex);

 	/* Wait for a pending reset task to complete */
-	while (gp->reset_task_pending)
-		yield();
+	gem_wait_reset_task(gp);
 	flush_scheduled_work();

 	/* Shut the PHY down eventually and setup WOL */
@@ -2421,7 +2430,6 @@ static int gem_resume(struct pci_dev *pd
 {
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct gem *gp = dev->priv;
-	unsigned long flags;

 	printk(KERN_INFO "%s: resuming\n", dev->name);

@@ -2465,11 +2473,9 @@ static int gem_resume(struct pci_dev *pd

 		/* Re-attach net device */
 		netif_device_attach(dev);
-
 	}

-	spin_lock_irqsave(&gp->lock, flags);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);

 	/* If we had WOL enabled, the cell clock was never turned off during
 	 * sleep, so we end up beeing unbalanced. Fix that here
@@ -2482,10 +2488,9 @@ static int gem_resume(struct pci_dev *pd
 	 */
 	gem_put_cell(gp);

-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

-	netif_poll_enable(dev);
+	gem_netif_start(gp);

 	mutex_unlock(&gp->pm_mutex);

@@ -2498,8 +2503,8 @@ static struct net_device_stats *gem_get_
 	struct gem *gp = dev->priv;
 	struct net_device_stats *stats = &gp->net_stats;

-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 0);

 	/* I have seen this being called while the PM was in progress,
 	 * so we shield against this
@@ -2522,27 +2527,26 @@ static struct net_device_stats *gem_get_
 		writel(0, gp->regs + MAC_LCOLL);
 	}

-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
+	gem_netif_start(gp);

 	return &gp->net_stats;
 }

+/* gem_set_multicast is called with netif_tx_lock held. Thus, it
+ * cannot sleep.
+ */
 static void gem_set_multicast(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
 	u32 rxcfg, rxcfg_new;
 	int limit = 10000;

-
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+	gem_full_lock(gp, 0);

 	if (!gp->running)
 		goto bail;

-	netif_stop_queue(dev);
-
 	rxcfg = readl(gp->regs + MAC_RXCFG);
 	rxcfg_new = gem_setup_multicast(gp);
 #ifdef STRIP_FCS
@@ -2562,11 +2566,8 @@ #endif

 	writel(rxcfg, gp->regs + MAC_RXCFG);

-	netif_wake_queue(dev);
-
  bail:
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);
 }

 /* Jumbo-grams don't seem to work :-( */
@@ -2593,16 +2594,22 @@ static int gem_change_mtu(struct net_dev
 	}

 	mutex_lock(&gp->pm_mutex);
-	spin_lock_irq(&gp->lock);
-	spin_lock(&gp->tx_lock);
+
+	gem_netif_stop(gp);
+	gem_full_lock(gp, 1);
+	gem_disable_ints(gp);
+
 	dev->mtu = new_mtu;
 	if (gp->running) {
 		gem_reinit_chip(gp);
 		if (gp->lstate == link_up)
 			gem_set_link_modes(gp);
 	}
-	spin_unlock(&gp->tx_lock);
-	spin_unlock_irq(&gp->lock);
+
+	gem_netif_start(gp);
+	gem_enable_ints(gp);
+	gem_full_unlock(gp);
+
 	mutex_unlock(&gp->pm_mutex);

 	return 0;
@@ -2635,7 +2642,7 @@ static int gem_get_settings(struct net_d
 		cmd->phy_address = 0; /* XXX fixed PHYAD */

 		/* Return current PHY settings */
-		spin_lock_irq(&gp->lock);
+		gem_full_lock(gp, 0);
 		cmd->autoneg = gp->want_autoneg;
 		cmd->speed = gp->phy_mii.speed;
 		cmd->duplex = gp->phy_mii.duplex;
@@ -2647,7 +2654,7 @@ static int gem_get_settings(struct net_d
 		 */
 		if (cmd->advertising == 0)
 			cmd->advertising = cmd->supported;
-		spin_unlock_irq(&gp->lock);
+		gem_full_unlock(gp);
 	} else { // XXX PCS ?
 		cmd->supported =
 			(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
@@ -2685,11 +2692,11 @@ static int gem_set_settings(struct net_d
 		return -EINVAL;

 	/* Apply settings and restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, cmd);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);

 	return 0;
 }
@@ -2702,11 +2709,11 @@ static int gem_nway_reset(struct net_dev
 		return -EINVAL;

 	/* Restart link process. */
-	spin_lock_irq(&gp->lock);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
 	gem_begin_auto_negotiation(gp, NULL);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);
+	gem_full_unlock(gp);

 	return 0;
 }
@@ -2770,16 +2777,15 @@ static int gem_ioctl(struct net_device *
 	struct gem *gp = dev->priv;
 	struct mii_ioctl_data *data = if_mii(ifr);
 	int rc = -EOPNOTSUPP;
-	unsigned long flags;

 	/* Hold the PM mutex while doing ioctl's or we may collide
 	 * with power management.
 	 */
 	mutex_lock(&gp->pm_mutex);

-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_get_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	switch (cmd) {
 	case SIOCGMIIPHY:		/* Get address of MII PHY in use. */
@@ -2809,9 +2815,9 @@ static int gem_ioctl(struct net_device *
 		break;
 	};

-	spin_lock_irqsave(&gp->lock, flags);
+	gem_full_lock(gp, 0);
 	gem_put_cell(gp);
-	spin_unlock_irqrestore(&gp->lock, flags);
+	gem_full_unlock(gp);

 	mutex_unlock(&gp->pm_mutex);

@@ -2918,6 +2924,7 @@ static void gem_remove_one(struct pci_de
 	if (dev) {
 		struct gem *gp = dev->priv;

+		/* unregister_netdev will close the device if it's open */
 		unregister_netdev(dev);

 		/* Stop the link timer */
@@ -2927,8 +2934,7 @@ static void gem_remove_one(struct pci_de
 		gem_get_cell(gp);

 		/* Wait for a pending reset task to complete */
-		while (gp->reset_task_pending)
-			yield();
+		gem_wait_reset_task(gp);
 		flush_scheduled_work();

 		/* Shut the PHY down */
@@ -3035,8 +3041,8 @@ static int __devinit gem_init_one(struct

 	gp->msg_enable = DEFAULT_MSG;

+	gp->irq_sync = 0;
 	spin_lock_init(&gp->lock);
-	spin_lock_init(&gp->tx_lock);
 	mutex_init(&gp->pm_mutex);

 	init_timer(&gp->link_timer);
@@ -3132,9 +3138,7 @@ #endif
 	 */
 	gem_init_phy(gp);

-	spin_lock_irq(&gp->lock);
 	gem_put_cell(gp);
-	spin_unlock_irq(&gp->lock);

 	/* Register with kernel */
 	if (register_netdev(dev)) {
@@ -3156,8 +3160,7 @@ #endif
 		printk(KERN_INFO "%s: Found %s PHY\n", dev->name,
 			gp->phy_mii.def ? gp->phy_mii.def->name : "no");

-	/* GEM can do it all... */
-	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_LLTX;
+	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
 	if (pci_using_dac)
 		dev->features |= NETIF_F_HIGHDMA;

@@ -3177,7 +3180,6 @@ err_out_free_netdev:
 err_disable_device:
 	pci_disable_device(pdev);
 	return err;
-
 }


diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h
index a70067c..5869818 100644
--- a/drivers/net/sungem.h
+++ b/drivers/net/sungem.h
@@ -929,11 +929,6 @@ #endif
 #define NEXT_TX(N)	(((N) + 1) & (TX_RING_SIZE - 1))
 #define NEXT_RX(N)	(((N) + 1) & (RX_RING_SIZE - 1))

-#define TX_BUFFS_AVAIL(GP)					\
-	(((GP)->tx_old <= (GP)->tx_new) ?			\
-	  (GP)->tx_old + (TX_RING_SIZE - 1) - (GP)->tx_new :	\
-	  (GP)->tx_old - (GP)->tx_new - 1)
-
 #define RX_OFFSET          2
 #define RX_BUF_ALLOC_SIZE(gp)	((gp)->rx_buf_sz + 28 + RX_OFFSET + 64)

@@ -973,8 +968,8 @@ enum link_state {
 };

 struct gem {
+	int			irq_sync;
 	spinlock_t		lock;
-	spinlock_t		tx_lock;
 	void __iomem		*regs;
 	int			rx_new, rx_old;
 	int			tx_new, tx_old;
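
P.S. For anyone reviewing the lockless TX path: with gp->tx_lock and
LLTX gone, queue stop/wake relies on the usual barrier-and-recheck
dance. A condensed sketch (lifted from the patch above, simplified,
error handling omitted):

	/* producer side: gem_start_xmit(), under netif_tx_lock */
	gp->tx_new = entry;
	if (unlikely(gem_tx_avail(gp) <= (MAX_SKB_FRAGS + 1))) {
		netif_stop_queue(dev);
		/* recheck: gem_tx() may have freed slots between our
		 * avail test and the stop, and skipped the wakeup */
		if (gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
			netif_wake_queue(dev);
	}

	/* consumer side: gem_tx(), called from gem_poll() */
	gp->tx_old = entry;
	smp_mb();	/* publish tx_old before testing the queue */
	if (unlikely(netif_queue_stopped(dev) &&
	    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))) {
		netif_tx_lock(dev);	/* serialize with xmit */
		if (netif_queue_stopped(dev) &&
		    gem_tx_avail(gp) > (MAX_SKB_FRAGS + 1))
			netif_wake_queue(dev);
		netif_tx_unlock(dev);
	}

gem_tx_avail() does an smp_mb() before reading tx_old/tx_new, so each
side observes the other's index update before deciding to stop or wake.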
