Message-ID: <4DAFA9F9.5080909@hotmail.com>
Date: Wed, 20 Apr 2011 23:52:25 -0400
From: John Lumby <johnlumby@...mail.com>
To: Francois Romieu <romieu@...zoreil.com>
CC: netdev@...r.kernel.org, Ben Hutchings <bhutchings@...arflare.com>,
nic_swsd@...ltek.com
Subject: Re: r8169 : always copying the rx buffer to new skb
On 04/20/11 15:13, Francois Romieu wrote:
>
> Why don't you send the patch through the mailing list ?
>
> (hint, hint)
>
Based on 2.6.39-rc2.
It also has changes for ethtool:
. get and set ring parms (suggested by Ben) - a userspace sketch
  follows below
. get and set rx_copybreak - not sure if this is a good idea or not,
  as it's a driver parm, not a NIC setting, but there are 22 net
  drivers that have the parm, so I thought it might be useful.
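
For illustration only (not part of the patch), a minimal userspace
sketch of reading the ring parms through the standard SIOCETHTOOL
ioctl / ETHTOOL_GRINGPARAM command; the interface name "eth0" is just
an example :

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_ringparam ring = { .cmd = ETHTOOL_GRINGPARAM };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0); /* any socket works for SIOCETHTOOL */

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1); /* example interface name */
	ifr.ifr_data = (void *)&ring;

	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0) /* kernel fills in current and max sizes */
		printf("rx %u/%u tx %u/%u (pending/max)\n",
		       ring.rx_pending, ring.rx_max_pending,
		       ring.tx_pending, ring.tx_max_pending);
	close(fd);
	return 0;
}

The ethtool utility does the same via "ethtool -g ethX", and the sizes
can be changed with "ethtool -G ethX rx N tx M"; with this patch the
new sizes take effect at the next open.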
-------------------------------------------------------------------------------------
--- linux-2.6.39-rc2FCrtl/drivers/net/r8169.c.orig	2011-04-05 21:30:43.000000000 -0400
+++ linux-2.6.39-rc2FCrtl/drivers/net/r8169.c	2011-04-20 21:34:42.000000000 -0400
@@ -56,7 +56,7 @@
(NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN)
#define TX_BUFFS_AVAIL(tp) \
- (tp->dirty_tx + NUM_TX_DESC - tp->cur_tx - 1)
+ (tp->dirty_tx + tp->num_tx_allocd - tp->cur_tx - 1)
/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
The RTL chips use a 64 element hash table based on the Ethernet CRC. */
@@ -74,11 +74,19 @@ static const int multicast_filter_limit
#define R8169_REGS_SIZE 256
#define R8169_NAPI_WEIGHT 64
-#define NUM_TX_DESC 64 /* Number of Tx descriptor registers */
-#define NUM_RX_DESC 256 /* Number of Rx descriptor registers */
-#define RX_BUF_SIZE 1536 /* Rx Buffer size */
-#define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc))
-#define R8169_RX_RING_BYTES (NUM_RX_DESC * sizeof(struct RxDesc))
+/* #define NUM_TX_DESC 64 Number of Tx descriptor registers is now based on variable num_tx_allocd */
+/* #define NUM_RX_DESC 256 Number of in-use Rx descriptor registers is now based on variable num_rx_allocd :
+ see comments attached to definition of that variable */
+#define MIN_NUM_RX_DESC 16 /* minimum number of Rx descriptor registers with which the chip can operate ? */
+#define MAX_NUM_RX_DESC 256 /* maximum number of Rx descriptor registers with which the chip can operate ? */
+#define MIN_NUM_TX_DESC 16 /* minimum number of Tx descriptor registers with which the chip can operate ? */
+#define MAX_NUM_TX_DESC 64 /* maximum number of Tx descriptor registers with which the chip can operate ? */
+
+ /* number of in-use Rx descriptors is based on variable num_rx_allocd
+ ** and num_rx_allocd is always <= num_rx_requested value
+ */
+#define R8169_RX_RING_BYTES (tp->num_rx_requested * sizeof(struct RxDesc))
+#define R8169_TX_RING_BYTES (tp->num_tx_requested * sizeof(struct TxDesc))
#define RTL8169_TX_TIMEOUT (6*HZ)
#define RTL8169_PHY_TIMEOUT (10*HZ)
@@ -198,12 +206,23 @@ static DEFINE_PCI_DEVICE_TABLE(rtl8169_p
MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
-static int rx_buf_sz = 16383;
+static const int rx_buf_sz = 16383;
+/*
+ * we set our default copybreak very high to eliminate
+ * the possibility of running out of receive buffers.
+ * HOWEVER lowering it will reduce memcpying
+ * and may improve performance significantly.
+ */
+static int rx_copybreak = 16383;
static int use_dac;
static struct {
u32 msg_enable;
-} debug = { -1 };
+} debug = {
+	-1};
+#ifdef RTL8169_DEBUG
+static int simulate_alloc_fail = 0; /* set to (P-1) to fail alloc on all except every P attempts */
+#endif /* RTL8169_DEBUG */
enum rtl_registers {
MAC0 = 0, /* Ethernet hardware address. */
MAC4 = 4,
@@ -522,16 +541,50 @@ struct rtl8169_private {
u32 msg_enable;
int chipset;
int mac_version;
- u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
- u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
- u32 dirty_rx;
- u32 dirty_tx;
+
+ /* Note - re number of Rx/Tx descriptor buffers allocated :
+ ** we maintain two values per ring - requested and allocd.
+ ** requested can be set by ethtool and defaults to the max permitted
+ ** allocd is the number actually obtained at open and may be less than
+ ** requested, but provided it is at least the minimum required, we'll continue.
+ ** ethtool setting is asynchronous and takes effect at next open.
+ ** The num_xx_allocd count is used as modulus for
+ ** locating active entries in the array using logic like this snippet
+ ** in rtl8169_rx_interrupt :
+ ** entry = cur_rx % num_rx_allocd;
+ ** The size of each array of per-ring-element thingy is always the maximum.
+ **
+ ** at present, with the tx ring info embedded in private,
+ ** it is a bit silly pretending to provide a settable tx_requested,
+ ** but if desired, at expense of extra ptr deref,
+ ** could change it to an array of pointers
+ */
+ u32 num_tx_requested; /* num Tx buffers requested */
+ u32 num_rx_requested; /* num Rx buffers requested */
+ u32 num_tx_allocd; /* num Tx descriptor buffers allocated */
+ u32 num_rx_allocd; /* num Rx descriptor buffers allocated */
+
+ /* note - the following two counters are monotonically-ascending - can be thought of
+ ** as the count of number of buffers which the hardware has accessed.
+ */
+ u32 cur_rx; /* Index of next Rx pkt. */
+ u32 cur_tx; /* Index of next Tx pkt. */
+
+ u32 totl_rx_replenished; /* monotonically-ascending count of replenished buffers */
+ u32 replenish_rx_cursor; /* Index of next Rx pkt. to replenish (modulo, not monotonic) */
+ /* the following counts pkts copied as opposed to uncopied (unhooked) */
+ /* note - count of uncopied packets = cur_rx - copied_rx_pkt_count */
+ u32 copied_rx_pkt_count; /* total pkts copied to new skb */
+ u32 totl_rx_alloc_fail; /* rx alloc failures */
+ u32 dirty_tx; /* monotonic count of transmitted packets (or fragments?) */
struct TxDesc *TxDescArray; /* 256-aligned Tx descriptor ring */
struct RxDesc *RxDescArray; /* 256-aligned Rx descriptor ring */
dma_addr_t TxPhyAddr;
dma_addr_t RxPhyAddr;
- void *Rx_databuff[NUM_RX_DESC]; /* Rx data buffers */
- struct ring_info tx_skb[NUM_TX_DESC]; /* Tx data buffers */
+ struct sk_buff *Rx_skbuff[MAX_NUM_RX_DESC]; /* Rx data buffers */
+ struct ring_info tx_skb[MAX_NUM_TX_DESC]; /* Tx data buffers */
+
+ unsigned align;
struct timer_list timer;
u16 cp_cmd;
u16 intr_event;
@@ -569,6 +622,14 @@ struct rtl8169_private {
MODULE_AUTHOR("Realtek and the Linux r8169 crew
<netdev@...r.kernel.org>");
MODULE_DESCRIPTION("RealTek RTL-8169 Gigabit Ethernet driver");
+module_param(rx_copybreak, int, 0);
+MODULE_PARM_DESC(rx_copybreak, "Copy breakpoint for copy-only-tiny-frames");
+#ifdef RTL8169_DEBUG
+module_param(simulate_alloc_fail, int, 0);
+MODULE_PARM_DESC(simulate_alloc_fail,
+ "set to (2**P - 1) eg 15, to fail alloc rx skb on all except
every 2**P attempts");
+#endif /* RTL8169_DEBUG */
+
module_param(use_dac, int, 0);
MODULE_PARM_DESC(use_dac, "Enable PCI DAC. Unsafe on 32 bit PCI slot.");
module_param_named(debug, debug.msg_enable, int, 0);
@@ -583,7 +644,7 @@ static int rtl8169_open(struct net_devic
static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
struct net_device *dev);
static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance);
-static int rtl8169_init_ring(struct net_device *dev);
+static int rtl8169_init_ring(struct rtl8169_private *tp);
static void rtl_hw_start(struct net_device *dev);
static int rtl8169_close(struct net_device *dev);
static void rtl_set_rx_mode(struct net_device *dev);
@@ -1242,6 +1303,15 @@ static int rtl8169_set_settings(struct n
spin_lock_irqsave(&tp->lock, flags);
ret = rtl8169_set_speed(dev,
cmd->autoneg, cmd->speed, cmd->duplex, cmd->advertising);
+
+ /* check that ethtool has set a copybreak value before accepting it */
+ if ( (cmd->supported & (SUPPORTED_cmd_extension |
+ SUPPORTED_cmd_extension_rx_copybreak))
+ && (cmd->rx_copybreak <= rx_buf_sz) ) {
+ rx_copybreak = cmd->rx_copybreak;
+ netif_info(tp, drv, dev, "set rx_copybreak to %d\n",
+ rx_copybreak);
+ }
spin_unlock_irqrestore(&tp->lock, flags);
return ret;
@@ -1254,6 +1324,49 @@ static u32 rtl8169_get_rx_csum(struct ne
return tp->cp_cmd & RxChkSum;
}
+static void rtl8169_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct rtl8169_private *tp = netdev_priv(netdev);
+
+ ring->rx_max_pending = MAX_NUM_RX_DESC;
+ ring->tx_max_pending = MAX_NUM_TX_DESC;
+ ring->rx_mini_max_pending = 0;
+ ring->rx_jumbo_max_pending = 0;
+ ring->rx_pending = tp->num_rx_allocd;
+ ring->tx_pending = tp->num_tx_allocd;
+ ring->rx_mini_pending = 0;
+ ring->rx_jumbo_pending = 0;
+}
+
+static int rtl8169_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct rtl8169_private *tp = netdev_priv(netdev);
+
+ if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+ return -EINVAL;
+
+ /* I am not sure about closing and opening the NIC here
+ * so will leave the change pending for next open
+ */
+
+ tp->num_rx_requested = ((ring->rx_pending < MIN_NUM_RX_DESC) ?
+ MIN_NUM_RX_DESC :
+ ((ring->rx_pending > MAX_NUM_RX_DESC) ?
+ MAX_NUM_RX_DESC : ring->rx_pending));
+ tp->num_tx_requested = ((ring->tx_pending < MIN_NUM_TX_DESC) ?
+ MIN_NUM_TX_DESC :
+ ((ring->tx_pending > MAX_NUM_TX_DESC) ?
+ MAX_NUM_TX_DESC : ring->tx_pending));
+
+ netif_info(tp, drv, netdev,
+ "Ring sizes to be requested at next open: num rx: %d, num tx
%d\n",
+ tp->num_rx_requested, tp->num_tx_requested);
+
+ return 0;
+}
+
static int rtl8169_set_rx_csum(struct net_device *dev, u32 data)
{
struct rtl8169_private *tp = netdev_priv(dev);
@@ -1351,6 +1464,13 @@ static int rtl8169_get_settings(struct n
rc = tp->get_settings(dev, cmd);
+ /* inform about returning extended info - rx_copybreak
+ * and initialize so we can detect if set to new val by ethtool
+ */
+ cmd->rx_copybreak = rx_copybreak;
+ cmd->supported |= SUPPORTED_cmd_extension;
+ cmd->supported &= ~SUPPORTED_cmd_extension_rx_copybreak;
+
spin_unlock_irqrestore(&tp->lock, flags);
return rc;
}
@@ -1397,6 +1517,11 @@ static const char rtl8169_gstrings[][ETH
"multicast",
"tx_aborted",
"tx_underrun",
+ /* extras maintained in driver code */
+ "tot rx intrpts",
+ "tot rx copied",
+ "tot rx replenished",
+ "tot rx alloc_fail"
};
static int rtl8169_get_sset_count(struct net_device *dev, int sset)
@@ -1472,9 +1597,15 @@ static void rtl8169_get_ethtool_stats(st
data[10] = le32_to_cpu(tp->counters.rx_multicast);
data[11] = le16_to_cpu(tp->counters.tx_aborted);
data[12] = le16_to_cpu(tp->counters.tx_underun);
+ /* extras maintained in driver code */
+ data[13] = tp->cur_rx;
+ data[14] = tp->copied_rx_pkt_count;
+ data[15] = tp->totl_rx_replenished;
+ data[16] = tp->totl_rx_alloc_fail;
}
-static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+static void rtl8169_get_strings(struct net_device *dev, u32 stringset,
+ u8 * data)
{
switch(stringset) {
case ETH_SS_STATS:
@@ -1516,6 +1647,8 @@ static const struct ethtool_ops rtl8169_
.get_rx_csum = rtl8169_get_rx_csum,
.set_rx_csum = rtl8169_set_rx_csum,
.set_tx_csum = ethtool_op_set_tx_csum,
+ .get_ringparam = rtl8169_get_ringparam,
+ .set_ringparam = rtl8169_set_ringparam,
.set_sg = ethtool_op_set_sg,
.set_tso = ethtool_op_set_tso,
.get_regs = rtl8169_get_regs,
@@ -3060,6 +3193,10 @@ rtl8169_init_one(struct pci_dev *pdev, c
tp->pci_dev = pdev;
tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT);
+ tp->num_rx_allocd = tp->num_tx_allocd = 0;
+ tp->num_rx_requested = MAX_NUM_RX_DESC;
+ tp->num_tx_requested = MAX_NUM_TX_DESC;
+
mii = &tp->mii;
mii->dev = dev;
mii->mdio_read = rtl_mdio_read;
@@ -3229,6 +3366,7 @@ rtl8169_init_one(struct pci_dev *pdev, c
dev->features |= NETIF_F_HW_VLAN_TX_RX | NETIF_F_GRO;
tp->intr_mask = 0xffff;
+ tp->align = cfg->align;
tp->hw_start = cfg->hw_start;
tp->intr_event = cfg->intr_event;
tp->napi_event = cfg->napi_event;
@@ -3326,7 +3464,7 @@ static int rtl8169_open(struct net_devic
if (!tp->RxDescArray)
goto err_free_tx_0;
- retval = rtl8169_init_ring(dev);
+ retval = rtl8169_init_ring(tp);
if (retval < 0)
goto err_free_rx_1;
@@ -4071,14 +4209,15 @@ static inline void rtl8169_make_unusable
desc->opts1 &= ~cpu_to_le32(DescOwn | RsvdMask);
}
-static void rtl8169_free_rx_databuff(struct rtl8169_private *tp,
- void **data_buff, struct RxDesc *desc)
+static void rtl8169_free_rx_skb(struct rtl8169_private *tp,
+ struct sk_buff **sk_buff, struct RxDesc *desc)
{
- dma_unmap_single(&tp->pci_dev->dev, le64_to_cpu(desc->addr), rx_buf_sz,
- DMA_FROM_DEVICE);
+ struct pci_dev *pdev = tp->pci_dev;
- kfree(*data_buff);
- *data_buff = NULL;
+ dma_unmap_single(&pdev->dev, le64_to_cpu(desc->addr), rx_buf_sz,
+ PCI_DMA_FROMDEVICE);
+ dev_kfree_skb(*sk_buff); /* also frees the data buffer! */
+ *sk_buff = NULL;
rtl8169_make_unusable_by_asic(desc);
}
@@ -4102,28 +4241,25 @@ static inline void *rtl8169_align(void *
return (void *)ALIGN((long)data, 16);
}
-static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
- struct RxDesc *desc)
+static struct sk_buff *rtl8169_alloc_rx_skb(struct rtl8169_private *tp,
+ struct RxDesc *desc, gfp_t gfp)
{
- void *data;
+ struct sk_buff *skb;
dma_addr_t mapping;
struct device *d = &tp->pci_dev->dev;
struct net_device *dev = tp->dev;
- int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
+ unsigned int pad;
- data = kmalloc_node(rx_buf_sz, GFP_KERNEL, node);
- if (!data)
- return NULL;
+ pad = tp->align ? tp->align : NET_IP_ALIGN;
- if (rtl8169_align(data) != data) {
- kfree(data);
- data = kmalloc_node(rx_buf_sz + 15, GFP_KERNEL, node);
- if (!data)
- return NULL;
- }
+ skb = __netdev_alloc_skb(dev, rx_buf_sz + pad, gfp);
+ if (!skb)
+ goto err_out;
+
+ skb_reserve(skb,
+ tp->align ? ((pad - 1) & (unsigned long)skb->data) : pad);
- mapping = dma_map_single(d, rtl8169_align(data), rx_buf_sz,
- DMA_FROM_DEVICE);
+ mapping = dma_map_single(d, skb->data, rx_buf_sz, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(d, mapping))) {
if (net_ratelimit())
netif_err(tp, drv, tp->dev, "Failed to map RX DMA!\n");
@@ -4131,23 +4267,25 @@ static struct sk_buff *rtl8169_alloc_rx_
}
rtl8169_map_to_asic(desc, mapping, rx_buf_sz);
- return data;
+out:
+ return skb;
err_out:
- kfree(data);
- return NULL;
+ rtl8169_make_unusable_by_asic(desc);
+ goto out;
}
static void rtl8169_rx_clear(struct rtl8169_private *tp)
{
unsigned int i;
- for (i = 0; i < NUM_RX_DESC; i++) {
- if (tp->Rx_databuff[i]) {
- rtl8169_free_rx_databuff(tp, tp->Rx_databuff + i,
+ for (i = 0; i < tp->num_rx_allocd; i++) {
+ if (tp->Rx_skbuff[i]) {
+ rtl8169_free_rx_skb(tp, tp->Rx_skbuff + i,
tp->RxDescArray + i);
}
}
+ tp->num_rx_allocd = 0; /* no rx descriptors allocated any more ! */
}
static inline void rtl8169_mark_as_last_descriptor(struct RxDesc *desc)
@@ -4155,47 +4293,92 @@ static inline void rtl8169_mark_as_last_
desc->opts1 |= cpu_to_le32(RingEnd);
}
-static int rtl8169_rx_fill(struct rtl8169_private *tp)
+/* rtl8169_rx_fill : allocate num_to_alloc rx skb buffers to rx descriptors
+ * starting with descriptor first_desc.
+ * this function operates in one of two slightly different modes,
+ * depending on whether the num_replenished parm is zero or not :
+ * zero - traverse a fixed number of buffers specified by num_to_alloc,
+ * allocating those which are empty;
+ * non-zero - traverse as many buffers as needed
+ * to replenish num_replenished empty buffers,
+ * and update the parm with number actually replenished.
+ * in each case, stop if unable to allocate,
+ * and in each case return number of buffers traversed.
+ */
+static u32 rtl8169_rx_fill(struct rtl8169_private *tp, u32 first_desc,
+ u32 num_to_alloc, u32 * num_replenished, gfp_t gfp)
{
- unsigned int i;
+ unsigned int this_desc_index; /* loop through on this */
+ u32 count_allocd; /* count allocd */
+ u32 num_traversed; /* count num traversed */
+
+ for (count_allocd = 0, num_traversed = 0, this_desc_index = first_desc;
+ ((num_replenished && (count_allocd < *num_replenished))
+ || (num_traversed < num_to_alloc)
+ ); num_traversed++) {
+ struct sk_buff *skb;
- for (i = 0; i < NUM_RX_DESC; i++) {
- void *data;
+ if (tp->Rx_skbuff[this_desc_index] == (struct sk_buff *)0) { /* bypass if allocd */
- if (tp->Rx_databuff[i])
- continue;
+ skb =
+ rtl8169_alloc_rx_skb(tp,
+ tp->RxDescArray +
+ this_desc_index, gfp);
+ if (!skb)
+ break;
- data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i);
- if (!data) {
- rtl8169_make_unusable_by_asic(tp->RxDescArray + i);
- goto err_out;
- }
- tp->Rx_databuff[i] = data;
+ tp->Rx_skbuff[this_desc_index] = skb;
+ count_allocd++;
}
- rtl8169_mark_as_last_descriptor(tp->RxDescArray + NUM_RX_DESC - 1);
- return 0;
+ /* increment this_desc_index allowing for modulo num_rx_allocd if latter is > 0
+ * also ensuring we stop after one complete circuit
+ */
+ this_desc_index++;
+ if (this_desc_index == tp->num_rx_allocd) {
+ this_desc_index = 0;
+ }
+ if (this_desc_index == first_desc) {
+ break;
+ }
+ }
-err_out:
- rtl8169_rx_clear(tp);
- return -ENOMEM;
+ if (num_replenished)
+ *num_replenished = count_allocd;
+ return num_traversed;
}
static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
{
- tp->dirty_tx = tp->dirty_rx = tp->cur_tx = tp->cur_rx = 0;
+ tp->dirty_tx = tp->totl_rx_replenished = tp->totl_rx_alloc_fail =
+ tp->cur_tx = tp->cur_rx = tp->replenish_rx_cursor = 0;
}
-static int rtl8169_init_ring(struct net_device *dev)
+static int rtl8169_init_ring(struct rtl8169_private *tp)
{
- struct rtl8169_private *tp = netdev_priv(dev);
rtl8169_init_ring_indexes(tp);
- memset(tp->tx_skb, 0x0, NUM_TX_DESC * sizeof(struct ring_info));
- memset(tp->Rx_databuff, 0x0, NUM_RX_DESC * sizeof(void *));
+ memset(tp->tx_skb, 0x0, MAX_NUM_TX_DESC * sizeof(struct ring_info));
+ memset(tp->Rx_skbuff, 0x0, MAX_NUM_RX_DESC * sizeof(struct sk_buff *));
+ tp->copied_rx_pkt_count = 0;
+
+ /* see comment preceding defn of num_tx_requested */
+ tp->num_tx_allocd = tp->num_tx_requested;
+ tp->num_rx_allocd =
+ rtl8169_rx_fill(tp, 0, (u32) tp->num_rx_requested, 0, GFP_KERNEL);
+ printk(KERN_INFO "%s num_rx_requested= %d num_rx_allocd= %d\n",
+ MODULENAME, (u32) tp->num_rx_requested, tp->num_rx_allocd);
+ if (tp->num_rx_allocd < MIN_NUM_RX_DESC)
+ goto err_out;
+
+ rtl8169_mark_as_last_descriptor(tp->RxDescArray + tp->num_rx_allocd - 1);
- return rtl8169_rx_fill(tp);
+ return 0;
+
+err_out:
+ rtl8169_rx_clear(tp);
+ return -ENOMEM;
}
static void rtl8169_unmap_tx_skb(struct device *d, struct ring_info *tx_skb,
@@ -4217,7 +4400,7 @@ static void rtl8169_tx_clear_range(struc
unsigned int i;
for (i = 0; i < n; i++) {
- unsigned int entry = (start + i) % NUM_TX_DESC;
+ unsigned int entry = (start + i) % tp->num_tx_allocd;
struct ring_info *tx_skb = tp->tx_skb + entry;
unsigned int len = tx_skb->len;
@@ -4237,7 +4420,7 @@ static void rtl8169_tx_clear_range(struc
static void rtl8169_tx_clear(struct rtl8169_private *tp)
{
- rtl8169_tx_clear_range(tp, tp->dirty_tx, NUM_TX_DESC);
+ rtl8169_tx_clear_range(tp, tp->dirty_tx, tp->num_tx_allocd);
tp->cur_tx = tp->dirty_tx = 0;
}
@@ -4310,7 +4493,7 @@ static void rtl8169_reset_task(struct wo
rtl8169_rx_interrupt(dev, tp, tp->mmio_addr, ~(u32)0);
rtl8169_tx_clear(tp);
- if (tp->dirty_rx == tp->cur_rx) {
+ if (tp->totl_rx_replenished == tp->cur_rx) {
rtl8169_init_ring_indexes(tp);
rtl_hw_start(dev);
netif_wake_queue(dev);
@@ -4350,7 +4533,7 @@ static int rtl8169_xmit_frags(struct rtl
u32 status, len;
void *addr;
- entry = (entry + 1) % NUM_TX_DESC;
+ entry = (entry + 1) % tp->num_tx_allocd;
txd = tp->TxDescArray + entry;
len = frag->size;
@@ -4364,7 +4547,9 @@ static int rtl8169_xmit_frags(struct rtl
}
/* anti gcc 2.95.3 bugware (sic) */
- status = opts1 | len | (RingEnd * !((entry + 1) % NUM_TX_DESC));
+ status =
+ opts1 | len | (RingEnd *
+ !((entry + 1) % tp->num_tx_allocd));
txd->opts1 = cpu_to_le32(status);
txd->addr = cpu_to_le64(mapping);
@@ -4408,7 +4593,7 @@ static netdev_tx_t rtl8169_start_xmit(st
struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- unsigned int entry = tp->cur_tx % NUM_TX_DESC;
+ unsigned int entry = tp->cur_tx % tp->num_tx_allocd;
struct TxDesc *txd = tp->TxDescArray + entry;
void __iomem *ioaddr = tp->mmio_addr;
struct device *d = &tp->pci_dev->dev;
@@ -4418,7 +4603,8 @@ static netdev_tx_t rtl8169_start_xmit(st
int frags;
if (unlikely(TX_BUFFS_AVAIL(tp) < skb_shinfo(skb)->nr_frags)) {
- netif_err(tp, drv, dev, "BUG! Tx Ring full when queue awake!\n");
+ netif_err(tp, drv, dev,
+ "BUG! Tx Ring full when queue awake!\n");
goto err_stop_0;
}
@@ -4452,7 +4638,7 @@ static netdev_tx_t rtl8169_start_xmit(st
wmb();
/* anti gcc 2.95.3 bugware (sic) */
- status = opts1 | len | (RingEnd * !((entry + 1) % NUM_TX_DESC));
+ status = opts1 | len | (RingEnd * !((entry + 1) % tp->num_tx_allocd));
txd->opts1 = cpu_to_le32(status);
tp->cur_tx += frags + 1;
@@ -4512,11 +4698,13 @@ static void rtl8169_pcierr_interrupt(str
pci_write_config_word(pdev, PCI_STATUS,
pci_status & (PCI_STATUS_DETECTED_PARITY |
- PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_REC_MASTER_ABORT |
- PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_SIG_TARGET_ABORT));
+ PCI_STATUS_SIG_SYSTEM_ERROR |
+ PCI_STATUS_REC_MASTER_ABORT |
+ PCI_STATUS_REC_TARGET_ABORT |
+ PCI_STATUS_SIG_TARGET_ABORT));
/* The infamous DAC f*ckup only happens at boot time */
- if ((tp->cp_cmd & PCIDAC) && !tp->dirty_rx && !tp->cur_rx) {
+ if ((tp->cp_cmd & PCIDAC) && !tp->totl_rx_replenished && !tp->cur_rx) {
void __iomem *ioaddr = tp->mmio_addr;
netif_info(tp, intr, dev, "disabling PCI DAC\n");
@@ -4541,7 +4729,7 @@ static void rtl8169_tx_interrupt(struct
tx_left = tp->cur_tx - dirty_tx;
while (tx_left > 0) {
- unsigned int entry = dirty_tx % NUM_TX_DESC;
+ unsigned int entry = dirty_tx % tp->num_tx_allocd;
struct ring_info *tx_skb = tp->tx_skb + entry;
u32 status;
@@ -4597,29 +4785,110 @@ static inline void rtl8169_rx_csum(struc
skb_checksum_none_assert(skb);
}
-static struct sk_buff *rtl8169_try_rx_copy(void *data,
- struct rtl8169_private *tp,
- int pkt_size,
- dma_addr_t addr)
+/* rtl8169_rx_deliver : delivers one rx skb up to higher netif layer
+ * and copies or replenishes the skb as needed.
+ * @tp -> private cb for this NIC
+ * @entry == index of rx descriptor in ring
+ * @polling == whether polling or not (see comments for rx_interrupt)
+ * we guarantee that the received packet will be passed up to the higher layer.
+ * we also try to ensure that a buffer is available for next receive on this skb,
+ * but do not guarantee that.
+ * This function does not write or read to the asic registers
+ * and does not return any return code - work is reported via the descriptors.
+ * "original" skb means the one previously in the ring
+ * "returned" skb means the one passed up
+ * these may be the same or different :
+ * if packet size sufficiently small relative to rx_copybreak mod parm,
+ * then try to copy the entire active skb to a new one, and,
+ * if successful, return the new and leave the original as active.
+ * otherwise, return the original and try to replenish the ring.
+ */
+
+void rtl8169_rx_deliver(struct rtl8169_private *tp, unsigned int entry,
+ int polling)
{
- struct sk_buff *skb;
- struct device *d = &tp->pci_dev->dev;
+ struct RxDesc *desc;
+ u32 opts1;
+ struct sk_buff *original_skb;
+ struct sk_buff *returned_skb;
+ dma_addr_t addr;
+ int pkt_size;
+ struct pci_dev *pdev;
+
+ desc = tp->RxDescArray + entry;
+ opts1 = le32_to_cpu(desc->opts1);
+ original_skb = tp->Rx_skbuff[entry];
+ addr = le64_to_cpu(desc->addr);
+ pkt_size = (opts1 & 0x00001FFF) - 4;
+ pdev = tp->pci_dev;
+
+ dprintk
+ ("rtl8169_rx_deliver entry= %d opts1= 0x%X pkt_size= %d
polling= 0x%X\n",
+ entry, opts1, pkt_size, polling);
+
+ if (pkt_size < rx_copybreak) {
+ returned_skb = netdev_alloc_skb_ip_align(tp->dev, pkt_size);
+ if (returned_skb) {
+ dma_sync_single_for_cpu(&pdev->dev, addr, pkt_size,
+ PCI_DMA_FROMDEVICE);
+ prefetch(original_skb->data);
+ memcpy(returned_skb->data, original_skb->data,
+ pkt_size);
+ dma_sync_single_for_device(&pdev->dev, addr, pkt_size,
+ PCI_DMA_FROMDEVICE);
+ rtl8169_mark_to_asic(desc, rx_buf_sz);
+ tp->totl_rx_replenished++;
+ tp->copied_rx_pkt_count++;
+ } else {
+ /* can't replenish (out of storage ) */
+ rtl8169_make_unusable_by_asic(desc);
+ dma_unmap_single(&pdev->dev, addr, rx_buf_sz,
+ PCI_DMA_FROMDEVICE);
+ tp->Rx_skbuff[entry] = NULL;
+ returned_skb = original_skb;
+ tp->totl_rx_alloc_fail++;
+ }
+ } else {
+ returned_skb = original_skb;
+ dma_unmap_single(&pdev->dev, addr, rx_buf_sz,
+ PCI_DMA_FROMDEVICE);
+ /* following may fail in which case it sets the skbuff ptr to 0 */
+#ifdef RTL8169_DEBUG
+ /* to simulate alloc failure every n attempts */
+ if (simulate_alloc_fail && ((simulate_alloc_fail & entry) != 0))
+ tp->Rx_skbuff[entry] = 0;
+ else
+#endif /* RTL8169_DEBUG */
+ tp->Rx_skbuff[entry] =
+ rtl8169_alloc_rx_skb(tp, desc, GFP_ATOMIC);
+ if (tp->Rx_skbuff[entry]) {
+ tp->totl_rx_replenished++;
+ } else {
+ rtl8169_make_unusable_by_asic(desc);
+ tp->totl_rx_alloc_fail++;
+ }
+ }
- data = rtl8169_align(data);
- dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
- prefetch(data);
- skb = netdev_alloc_skb_ip_align(tp->dev, pkt_size);
- if (skb)
- memcpy(skb->data, data, pkt_size);
- dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
+ rtl8169_rx_csum(returned_skb, opts1);
+ skb_put(returned_skb, pkt_size);
+ returned_skb->protocol = eth_type_trans(returned_skb, tp->dev);
+
+ rtl8169_rx_vlan_tag(desc, returned_skb);
+
+ if (likely(polling)) {
+ napi_gro_receive(&tp->napi, returned_skb);
+ dprintk("rtl8169_rx_deliver explicit napi_gro_receive\n");
+ } else {
+ netif_rx(returned_skb);
+ dprintk("rtl8169_rx_deliver explicit netif_rx\n");
+ }
- return skb;
}
/*
* Warning : rtl8169_rx_interrupt() might be called :
* 1) from NAPI (softirq) context
- * (polling = 1 : we should call netif_receive_skb())
+ * (polling = 1 : we should call napi_gro_receive())
* 2) from process context (rtl8169_reset_task())
* (polling = 0 : we must call netif_rx() instead)
*/
@@ -4628,71 +4897,55 @@ static int rtl8169_rx_interrupt(struct n
void __iomem *ioaddr, u32 budget)
{
unsigned int cur_rx, rx_left;
- unsigned int count;
+
+ unsigned int replenish_rx_cursor_delta; /* amount by which to advance cursor */
+ unsigned int count; /* number of completed buffers handled in this call */
+ unsigned int number_to_replenish; /* num buffers to replenish after delivering */
int polling = (budget != ~(u32)0) ? 1 : 0;
cur_rx = tp->cur_rx;
- rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx;
+ rx_left = tp->num_rx_allocd + tp->totl_rx_replenished - cur_rx;
rx_left = min(rx_left, budget);
for (; rx_left > 0; rx_left--, cur_rx++) {
- unsigned int entry = cur_rx % NUM_RX_DESC;
+ unsigned int entry = cur_rx % tp->num_rx_allocd;
struct RxDesc *desc = tp->RxDescArray + entry;
- u32 status;
+ u32 opts1;
rmb();
- status = le32_to_cpu(desc->opts1);
+ opts1 = le32_to_cpu(desc->opts1);
- if (status & DescOwn)
+ if (opts1 & DescOwn)
break;
- if (unlikely(status & RxRES)) {
- netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n",
- status);
+ if (unlikely(opts1 & RxRES)) {
+ netif_info(tp, rx_err, dev, "Rx ERROR. opts1 = %08x\n",
+ opts1);
dev->stats.rx_errors++;
- if (status & (RxRWT | RxRUNT))
+ if (opts1 & (RxRWT | RxRUNT))
dev->stats.rx_length_errors++;
- if (status & RxCRC)
+ if (opts1 & RxCRC)
dev->stats.rx_crc_errors++;
- if (status & RxFOVF) {
+ if (opts1 & RxFOVF) {
rtl8169_schedule_work(dev, rtl8169_reset_task);
dev->stats.rx_fifo_errors++;
}
rtl8169_mark_to_asic(desc, rx_buf_sz);
} else {
- struct sk_buff *skb;
- dma_addr_t addr = le64_to_cpu(desc->addr);
- int pkt_size = (status & 0x00001FFF) - 4;
+ int pkt_size = (opts1 & 0x00001FFF) - 4;
/*
* The driver does not support incoming fragmented
* frames. They are seen as a symptom of over-mtu
* sized frames.
*/
- if (unlikely(rtl8169_fragmented_frame(status))) {
+ if (unlikely(rtl8169_fragmented_frame(opts1))) {
dev->stats.rx_dropped++;
dev->stats.rx_length_errors++;
rtl8169_mark_to_asic(desc, rx_buf_sz);
continue;
}
- skb = rtl8169_try_rx_copy(tp->Rx_databuff[entry],
- tp, pkt_size, addr);
- rtl8169_mark_to_asic(desc, rx_buf_sz);
- if (!skb) {
- dev->stats.rx_dropped++;
- continue;
- }
-
- rtl8169_rx_csum(skb, status);
- skb_put(skb, pkt_size);
- skb->protocol = eth_type_trans(skb, dev);
-
- rtl8169_rx_vlan_tag(desc, skb);
-
- if (likely(polling))
- napi_gro_receive(&tp->napi, skb);
- else
- netif_rx(skb);
+ rtl8169_rx_deliver(tp, entry, polling);
dev->stats.rx_bytes += pkt_size;
dev->stats.rx_packets++;
@@ -4706,10 +4959,36 @@ static int rtl8169_rx_interrupt(struct n
}
}
- count = cur_rx - tp->cur_rx;
+ replenish_rx_cursor_delta = count = cur_rx - tp->cur_rx;
tp->cur_rx = cur_rx;
- tp->dirty_rx += count;
+ /* try to replenish buffers that any previous rtl8169_rx_deliver
+ * failed to. Note that these may not be contiguous -
+ * alloc success and fail may be interleaved.
+ * replenish_rx_cursor marks the earliest unreplenished.
+ */
+
+ number_to_replenish = (tp->cur_rx - tp->totl_rx_replenished);
+
+ if (number_to_replenish > 0) {
+ replenish_rx_cursor_delta =
+ rtl8169_rx_fill(tp, tp->replenish_rx_cursor, 0,
+ &number_to_replenish, GFP_ATOMIC);
+ if (!replenish_rx_cursor_delta)
+ netif_info(tp, intr, dev, "no Rx buffer allocated\n");
+ tp->totl_rx_replenished += number_to_replenish;
+ }
+ tp->replenish_rx_cursor =
+ ((tp->replenish_rx_cursor +
+ replenish_rx_cursor_delta) % tp->num_rx_allocd);
+
+ /*
+ * exhaustion of available buffers may kill the Rx process.
+ * the previous code tries to replenish but may fail. To prevent that,
+ * set or let default rx_copybreak to maximum value to copy every buffer.
+ */
+ if ((tp->totl_rx_replenished + tp->num_rx_allocd) == tp->cur_rx)
+ netif_emerg(tp, intr, dev, "Rx buffers exhausted\n");
return count;
}
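
A side note on the counter arithmetic, with a standalone sketch of my
own (an illustration, not part of the patch): TX_BUFFS_AVAIL and the
ring bookkeeping rely on cur_tx/dirty_tx being monotonically-ascending
u32 values, so the unsigned subtractions stay correct even after the
counters wrap past 2**32 :

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t num_tx_allocd = 64;		/* ring size actually allocated */
	uint32_t dirty_tx = 0xfffffff0u;	/* monotonic counter just below the wrap... */
	uint32_t cur_tx = 0x00000002u;		/* ...and one that has wrapped past it */

	/* unsigned wraparound makes both subtractions come out right */
	printf("in flight     : %u\n", cur_tx - dirty_tx);	/* prints 18 */
	printf("tx buffs avail: %u\n",
	       dirty_tx + num_tx_allocd - cur_tx - 1);		/* prints 45 */

	/* caveat: (counter % num_allocd) indexing is only consistent
	 * across the 2**32 wrap when num_allocd is a power of two,
	 * which the ethtool-settable ring sizes are not forced to be */
	printf("ring entry    : %u\n", cur_tx % num_tx_allocd);
	return 0;
}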