lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
Date: Wed, 20 Apr 2011 23:52:25 -0400 From: John Lumby <johnlumby@...mail.com> To: Francois Romieu <romieu@...zoreil.com> CC: netdev@...r.kernel.org, Ben Hutchings <bhutchings@...arflare.com>, nic_swsd@...ltek.com Subject: Re: r8169 : always copying the rx buffer to new skb On 04/20/11 15:13, Francois Romieu wrote: > > Why don't you send the patch through the mailing list ? > > (hint, hint) > based on 2.6.39-rc2. also has changes for ethtool - . get and set ring parms (suggested by Ben) . get and set rx_copybreak - not sure if this is a good idea or not, as it's a driver parm, not NIC setting, but there are 22 net drivers that have the parm so I thought maybe useful. ------------------------------------------------------------------------------------- --- linux-2.6.39-rc2FCrtl/drivers/net/r8169.c.orig 2011-04-05 21:30:43.000000000 -0400 +++ linux-2.6.39-rc2FCrtl/drivers/net/r8169.c 2011-04-20 21:34:42.000000000 -0400 @@ -56,7 +56,7 @@ (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN) #define TX_BUFFS_AVAIL(tp) \ - (tp->dirty_tx + NUM_TX_DESC - tp->cur_tx - 1) + (tp->dirty_tx + tp->num_tx_allocd - tp->cur_tx - 1) /* Maximum number of multicast addresses to filter (vs. Rx-all-multicast). The RTL chips use a 64 element hash table based on the Ethernet CRC. 
*/ @@ -74,11 +74,19 @@ static const int multicast_filter_limit #define R8169_REGS_SIZE 256 #define R8169_NAPI_WEIGHT 64 -#define NUM_TX_DESC 64 /* Number of Tx descriptor registers */ -#define NUM_RX_DESC 256 /* Number of Rx descriptor registers */ -#define RX_BUF_SIZE 1536 /* Rx Buffer size */ -#define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc)) -#define R8169_RX_RING_BYTES (NUM_RX_DESC * sizeof(struct RxDesc)) +/* #define NUM_TX_DESC 64 Number of Tx descriptor registers is now based on variable num_tx_allocd */ +/* #define NUM_RX_DESC 256 Number of in-use Rx descriptor registers is now based on variable num_rx_allocd : + see comments attached to definition of that variable */ +#define MIN_NUM_RX_DESC 16 /* minimum number of Rx descriptor registers with which the chip can operate ? */ +#define MAX_NUM_RX_DESC 256 /* maximum number of Rx descriptor registers with which the chip can operate ? */ +#define MIN_NUM_TX_DESC 16 /* minimum number of Tx descriptor registers with which the chip can operate ? */ +#define MAX_NUM_TX_DESC 64 /* maximum number of Tx descriptor registers with which the chip can operate ? */ + + /* number of in-use Rx descriptors is based on variable num_rx_allocd + ** and num_rx_allocd is always <= num_rx_requested value + */ +#define R8169_RX_RING_BYTES (tp->num_rx_requested * sizeof(struct RxDesc)) +#define R8169_TX_RING_BYTES (tp->num_tx_requested * sizeof(struct TxDesc)) #define RTL8169_TX_TIMEOUT (6*HZ) #define RTL8169_PHY_TIMEOUT (10*HZ) @@ -198,12 +206,23 @@ static DEFINE_PCI_DEVICE_TABLE(rtl8169_p MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl); -static int rx_buf_sz = 16383; +static const int rx_buf_sz = 16383; +/* + * we set our default copybreak very high to eliminate + * the possibility of running out of receive buffers. + * HOWEVER lowering it will reduce memcpying + * and may improve performance significantly. 
+ */ +static int rx_copybreak = 16383; static int use_dac; static struct { u32 msg_enable; -} debug = { -1 }; +} debug = { +-1}; +#ifdef RTL8169_DEBUG +static int simulate_alloc_fail = 0; /* set to (P-1) to fail alloc on all except every P attempts */ +#endif /* RTL8169_DEBUG */ enum rtl_registers { MAC0 = 0, /* Ethernet hardware address. */ MAC4 = 4, @@ -522,16 +541,50 @@ struct rtl8169_private { u32 msg_enable; int chipset; int mac_version; - u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */ - u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */ - u32 dirty_rx; - u32 dirty_tx; + + /* Note - re number of Rx/Tx descriptor buffers allocated : + ** we maintain two values per ring - requested and allocd. + ** requested can be set by ethtool and defaults to the max permitted + ** allocd is the number actually obtained at open and may be less than + ** requested, but provided it is at least the minimum required, we'll continue. + ** ethtool setting is asynchronous and takes effect at next open. + ** The num_xx_allocd count is used as modulus for + ** locating active entries in the array using logic like this snippet + ** in rtl8169_rx_interrupt : + ** entry = cur_rx % num_rx_allocd; + ** The size of each array of per-ring-element thingy is always the maximum. + ** + ** at present, with the tx ring info embedded in private, + ** it is a bit silly pretending to provide a settable tx_requested, + ** but if desired, at expense of extra ptr deref, + ** could change it to an array of pointers + */ + u32 num_tx_requested; /* num Tx buffers requested */ + u32 num_rx_requested; /* num Rx buffers requested */ + u32 num_tx_allocd; /* num Tx descriptor buffers allocated */ + u32 num_rx_allocd; /* num Rx descriptor buffers allocated */ + + /* note - the following two counters are monotonically-ascending - can be thought of + ** as the count of number of buffers which the hardware has accessed. + */ + u32 cur_rx; /* Index of next Rx pkt. 
*/ + u32 cur_tx; /* Index of next Tx pkt. */ + + u32 totl_rx_replenished; /* monotonically-ascending count of replenished buffers */ + u32 replenish_rx_cursor; /* Index of next Rx pkt. to replenish (modulo, not monotonic) */ + /* the following counts pkts copied as opposed to uncopied (unhooked) */ + /* note - count of uncopied packets = cur_rx - copied_rx_pkt_count */ + u32 copied_rx_pkt_count; /* total pkts copied to new skb */ + u32 totl_rx_alloc_fail; /* rx alloc failures */ + u32 dirty_tx; /* monotonic count of transmitted packets (or fragments?) */ struct TxDesc *TxDescArray; /* 256-aligned Tx descriptor ring */ struct RxDesc *RxDescArray; /* 256-aligned Rx descriptor ring */ dma_addr_t TxPhyAddr; dma_addr_t RxPhyAddr; - void *Rx_databuff[NUM_RX_DESC]; /* Rx data buffers */ - struct ring_info tx_skb[NUM_TX_DESC]; /* Tx data buffers */ + struct sk_buff *Rx_skbuff[MAX_NUM_RX_DESC]; /* Rx data buffers */ + struct ring_info tx_skb[MAX_NUM_TX_DESC]; /* Tx data buffers */ + + unsigned align; struct timer_list timer; u16 cp_cmd; u16 intr_event; @@ -569,6 +622,14 @@ struct rtl8169_private { MODULE_AUTHOR("Realtek and the Linux r8169 crew <netdev@...r.kernel.org>"); MODULE_DESCRIPTION("RealTek RTL-8169 Gigabit Ethernet driver"); +module_param(rx_copybreak, int, 0); +MODULE_PARM_DESC(rx_copybreak, "Copy breakpoint for copy-only-tiny-frames"); +#ifdef RTL8169_DEBUG +module_param(simulate_alloc_fail, int, 0); +MODULE_PARM_DESC(simulate_alloc_fail, + "set to (2**P - 1) eg 15, to fail alloc rx skb on all except every 2**P attempts"); +#endif /* RTL8169_DEBUG */ + module_param(use_dac, int, 0); MODULE_PARM_DESC(use_dac, "Enable PCI DAC. 
Unsafe on 32 bit PCI slot."); module_param_named(debug, debug.msg_enable, int, 0); @@ -583,7 +644,7 @@ static int rtl8169_open(struct net_devic static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance); -static int rtl8169_init_ring(struct net_device *dev); +static int rtl8169_init_ring(struct rtl8169_private *tp); static void rtl_hw_start(struct net_device *dev); static int rtl8169_close(struct net_device *dev); static void rtl_set_rx_mode(struct net_device *dev); @@ -1242,6 +1303,15 @@ static int rtl8169_set_settings(struct n spin_lock_irqsave(&tp->lock, flags); ret = rtl8169_set_speed(dev, cmd->autoneg, cmd->speed, cmd->duplex, cmd->advertising); + + /* check that ethtool has set a copybreak value before accepting it */ + if ( (cmd->supported & (SUPPORTED_cmd_extension | + SUPPORTED_cmd_extension_rx_copybreak)) + && (cmd->rx_copybreak <= rx_buf_sz) ) { + rx_copybreak = cmd->rx_copybreak; + netif_info(tp, drv, dev, "set rx_copybreak to %d\n", + rx_copybreak); + } spin_unlock_irqrestore(&tp->lock, flags); return ret; @@ -1254,6 +1324,49 @@ static u32 rtl8169_get_rx_csum(struct ne return tp->cp_cmd & RxChkSum; } +static void rtl8169_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct rtl8169_private *tp = netdev_priv(netdev); + + ring->rx_max_pending = MAX_NUM_RX_DESC; + ring->tx_max_pending = MAX_NUM_TX_DESC; + ring->rx_mini_max_pending = 0; + ring->rx_jumbo_max_pending = 0; + ring->rx_pending = tp->num_rx_allocd; + ring->tx_pending = tp->num_tx_allocd; + ring->rx_mini_pending = 0; + ring->rx_jumbo_pending = 0; +} + +static int rtl8169_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct rtl8169_private *tp = netdev_priv(netdev); + + if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) + return -EINVAL; + + /* I am not sure about closing and opening the NIC here + * so will leave the change pending for 
next open + */ + + tp->num_rx_requested = ((ring->rx_pending < MIN_NUM_RX_DESC) ? + MIN_NUM_RX_DESC : + ((ring->rx_pending > MAX_NUM_RX_DESC) ? + MAX_NUM_RX_DESC : ring->rx_pending)); + tp->num_tx_requested = ((ring->tx_pending < MIN_NUM_TX_DESC) ? + MIN_NUM_TX_DESC : + ((ring->tx_pending > MAX_NUM_TX_DESC) ? + MAX_NUM_TX_DESC : ring->tx_pending)); + + netif_info(tp, drv, netdev, + "Ring sizes to be requested at next open: num rx: %d, num tx %d\n", + tp->num_rx_requested, tp->num_tx_requested); + + return 0; +} + static int rtl8169_set_rx_csum(struct net_device *dev, u32 data) { struct rtl8169_private *tp = netdev_priv(dev); @@ -1351,6 +1464,13 @@ static int rtl8169_get_settings(struct n rc = tp->get_settings(dev, cmd); + /* inform about returning extended info - rx_copybreak + * and initialize so we can detect if set to new val by ethtool + */ + cmd->rx_copybreak = rx_copybreak; + cmd->supported |= SUPPORTED_cmd_extension; + cmd->supported &= ~SUPPORTED_cmd_extension_rx_copybreak; + spin_unlock_irqrestore(&tp->lock, flags); return rc; } @@ -1397,6 +1517,11 @@ static const char rtl8169_gstrings[][ETH "multicast", "tx_aborted", "tx_underrun", + /* extras maintained in driver code */ + "tot rx intrpts", + "tot rx copied", + "tot rx replenished", + "tot rx alloc_fail" }; static int rtl8169_get_sset_count(struct net_device *dev, int sset) @@ -1472,9 +1597,15 @@ static void rtl8169_get_ethtool_stats(st data[10] = le32_to_cpu(tp->counters.rx_multicast); data[11] = le16_to_cpu(tp->counters.tx_aborted); data[12] = le16_to_cpu(tp->counters.tx_underun); + /* extras maintained in driver code */ + data[13] = tp->cur_rx; + data[14] = tp->copied_rx_pkt_count; + data[15] = tp->totl_rx_replenished; + data[16] = tp->totl_rx_alloc_fail; } -static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data) +static void rtl8169_get_strings(struct net_device *dev, u32 stringset, + u8 * data) { switch(stringset) { case ETH_SS_STATS: @@ -1516,6 +1647,8 @@ static const 
struct ethtool_ops rtl8169_ .get_rx_csum = rtl8169_get_rx_csum, .set_rx_csum = rtl8169_set_rx_csum, .set_tx_csum = ethtool_op_set_tx_csum, + .get_ringparam = rtl8169_get_ringparam, + .set_ringparam = rtl8169_set_ringparam, .set_sg = ethtool_op_set_sg, .set_tso = ethtool_op_set_tso, .get_regs = rtl8169_get_regs, @@ -3060,6 +3193,10 @@ rtl8169_init_one(struct pci_dev *pdev, c tp->pci_dev = pdev; tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT); + tp->num_rx_allocd = tp->num_tx_allocd = 0; + tp->num_rx_requested = MAX_NUM_RX_DESC; + tp->num_tx_requested = MAX_NUM_TX_DESC; + mii = &tp->mii; mii->dev = dev; mii->mdio_read = rtl_mdio_read; @@ -3229,6 +3366,7 @@ rtl8169_init_one(struct pci_dev *pdev, c dev->features |= NETIF_F_HW_VLAN_TX_RX | NETIF_F_GRO; tp->intr_mask = 0xffff; + tp->align = cfg->align; tp->hw_start = cfg->hw_start; tp->intr_event = cfg->intr_event; tp->napi_event = cfg->napi_event; @@ -3326,7 +3464,7 @@ static int rtl8169_open(struct net_devic if (!tp->RxDescArray) goto err_free_tx_0; - retval = rtl8169_init_ring(dev); + retval = rtl8169_init_ring(tp); if (retval < 0) goto err_free_rx_1; @@ -4071,14 +4209,15 @@ static inline void rtl8169_make_unusable desc->opts1 &= ~cpu_to_le32(DescOwn | RsvdMask); } -static void rtl8169_free_rx_databuff(struct rtl8169_private *tp, - void **data_buff, struct RxDesc *desc) +static void rtl8169_free_rx_skb(struct rtl8169_private *tp, + struct sk_buff **sk_buff, struct RxDesc *desc) { - dma_unmap_single(&tp->pci_dev->dev, le64_to_cpu(desc->addr), rx_buf_sz, - DMA_FROM_DEVICE); + struct pci_dev *pdev = tp->pci_dev; - kfree(*data_buff); - *data_buff = NULL; + dma_unmap_single(&pdev->dev, le64_to_cpu(desc->addr), rx_buf_sz, + PCI_DMA_FROMDEVICE); + dev_kfree_skb(*sk_buff); /* also frees the data buffer! 
*/ + *sk_buff = NULL; rtl8169_make_unusable_by_asic(desc); } @@ -4102,28 +4241,25 @@ static inline void *rtl8169_align(void * return (void *)ALIGN((long)data, 16); } -static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp, - struct RxDesc *desc) +static struct sk_buff *rtl8169_alloc_rx_skb(struct rtl8169_private *tp, + struct RxDesc *desc, gfp_t gfp) { - void *data; + struct sk_buff *skb; dma_addr_t mapping; struct device *d = &tp->pci_dev->dev; struct net_device *dev = tp->dev; - int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; + unsigned int pad; - data = kmalloc_node(rx_buf_sz, GFP_KERNEL, node); - if (!data) - return NULL; + pad = tp->align ? tp->align : NET_IP_ALIGN; - if (rtl8169_align(data) != data) { - kfree(data); - data = kmalloc_node(rx_buf_sz + 15, GFP_KERNEL, node); - if (!data) - return NULL; - } + skb = __netdev_alloc_skb(dev, rx_buf_sz + pad, gfp); + if (!skb) + goto err_out; + + skb_reserve(skb, + tp->align ? ((pad - 1) & (unsigned long)skb->data) : pad); - mapping = dma_map_single(d, rtl8169_align(data), rx_buf_sz, - DMA_FROM_DEVICE); + mapping = dma_map_single(d, skb->data, rx_buf_sz, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(d, mapping))) { if (net_ratelimit()) netif_err(tp, drv, tp->dev, "Failed to map RX DMA!\n"); @@ -4131,23 +4267,25 @@ static struct sk_buff *rtl8169_alloc_rx_ } rtl8169_map_to_asic(desc, mapping, rx_buf_sz); - return data; +out: + return skb; err_out: - kfree(data); - return NULL; + rtl8169_make_unusable_by_asic(desc); + goto out; } static void rtl8169_rx_clear(struct rtl8169_private *tp) { unsigned int i; - for (i = 0; i < NUM_RX_DESC; i++) { - if (tp->Rx_databuff[i]) { - rtl8169_free_rx_databuff(tp, tp->Rx_databuff + i, + for (i = 0; i < tp->num_rx_allocd; i++) { + if (tp->Rx_skbuff[i]) { + rtl8169_free_rx_skb(tp, tp->Rx_skbuff + i, tp->RxDescArray + i); } } + tp->num_rx_allocd = 0; /* no rx descriptors allocated any more ! 
*/ } static inline void rtl8169_mark_as_last_descriptor(struct RxDesc *desc) @@ -4155,47 +4293,92 @@ static inline void rtl8169_mark_as_last_ desc->opts1 |= cpu_to_le32(RingEnd); } -static int rtl8169_rx_fill(struct rtl8169_private *tp) +/* rtl8169_rx_fill :allocate num_to_alloc rx skb buffers to rx descriptors + * starting with descriptor first_desc. + * this function operates in one of two slightly different modes, + * depending on whether the num_replenished parm is zero or not : + * zero - traverse a fixed number of buffers specified by num_to_alloc, + * allocating those which are empty; + * non-zero - traverse as many buffers as needed + * to replenish num_replenished empty buffers, + * and update the parm with number actually replenished. + * in each case, stop if unable to allocate, + * and in each case return number of buffers traversed. + */ +static u32 rtl8169_rx_fill(struct rtl8169_private *tp, u32 first_desc, + u32 num_to_alloc, u32 * num_replenished, gfp_t gfp) { - unsigned int i; + unsigned int this_desc_index; /* loop through on this */ + u32 count_allocd; /* count allocd */ + u32 num_traversed; /* count num traversed */ + + for (count_allocd = 0, num_traversed = 0, this_desc_index = first_desc; + ((num_replenished && (count_allocd < *num_replenished)) + || (num_traversed < num_to_alloc) + ); num_traversed++) { + struct sk_buff *skb; - for (i = 0; i < NUM_RX_DESC; i++) { - void *data; + if (tp->Rx_skbuff[this_desc_index] == (struct sk_buff *)0) { /* bypass if allocd */ - if (tp->Rx_databuff[i]) - continue; + skb = + rtl8169_alloc_rx_skb(tp, + tp->RxDescArray + + this_desc_index, gfp); + if (!skb) + break; - data = rtl8169_alloc_rx_data(tp, tp->RxDescArray + i); - if (!data) { - rtl8169_make_unusable_by_asic(tp->RxDescArray + i); - goto err_out; - } - tp->Rx_databuff[i] = data; + tp->Rx_skbuff[this_desc_index] = skb; + count_allocd++; } - rtl8169_mark_as_last_descriptor(tp->RxDescArray + NUM_RX_DESC - 1); - return 0; + /* increment this_desc_index 
allowing for modulo num_rx_allocd if latter is > 0 + * also ensuring we stop after one complete circuit + */ + this_desc_index++; + if (this_desc_index == tp->num_rx_allocd) { + this_desc_index = 0; + } + if (this_desc_index == first_desc) { + break; + } + } -err_out: - rtl8169_rx_clear(tp); - return -ENOMEM; + if (num_replenished) + *num_replenished = count_allocd; + return num_traversed; } static void rtl8169_init_ring_indexes(struct rtl8169_private *tp) { - tp->dirty_tx = tp->dirty_rx = tp->cur_tx = tp->cur_rx = 0; + tp->dirty_tx = tp->totl_rx_replenished = tp->totl_rx_alloc_fail = + tp->cur_tx = tp->cur_rx = tp->replenish_rx_cursor = 0; } -static int rtl8169_init_ring(struct net_device *dev) +static int rtl8169_init_ring(struct rtl8169_private *tp) { - struct rtl8169_private *tp = netdev_priv(dev); rtl8169_init_ring_indexes(tp); - memset(tp->tx_skb, 0x0, NUM_TX_DESC * sizeof(struct ring_info)); - memset(tp->Rx_databuff, 0x0, NUM_RX_DESC * sizeof(void *)); + memset(tp->tx_skb, 0x0, MAX_NUM_TX_DESC * sizeof(struct ring_info)); + memset(tp->Rx_skbuff, 0x0, MAX_NUM_RX_DESC * sizeof(struct sk_buff *)); + tp->copied_rx_pkt_count = 0; + + /* see comment preceding defn of num_tx_requested */ + tp->num_tx_allocd = tp->num_tx_requested; + tp->num_rx_allocd = + rtl8169_rx_fill(tp, 0, (u32) tp->num_rx_requested, 0, GFP_KERNEL); + printk(KERN_INFO "%s num_rx_requested= %d num_rx_allocd= %d\n", + MODULENAME, (u32) tp->num_rx_requested, tp->num_rx_allocd); + if (tp->num_rx_allocd < MIN_NUM_RX_DESC) + goto err_out; + + rtl8169_mark_as_last_descriptor(tp->RxDescArray + tp->num_rx_allocd - 1); - return rtl8169_rx_fill(tp); + return 0; + +err_out: + rtl8169_rx_clear(tp); + return -ENOMEM; } static void rtl8169_unmap_tx_skb(struct device *d, struct ring_info *tx_skb, @@ -4217,7 +4400,7 @@ static void rtl8169_tx_clear_range(struc unsigned int i; for (i = 0; i < n; i++) { - unsigned int entry = (start + i) % NUM_TX_DESC; + unsigned int entry = (start + i) % tp->num_tx_allocd; struct 
ring_info *tx_skb = tp->tx_skb + entry; unsigned int len = tx_skb->len; @@ -4237,7 +4420,7 @@ static void rtl8169_tx_clear_range(struc static void rtl8169_tx_clear(struct rtl8169_private *tp) { - rtl8169_tx_clear_range(tp, tp->dirty_tx, NUM_TX_DESC); + rtl8169_tx_clear_range(tp, tp->dirty_tx, tp->num_tx_allocd); tp->cur_tx = tp->dirty_tx = 0; } @@ -4310,7 +4493,7 @@ static void rtl8169_reset_task(struct wo rtl8169_rx_interrupt(dev, tp, tp->mmio_addr, ~(u32)0); rtl8169_tx_clear(tp); - if (tp->dirty_rx == tp->cur_rx) { + if (tp->totl_rx_replenished == tp->cur_rx) { rtl8169_init_ring_indexes(tp); rtl_hw_start(dev); netif_wake_queue(dev); @@ -4350,7 +4533,7 @@ static int rtl8169_xmit_frags(struct rtl u32 status, len; void *addr; - entry = (entry + 1) % NUM_TX_DESC; + entry = (entry + 1) % tp->num_tx_allocd; txd = tp->TxDescArray + entry; len = frag->size; @@ -4364,7 +4547,9 @@ static int rtl8169_xmit_frags(struct rtl } /* anti gcc 2.95.3 bugware (sic) */ - status = opts1 | len | (RingEnd * !((entry + 1) % NUM_TX_DESC)); + status = + opts1 | len | (RingEnd * + !((entry + 1) % tp->num_tx_allocd)); txd->opts1 = cpu_to_le32(status); txd->addr = cpu_to_le64(mapping); @@ -4408,7 +4593,7 @@ static netdev_tx_t rtl8169_start_xmit(st struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); - unsigned int entry = tp->cur_tx % NUM_TX_DESC; + unsigned int entry = tp->cur_tx % tp->num_tx_allocd; struct TxDesc *txd = tp->TxDescArray + entry; void __iomem *ioaddr = tp->mmio_addr; struct device *d = &tp->pci_dev->dev; @@ -4418,7 +4603,8 @@ static netdev_tx_t rtl8169_start_xmit(st int frags; if (unlikely(TX_BUFFS_AVAIL(tp) < skb_shinfo(skb)->nr_frags)) { - netif_err(tp, drv, dev, "BUG! Tx Ring full when queue awake!\n"); + netif_err(tp, drv, dev, + "BUG! 
Tx Ring full when queue awake!\n"); goto err_stop_0; } @@ -4452,7 +4638,7 @@ static netdev_tx_t rtl8169_start_xmit(st wmb(); /* anti gcc 2.95.3 bugware (sic) */ - status = opts1 | len | (RingEnd * !((entry + 1) % NUM_TX_DESC)); + status = opts1 | len | (RingEnd * !((entry + 1) % tp->num_tx_allocd)); txd->opts1 = cpu_to_le32(status); tp->cur_tx += frags + 1; @@ -4512,11 +4698,13 @@ static void rtl8169_pcierr_interrupt(str pci_write_config_word(pdev, PCI_STATUS, pci_status & (PCI_STATUS_DETECTED_PARITY | - PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_REC_MASTER_ABORT | - PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_SIG_TARGET_ABORT)); + PCI_STATUS_SIG_SYSTEM_ERROR | + PCI_STATUS_REC_MASTER_ABORT | + PCI_STATUS_REC_TARGET_ABORT | + PCI_STATUS_SIG_TARGET_ABORT)); /* The infamous DAC f*ckup only happens at boot time */ - if ((tp->cp_cmd & PCIDAC) && !tp->dirty_rx && !tp->cur_rx) { + if ((tp->cp_cmd & PCIDAC) && !tp->totl_rx_replenished && !tp->cur_rx) { void __iomem *ioaddr = tp->mmio_addr; netif_info(tp, intr, dev, "disabling PCI DAC\n"); @@ -4541,7 +4729,7 @@ static void rtl8169_tx_interrupt(struct tx_left = tp->cur_tx - dirty_tx; while (tx_left > 0) { - unsigned int entry = dirty_tx % NUM_TX_DESC; + unsigned int entry = dirty_tx % tp->num_tx_allocd; struct ring_info *tx_skb = tp->tx_skb + entry; u32 status; @@ -4597,29 +4785,110 @@ static inline void rtl8169_rx_csum(struc skb_checksum_none_assert(skb); } -static struct sk_buff *rtl8169_try_rx_copy(void *data, - struct rtl8169_private *tp, - int pkt_size, - dma_addr_t addr) +/* rtl8169_rx_deliver : delivers one rx skb up to higher netif layer + * and copies or replenishes the skb as needed. + * @tp -> private cb for this NIC + * @entry == index of rx descriptor in ring + * @polling == whether polling or not (see comments for rx_interrupt) + * we guarantee that the received packet will be passed up to the higher layer. 
+ * we also try to ensure that a buffer is available for next receive on this skb, + * but do not guarantee that. + * This function does not write or read to the asic registers + * and does not return any return code - work is reported via the descriptors. + * "original" skb means the one previously in the ring + * "returned" skb means the one passed up + * these may be the same or different : + * if packet size sufficiently small relative to rx_copybreak mod parm, + * then try to copy the entire active skb to a new one, and, + * if successful, return the new and leave the original as active. + * otherwise, return the original and try to replenish the ring. + */ + +void rtl8169_rx_deliver(struct rtl8169_private *tp, unsigned int entry, + int polling) { - struct sk_buff *skb; - struct device *d = &tp->pci_dev->dev; + struct RxDesc *desc; + u32 opts1; + struct sk_buff *original_skb; + struct sk_buff *returned_skb; + dma_addr_t addr; + int pkt_size; + struct pci_dev *pdev; + + desc = tp->RxDescArray + entry; + opts1 = le32_to_cpu(desc->opts1); + original_skb = tp->Rx_skbuff[entry]; + addr = le64_to_cpu(desc->addr); + pkt_size = (opts1 & 0x00001FFF) - 4; + pdev = tp->pci_dev; + + dprintk + ("rtl8169_rx_deliver entry= %d opts1= 0x%X pkt_size= %d polling= 0x%X\n", + entry, opts1, pkt_size, polling); + + if (pkt_size < rx_copybreak) { + returned_skb = netdev_alloc_skb_ip_align(tp->dev, pkt_size); + if (returned_skb) { + dma_sync_single_for_cpu(&pdev->dev, addr, pkt_size, + PCI_DMA_FROMDEVICE); + prefetch(original_skb->data); + memcpy(returned_skb->data, original_skb->data, + pkt_size); + dma_sync_single_for_device(&pdev->dev, addr, pkt_size, + PCI_DMA_FROMDEVICE); + rtl8169_mark_to_asic(desc, rx_buf_sz); + tp->totl_rx_replenished++; + tp->copied_rx_pkt_count++; + } else { + /* can't replenish (out of storage ) */ + rtl8169_make_unusable_by_asic(desc); + dma_unmap_single(&pdev->dev, addr, rx_buf_sz, + PCI_DMA_FROMDEVICE); + tp->Rx_skbuff[entry] = NULL; + returned_skb = 
original_skb; + tp->totl_rx_alloc_fail++; + } + } else { + returned_skb = original_skb; + dma_unmap_single(&pdev->dev, addr, rx_buf_sz, + PCI_DMA_FROMDEVICE); + /* following may fail in which case it sets the skbuff ptr to 0 */ +#ifdef RTL8169_DEBUG + /* to simulate alloc failure every n attempts */ + if (simulate_alloc_fail && ((simulate_alloc_fail & entry) != 0)) + tp->Rx_skbuff[entry] = 0; + else +#endif /* RTL8169_DEBUG */ + tp->Rx_skbuff[entry] = + rtl8169_alloc_rx_skb(tp, desc, GFP_ATOMIC); + if (tp->Rx_skbuff[entry]) { + tp->totl_rx_replenished++; + } else { + rtl8169_make_unusable_by_asic(desc); + tp->totl_rx_alloc_fail++; + } + } - data = rtl8169_align(data); - dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE); - prefetch(data); - skb = netdev_alloc_skb_ip_align(tp->dev, pkt_size); - if (skb) - memcpy(skb->data, data, pkt_size); - dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE); + rtl8169_rx_csum(returned_skb, opts1); + skb_put(returned_skb, pkt_size); + returned_skb->protocol = eth_type_trans(returned_skb, tp->dev); + + rtl8169_rx_vlan_tag(desc, returned_skb); + + if (likely(polling)) { + napi_gro_receive(&tp->napi, returned_skb); + dprintk("rtl8169_rx_deliver explicit napi_gro_receive\n"); + } else { + netif_rx(returned_skb); + dprintk("rtl8169_rx_deliver explicit netif_rx\n"); + } - return skb; } /* * Warning : rtl8169_rx_interrupt() might be called : * 1) from NAPI (softirq) context - * (polling = 1 : we should call netif_receive_skb()) + * (polling = 1 : we should call napi_gro_receive()) * 2) from process context (rtl8169_reset_task()) * (polling = 0 : we must call netif_rx() instead) */ @@ -4628,71 +4897,55 @@ static int rtl8169_rx_interrupt(struct n void __iomem *ioaddr, u32 budget) { unsigned int cur_rx, rx_left; - unsigned int count; + + unsigned int replenish_rx_cursor_delta; /* amount by which to advance cursor */ + unsigned int count; /* number of completed buffers handled in this call */ + unsigned int 
number_to_replenish; /* num buffers to replenish after delivering */ int polling = (budget != ~(u32)0) ? 1 : 0; cur_rx = tp->cur_rx; - rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx; + rx_left = tp->num_rx_allocd + tp->totl_rx_replenished - cur_rx; rx_left = min(rx_left, budget); for (; rx_left > 0; rx_left--, cur_rx++) { - unsigned int entry = cur_rx % NUM_RX_DESC; + unsigned int entry = cur_rx % tp->num_rx_allocd; struct RxDesc *desc = tp->RxDescArray + entry; - u32 status; + u32 opts1; rmb(); - status = le32_to_cpu(desc->opts1); + opts1 = le32_to_cpu(desc->opts1); - if (status & DescOwn) + if (opts1 & DescOwn) break; - if (unlikely(status & RxRES)) { - netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n", - status); + if (unlikely(opts1 & RxRES)) { + netif_info(tp, rx_err, dev, "Rx ERROR. opts1 = %08x\n", + opts1); dev->stats.rx_errors++; - if (status & (RxRWT | RxRUNT)) + if (opts1 & (RxRWT | RxRUNT)) dev->stats.rx_length_errors++; - if (status & RxCRC) + if (opts1 & RxCRC) dev->stats.rx_crc_errors++; - if (status & RxFOVF) { + if (opts1 & RxFOVF) { rtl8169_schedule_work(dev, rtl8169_reset_task); dev->stats.rx_fifo_errors++; } rtl8169_mark_to_asic(desc, rx_buf_sz); } else { - struct sk_buff *skb; - dma_addr_t addr = le64_to_cpu(desc->addr); - int pkt_size = (status & 0x00001FFF) - 4; + int pkt_size = (opts1 & 0x00001FFF) - 4; /* * The driver does not support incoming fragmented * frames. They are seen as a symptom of over-mtu * sized frames. 
*/ - if (unlikely(rtl8169_fragmented_frame(status))) { + if (unlikely(rtl8169_fragmented_frame(opts1))) { dev->stats.rx_dropped++; dev->stats.rx_length_errors++; rtl8169_mark_to_asic(desc, rx_buf_sz); continue; } - skb = rtl8169_try_rx_copy(tp->Rx_databuff[entry], - tp, pkt_size, addr); - rtl8169_mark_to_asic(desc, rx_buf_sz); - if (!skb) { - dev->stats.rx_dropped++; - continue; - } - - rtl8169_rx_csum(skb, status); - skb_put(skb, pkt_size); - skb->protocol = eth_type_trans(skb, dev); - - rtl8169_rx_vlan_tag(desc, skb); - - if (likely(polling)) - napi_gro_receive(&tp->napi, skb); - else - netif_rx(skb); + rtl8169_rx_deliver(tp, entry, polling); dev->stats.rx_bytes += pkt_size; dev->stats.rx_packets++; @@ -4706,10 +4959,36 @@ static int rtl8169_rx_interrupt(struct n } } - count = cur_rx - tp->cur_rx; + replenish_rx_cursor_delta = count = cur_rx - tp->cur_rx; tp->cur_rx = cur_rx; - tp->dirty_rx += count; + /* try to replenish buffers that any previous rtl8169_rx_deliver + * failed to. Note that these may not be contiguous - + * alloc success and fail may be interleaved. + * replenish_rx_cursor marks the earliest unreplenished. + */ + + number_to_replenish = (tp->cur_rx - tp->totl_rx_replenished); + + if (number_to_replenish > 0) { + replenish_rx_cursor_delta = + rtl8169_rx_fill(tp, tp->replenish_rx_cursor, 0, + &number_to_replenish, GFP_ATOMIC); + if (!replenish_rx_cursor_delta) + netif_info(tp, intr, dev, "no Rx buffer allocated\n"); + tp->totl_rx_replenished += number_to_replenish; + } + tp->replenish_rx_cursor = + ((tp->replenish_rx_cursor + + replenish_rx_cursor_delta) % tp->num_rx_allocd); + + /* + * exhaustion of available buffers may kill the Rx process. + * the previous code tries to replenish but may fail. To prevent that, + * set or let default rx_copybreak to maximum value to copy every buffer. 
+ */ + if ((tp->totl_rx_replenished + tp->num_rx_allocd) == tp->cur_rx) + netif_emerg(tp, intr, dev, "Rx buffers exhausted\n"); return count; } -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists