lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Tue, 3 Jun 2008 10:16:20 -0700 From: Stephen Hemminger <shemminger@...tta.com> To: "Subbu Seetharaman" <subbus@...verengines.com> Cc: netdev@...r.kernel.org Subject: Re: [PATCH 2/12] BE NIC driver - interrupt, ethtool, stack i/f functions On Tue, 03 Jun 2008 02:39:11 -0700 "Subbu Seetharaman" <subbus@...verengines.com> wrote: > Signed-off-by: Subbu Seetharaman <subbus@...verengines.com> > --- > drivers/net/benet/be_ethtool.c | 337 ++++++++++++++++ > drivers/net/benet/be_int.c | 843 ++++++++++++++++++++++++++++++++++++++++ > drivers/net/benet/be_netif.c | 693 +++++++++++++++++++++++++++++++++ > 3 files changed, 1873 insertions(+), 0 deletions(-) > create mode 100644 drivers/net/benet/be_ethtool.c > create mode 100644 drivers/net/benet/be_int.c > create mode 100644 drivers/net/benet/be_netif.c > > diff --git a/drivers/net/benet/be_ethtool.c b/drivers/net/benet/be_ethtool.c > new file mode 100644 > index 0000000..0841580 > --- /dev/null > +++ b/drivers/net/benet/be_ethtool.c > @@ -0,0 +1,337 @@ > +/* > + * Copyright (C) 2005 - 2008 ServerEngines > + * All rights reserved. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License version 2 > + * as published by the Free Software Foundation. The full GNU General > + * Public License is included in this distribution in the file called COPYING. > + * > + * Contact Information: > + * linux-drivers@...verengines.com > + * > + * ServerEngines > + * 209 N. Fair Oaks Ave > + * Sunnyvale, CA 94085 > + */ > +/* > + * be_ethtool.c > + * > + * This file contains various functions that ethtool can use > + * to talk to the driver and the BE H/W. 
> + */ > + > +#include <linux/pci.h> > +#include "benet.h" > + > +#include <linux/ethtool.h> > + > +static const char benet_gstrings_stats[][ETH_GSTRING_LEN] = { > +/* net_device_stats */ > + "rx_packets", > + "tx_packets", > + "rx_bytes", > + "tx_bytes", > + "rx_errors", > + "tx_errors", > + "rx_dropped", > + "tx_dropped", > + "multicast", > + "collisions", > + "rx_length_errors", > + "rx_over_errors", > + "rx_crc_errors", > + "rx_frame_errors", > + "rx_fifo_errors", > + "rx_missed_errors", > + "tx_aborted_errors", > + "tx_carrier_errors", > + "tx_fifo_errors", > + "tx_heartbeat_errors", > + "tx_window_errors", > + "rx_compressed", > + "tc_compressed", > +/* BE driver Stats */ > + "bes_tx_reqs", > + "bes_tx_fails", > + "bes_fwd_reqs", > + "bes_tx_wrbs", > + "bes_interrupts", > + "bes_events", > + "bes_tx_events", > + "bes_ucrx_events", > + "bes_bcrx_events", > + "bes_tx_compl", > + "bes_ucrx_compl", > + "bes_bcrx_compl", > + "bes_ethrx_post_fail", > + "bes_802_3_dropped_frames", > + "bes_802_3_malformed_frames", > + "bes_rx_misc_pkts", > + "bes_eth_tx_rate", > + "bes_eth_rx_rate", > + "Num Packets collected", > + "Num Times Flushed", > +}; > + > +#define NET_DEV_STATS_LEN \ > + (sizeof(struct net_device_stats)/sizeof(unsigned long)) > +#define BENET_STATS_LEN ARRAY_SIZE(benet_gstrings_stats) > + > +static void > +be_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) > +{ > + struct bni_net_object *pnob = netdev->priv; > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + > + strncpy(drvinfo->driver, be_driver_name, 32); > + strncpy(drvinfo->version, be_drvr_ver, 32); > + strncpy(drvinfo->fw_version, be_fw_ver, 32); > + strcpy(drvinfo->bus_info, pci_name(adapter->pdev)); > + drvinfo->testinfo_len = 0; > + drvinfo->regdump_len = 0; > + drvinfo->eedump_len = 0; > +} > + > +static int > +be_get_coalesce(struct net_device *netdev, > + struct ethtool_coalesce *coalesce) > +{ > + struct bni_net_object *pnob = netdev->priv; > + struct 
be_adapter *adapter = OSM_NOB(pnob)->adapter; > + > + coalesce->rx_max_coalesced_frames = adapter->max_rx_coal; > + > + coalesce->rx_coalesce_usecs = adapter->cur_eqd; > + coalesce->rx_coalesce_usecs_high = adapter->max_eqd; > + coalesce->rx_coalesce_usecs_low = adapter->min_eqd; > + > + coalesce->tx_coalesce_usecs = adapter->cur_eqd; > + coalesce->tx_coalesce_usecs_high = adapter->max_eqd; > + coalesce->tx_coalesce_usecs_low = adapter->min_eqd; > + > + coalesce->use_adaptive_rx_coalesce = adapter->enable_aic; > + coalesce->use_adaptive_tx_coalesce = adapter->enable_aic; > + > + return 0; > +} > + > +/* > + * This routine is used to set interrup coalescing delay *as well as* > + * the number of pkts to coalesce for LRO. > + */ > +static int > +be_set_coalesce(struct net_device *netdev, > + struct ethtool_coalesce *coalesce) > +{ > + struct bni_net_object *pnob = netdev->priv; > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + u32 max, min, cur; > + > + adapter->max_rx_coal = coalesce->rx_max_coalesced_frames; > + if (adapter->max_rx_coal >= BE_LRO_MAX_PKTS) > + adapter->max_rx_coal = BE_LRO_MAX_PKTS; > + > + if (adapter->enable_aic == 0 && > + coalesce->use_adaptive_rx_coalesce == 1) { > + /* if AIC is being turned on now, start with an EQD of 0 */ > + adapter->cur_eqd = 0; > + } > + adapter->enable_aic = coalesce->use_adaptive_rx_coalesce; > + > + /* round off to nearest multiple of 8 */ > + max = (((coalesce->rx_coalesce_usecs_high + 4) >> 3) << 3); > + min = (((coalesce->rx_coalesce_usecs_low + 4) >> 3) << 3); > + cur = (((coalesce->rx_coalesce_usecs + 4) >> 3) << 3); > + > + if (adapter->enable_aic) { > + /* accept low and high if AIC is enabled */ > + if (max > MAX_EQD) > + min = MAX_EQD; > + if (min > max) > + min = max; > + adapter->max_eqd = max; > + adapter->min_eqd = min; > + if (adapter->cur_eqd > max) > + adapter->cur_eqd = max; > + if (adapter->cur_eqd < min) > + adapter->cur_eqd = min; > + } else { > + /* accept specified coalesce_usecs only 
if AIC is disabled */ > + if (cur > MAX_EQD) > + cur = MAX_EQD; > + if (bni_change_eqd(pnob, cur) == BE_SUCCESS) > + adapter->cur_eqd = cur; > + } > + > + return 0; > +} > + > +static u32 be_get_rx_csum(struct net_device *netdev) > +{ > + struct bni_net_object *pnob = netdev->priv; > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + return adapter->rx_csum; > +} > + > +static int be_set_rx_csum(struct net_device *netdev, uint32_t data) > +{ > + struct bni_net_object *pnob = netdev->priv; > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + if (data) > + adapter->rx_csum = 1; > + else > + adapter->rx_csum = 0; > + > + return 0; > +} > + > +static void > +be_get_strings(struct net_device *netdev, uint32_t stringset, > + uint8_t *data) > +{ > + > + switch (stringset) { > + case ETH_SS_STATS: > + memcpy(data, *benet_gstrings_stats, > + sizeof(benet_gstrings_stats)); > + break; > + } > +} > + > +static int be_get_stats_count(struct net_device *netdev) > +{ > + return BENET_STATS_LEN; > +} > + > +static void > +be_get_ethtool_stats(struct net_device *netdev, > + struct ethtool_stats *stats, uint64_t *data) > +{ > + struct bni_net_object *pnob = netdev->priv; > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + int i; > + > + benet_get_stats(netdev); > + > + for (i = 0; i <= NET_DEV_STATS_LEN; i++) > + data[i] = ((unsigned long *)&adapter->benet_stats)[i]; > + > + data[i] = adapter->be_stat.bes_tx_reqs; You can use dev->stats rather than adapter->be_stat. 
> + data[i++] = adapter->be_stat.bes_tx_fails; > + data[i++] = adapter->be_stat.bes_fwd_reqs; > + data[i++] = adapter->be_stat.bes_tx_wrbs; > + > + data[i++] = adapter->be_stat.bes_ints; > + data[i++] = adapter->be_stat.bes_events; > + data[i++] = adapter->be_stat.bes_tx_events; > + data[i++] = adapter->be_stat.bes_ucrx_events; > + data[i++] = adapter->be_stat.bes_bcrx_events; > + data[i++] = adapter->be_stat.bes_tx_compl; > + data[i++] = adapter->be_stat.bes_ucrx_compl; > + data[i++] = adapter->be_stat.bes_bcrx_compl; > + data[i++] = adapter->be_stat.bes_ethrx_post_fail; > + data[i++] = adapter->be_stat.bes_802_3_dropped_frames; > + data[i++] = adapter->be_stat.bes_802_3_malformed_frames; > + data[i++] = adapter->be_stat.bes_rx_misc_pkts; > + data[i++] = adapter->be_stat.bes_eth_tx_rate; > + data[i++] = adapter->be_stat.bes_eth_rx_rate; > + data[i++] = adapter->be_stat.bes_rx_coal; > + data[i++] = adapter->be_stat.bes_rx_flush; > + > +} > + > +/* Get the Ring parameters from the pnob */ > +static void > +be_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) > +{ > + struct bni_net_object *pnob = netdev->priv; > + > + /* Pre Set Maxims */ > + ring->rx_max_pending = pnob->rx_q_len; > + ring->rx_mini_max_pending = ring->rx_mini_max_pending; > + ring->rx_jumbo_max_pending = ring->rx_jumbo_max_pending; > + ring->tx_max_pending = pnob->tx_q_len; > + > + /* Current hardware Settings */ > + ring->rx_pending = atomic_read(&pnob->rx_q_posted); > + ring->rx_mini_pending = ring->rx_mini_pending; > + ring->rx_jumbo_pending = ring->rx_jumbo_pending; > + ring->tx_pending = atomic_read(&pnob->tx_q_used); > + > +} > + > +static void > +be_get_pauseparam(struct net_device *netdev, > + struct ethtool_pauseparam *ecmd) > +{ > + struct bni_net_object *pnob = netdev->priv; > + bool rxfc = FALSE; > + bool txfc = FALSE; > + BESTATUS status; > + > + status = bni_get_flow_ctl(&pnob->fn_obj, &txfc, &rxfc); > + if (status != BE_SUCCESS) > + printk(KERN_WARNING "Unable 
to get pause frame settings\n"); > + > + if (txfc == TRUE) > + ecmd->tx_pause = 1; > + else > + ecmd->tx_pause = 0; > + > + if (rxfc == TRUE) > + ecmd->rx_pause = 1; > + else > + ecmd->rx_pause = 0; > + > + /* Always setting autoneg to TRUE */ > + ecmd->autoneg = 1; > +} > + > +static int > +be_set_pauseparam(struct net_device *netdev, > + struct ethtool_pauseparam *ecmd) > +{ > + struct bni_net_object *pnob = netdev->priv; > + bool txfc = FALSE; > + bool rxfc = FALSE; > + BESTATUS status; > + > + if (ecmd->tx_pause) > + txfc = TRUE; > + else > + txfc = FALSE; > + > + if (ecmd->rx_pause) > + rxfc = TRUE; > + else > + rxfc = FALSE; > + > + status = bni_set_flow_ctll(&pnob->fn_obj, txfc, rxfc); > + if (status != BE_SUCCESS) { > + printk(KERN_ERR "Unable to set pause frame settings\n"); > + return -1; > + } > + return 0; > +} > + > +struct ethtool_ops be_ethtool_ops = { > + .get_drvinfo = be_get_drvinfo, > + .get_link = ethtool_op_get_link, > + .get_coalesce = be_get_coalesce, > + .set_coalesce = be_set_coalesce, > + .get_ringparam = be_get_ringparam, > + .get_pauseparam = be_get_pauseparam, > + .set_pauseparam = be_set_pauseparam, > + .get_rx_csum = be_get_rx_csum, > + .set_rx_csum = be_set_rx_csum, > + .get_tx_csum = ethtool_op_get_tx_csum, > + .set_tx_csum = ethtool_op_set_tx_csum, > + .get_sg = ethtool_op_get_sg, > + .set_sg = ethtool_op_set_sg, > + .get_tso = ethtool_op_get_tso, > + .set_tso = ethtool_op_set_tso, > + .get_strings = be_get_strings, > + .get_stats_count = be_get_stats_count, > + .get_ethtool_stats = be_get_ethtool_stats, > +}; > diff --git a/drivers/net/benet/be_int.c b/drivers/net/benet/be_int.c > new file mode 100644 > index 0000000..1ec2a61 > --- /dev/null > +++ b/drivers/net/benet/be_int.c > @@ -0,0 +1,843 @@ > +/* > + * Copyright (C) 2005 - 2008 ServerEngines > + * All rights reserved. 
> + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License version 2 > + * as published by the Free Software Foundation. The full GNU General > + * Public License is included in this distribution in the file called COPYING. > + * > + * Contact Information: > + * linux-drivers@...verengines.com > + * > + * ServerEngines > + * 209 N. Fair Oaks Ave > + * Sunnyvale, CA 94085 > + */ > +#include <linux/pci.h> > +#include <linux/if_vlan.h> > + > +#include <linux/inet_lro.h> > + > +#include "benet.h" > + > +/* number of bytes of RX frame that are copied to skb->data */ > +#define BE_HDR_LEN 64 > + > +#ifdef CONFIG_BENET_NAPI > +#define NETIF_RX(skb) netif_receive_skb(skb) > +#define VLAN_ACCEL_RX(skb, pnob, vt) \ > + vlan_hwaccel_rx(skb, OSM_NOB(pnob)->vlan_grp, vt) > +#else > +#define NETIF_RX(skb) netif_rx(skb) > +#define VLAN_ACCEL_RX(skb, pnob, vt) \ > + vlan_hwaccel_rx(skb, OSM_NOB(pnob)->vlan_grp, vt) > +#endif > + > +/* > + * adds additional receive frags indicated by BE starting from given > + * frag index (fi) to specified skb's frag list > + */ > +static void > +add_skb_frags(struct bni_net_object *pnob, struct sk_buff *skb, > + u32 nresid, u32 fi) > +{ > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + u32 sk_frag_idx, n; > + struct be_rx_page_info *rx_page_info; > + u32 frag_sz = pnob->rx_buf_size; > + > + sk_frag_idx = skb_shinfo(skb)->nr_frags; > + while (nresid) { > + index_advance(&fi, pnob->rx_q_len); > + > + rx_page_info = (struct be_rx_page_info *)pnob->rx_ctxt[fi]; > + pnob->rx_ctxt[fi] = (void *)NULL; > + if ((rx_page_info->page_offset) || > + (OSM_NOB(pnob)->rx_pg_shared == FALSE)) { > + pci_unmap_page(adapter->pdev, > + pci_unmap_addr(rx_page_info, bus), > + frag_sz, PCI_DMA_FROMDEVICE); > + } > + > + n = min(nresid, frag_sz); > + skb_shinfo(skb)->frags[sk_frag_idx].page = rx_page_info->page; > + skb_shinfo(skb)->frags[sk_frag_idx].page_offset > + = 
rx_page_info->page_offset; > + skb_shinfo(skb)->frags[sk_frag_idx].size = n; > + > + sk_frag_idx++; > + skb->len += n; > + skb->data_len += n; > + skb_shinfo(skb)->nr_frags++; > + nresid -= n; > + > + memset(rx_page_info, 0, sizeof(struct be_rx_page_info)); > + atomic_dec(&pnob->rx_q_posted); > + } > +} > + > +/* > + * This function processes incoming nic packets over various Rx queues. > + * This function takes the adapter, the current Rx status descriptor > + * entry and the Rx completion queue ID as argument. > + */ > +static inline int process_nic_rx_completion(struct bni_net_object *pnob, > + struct ETH_RX_COMPL_AMAP *rxcp) > +{ > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + struct sk_buff *skb; > + int udpcksm, tcpcksm; > + int n, fi; > + u32 nresid; > + u32 frag_sz = pnob->rx_buf_size; > + u8 *va; > + struct be_rx_page_info *rx_page_info; > + u32 numfrags, vtp, vtm, vlan_tag, pktsize; > + > + fi = AMAP_GET_BITS_PTR(ETH_RX_COMPL, fragndx, rxcp); > + BUG_ON(fi >= (int)pnob->rx_q_len); > + BUG_ON(fi < 0); > + > + rx_page_info = (struct be_rx_page_info *)pnob->rx_ctxt[fi]; > + BUG_ON(!rx_page_info->page); > + pnob->rx_ctxt[fi] = NULL; > + > + /* > + * If one page is used per fragment or if this is the second half of > + * of the page, unmap the page here > + */ > + if ((rx_page_info->page_offset) || > + (OSM_NOB(pnob)->rx_pg_shared == FALSE)) { > + pci_unmap_page(adapter->pdev, > + pci_unmap_addr(rx_page_info, bus), frag_sz, > + PCI_DMA_FROMDEVICE); > + } > + > + atomic_dec(&pnob->rx_q_posted); > + udpcksm = AMAP_GET_BITS_PTR(ETH_RX_COMPL, udpcksm, rxcp); > + tcpcksm = AMAP_GET_BITS_PTR(ETH_RX_COMPL, tcpcksm, rxcp); > + pktsize = AMAP_GET_BITS_PTR(ETH_RX_COMPL, pktsize, rxcp); > + /* > + * get rid of RX flush completions first. 
> + */ > + if ((tcpcksm) && (udpcksm) && (pktsize == 32)) { > + put_page(rx_page_info->page); > + memset(rx_page_info, 0, sizeof(struct be_rx_page_info)); > + return 0; > + } > + skb = alloc_skb(BE_HDR_LEN + 16, GFP_ATOMIC); Use netdev_alloc_skb(), it adds padding necessary for bridging etc. > + if (skb == NULL) { > + printk(KERN_WARNING "alloc_skb() failed\n"); > + put_page(rx_page_info->page); > + memset(rx_page_info, 0, sizeof(struct be_rx_page_info)); > + goto free_frags; > + } > + skb_reserve(skb, NET_IP_ALIGN); > + > + skb->dev = OSM_NOB(pnob)->netdev; > + > + n = min(pktsize, frag_sz); > + > + va = page_address(rx_page_info->page) + rx_page_info->page_offset; > + prefetch(va); > + > + skb->len = skb->data_len = n; > + if (n <= BE_HDR_LEN) { > + memcpy(skb->data, va, n); > + put_page(rx_page_info->page); > + skb->data_len -= n; > + skb->tail += n; > + } else { > + > + /* Setup the SKB with page buffer information */ > + skb_shinfo(skb)->frags[0].page = rx_page_info->page; > + skb_shinfo(skb)->nr_frags++; > + > + /* Copy the header into the skb_data */ > + memcpy(skb->data, va, BE_HDR_LEN); > + skb_shinfo(skb)->frags[0].page_offset = > + rx_page_info->page_offset + BE_HDR_LEN; > + skb_shinfo(skb)->frags[0].size = n - BE_HDR_LEN; > + skb->data_len -= BE_HDR_LEN; > + skb->tail += BE_HDR_LEN; > + } > + memset(rx_page_info, 0, sizeof(struct be_rx_page_info)); > + nresid = pktsize - n; > + > + skb->protocol = eth_type_trans(skb, OSM_NOB(pnob)->netdev); > + > + if ((tcpcksm || udpcksm) && adapter->rx_csum) > + skb->ip_summed = CHECKSUM_UNNECESSARY; > + else > + skb->ip_summed = CHECKSUM_NONE; > + /* > + * if we have more bytes left, the frame has been > + * given to us in multiple fragments. This happens > + * with Jumbo frames. Add the remaining fragments to > + * skb->frags[] array. > + */ > + if (nresid) > + add_skb_frags(pnob, skb, nresid, fi); > + > + /* update the true size of the skb. 
*/ > + skb->truesize = skb->len + sizeof(struct sk_buff); > + > + /* > + * If a 802.3 frame or 802.2 LLC frame > + * (i.e) contains length field in MAC Hdr > + * and frame len is greater than 64 bytes > + */ > + if (((skb->protocol == ntohs(ETH_P_802_2)) || > + (skb->protocol == ntohs(ETH_P_802_3))) > + && (pktsize > BE_HDR_LEN)) { > + /* > + * If the length given in Mac Hdr is less than frame size > + * Erraneous frame, Drop it > + */ > + if ((ntohs(*(u16 *) (va + 12)) + ETH_HLEN) < pktsize) { > + /* Increment Non Ether type II frames dropped */ > + adapter->be_stat.bes_802_3_dropped_frames++; > + > + kfree_skb(skb); > + return 0; > + } > + /* > + * else if the length given in Mac Hdr is greater than > + * frame size, should not be seeing this sort of frames > + * dump the pkt and pass to stack > + */ > + else if ((ntohs(*(u16 *) (va + 12)) + ETH_HLEN) > pktsize) { > + /* Increment Non Ether type II frames malformed */ > + adapter->be_stat.bes_802_3_malformed_frames++; > + } > + } > + > + vtp = AMAP_GET_BITS_PTR(ETH_RX_COMPL, vtp, rxcp); > + vtm = AMAP_GET_BITS_PTR(ETH_RX_COMPL, vtm, rxcp); > + if (vtp && vtm) { > + /* Vlan tag present in pkt and BE found > + * that the tag matched an entry in VLAN table > + */ > + if (!(OSM_NOB(pnob)->vlan_grp) || > + OSM_NOB(pnob)->num_vlans == 0) { > + /* But we have no VLANs configured. > + * This should never happen. Drop the packet. 
> + */ > + printk(KERN_ERR > + "BladeEngine: Unexpected vlan tagged packet\n"); > + kfree_skb(skb); > + return 0; > + } > + /* pass the VLAN packet to stack */ > + vlan_tag = AMAP_GET_BITS_PTR(ETH_RX_COMPL, vlan_tag, rxcp); > + VLAN_ACCEL_RX(skb, pnob, be16_to_cpu(vlan_tag)); > + > + } else { > + NETIF_RX(skb); > + } > + > + return 0; > +free_frags: > + /* free all frags associated with the current rxcp */ > + numfrags = AMAP_GET_BITS_PTR(ETH_RX_COMPL, numfrags, rxcp); > + while (numfrags-- > 1) { > + index_advance(&fi, pnob->rx_q_len); > + > + rx_page_info = (struct be_rx_page_info *) > + pnob->rx_ctxt[fi]; > + pnob->rx_ctxt[fi] = (void *)NULL; > + if ((rx_page_info->page_offset) || > + (OSM_NOB(pnob)->rx_pg_shared == FALSE)) { > + pci_unmap_page(adapter->pdev, > + pci_unmap_addr(rx_page_info, bus), > + frag_sz, PCI_DMA_FROMDEVICE); > + } > + > + put_page(rx_page_info->page); > + memset(rx_page_info, 0, sizeof(struct be_rx_page_info)); > + atomic_dec(&pnob->rx_q_posted); > + } > + return -ENOMEM; > +} > + > +static void process_nic_rx_completion_lro(struct bni_net_object *pnob, > + struct ETH_RX_COMPL_AMAP *rxcp) > +{ > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + struct skb_frag_struct rx_frags[BE_MAX_FRAGS_PER_FRAME]; > + unsigned int udpcksm, tcpcksm; > + u32 numfrags, vlanf, vtm, vlan_tag, nresid; > + u16 vlant; > + unsigned int fi, idx, n; > + struct be_rx_page_info *rx_page_info; > + u32 frag_sz = pnob->rx_buf_size, pktsize; > + bool rx_coal = (adapter->max_rx_coal <= 1) ? 0 : 1; > + u8 err, *va; > + __wsum csum = 0; > + > + if (AMAP_GET_BITS_PTR(ETH_RX_COMPL, ipsec, rxcp)) { > + /* Drop the pkt and move to the next completion. */ > + adapter->be_stat.bes_rx_misc_pkts++; > + return; > + } > + err = AMAP_GET_BITS_PTR(ETH_RX_COMPL, err, rxcp); > + if (err || !rx_coal) { > + /* We won't coalesce Rx pkts if the err bit set. 
> + * take the path of normal completion processing */ > + process_nic_rx_completion(pnob, rxcp); > + return; > + } > + > + fi = AMAP_GET_BITS_PTR(ETH_RX_COMPL, fragndx, rxcp); > + BUG_ON(fi >= (int)pnob->rx_q_len); > + BUG_ON(fi < 0); > + rx_page_info = (struct be_rx_page_info *)pnob->rx_ctxt[fi]; > + BUG_ON(!rx_page_info->page); > + pnob->rx_ctxt[fi] = (void *)NULL; > + /* If one page is used per fragment or if this is the > + * second half of the page, unmap the page here > + */ > + if ((rx_page_info->page_offset) || > + (OSM_NOB(pnob)->rx_pg_shared == FALSE)) { > + pci_unmap_page(adapter->pdev, > + pci_unmap_addr(rx_page_info, bus), > + frag_sz, PCI_DMA_FROMDEVICE); > + } > + > + numfrags = AMAP_GET_BITS_PTR(ETH_RX_COMPL, numfrags, rxcp); > + udpcksm = AMAP_GET_BITS_PTR(ETH_RX_COMPL, udpcksm, rxcp); > + tcpcksm = AMAP_GET_BITS_PTR(ETH_RX_COMPL, tcpcksm, rxcp); > + vlan_tag = AMAP_GET_BITS_PTR(ETH_RX_COMPL, vlan_tag, rxcp); > + vlant = be16_to_cpu(vlan_tag); > + vlanf = AMAP_GET_BITS_PTR(ETH_RX_COMPL, vtp, rxcp); > + vtm = AMAP_GET_BITS_PTR(ETH_RX_COMPL, vtm, rxcp); > + pktsize = AMAP_GET_BITS_PTR(ETH_RX_COMPL, pktsize, rxcp); > + > + atomic_dec(&pnob->rx_q_posted); > + > + if (tcpcksm && udpcksm && pktsize == 32) { > + /* flush completion entries */ > + put_page(rx_page_info->page); > + memset(rx_page_info, 0, sizeof(struct be_rx_page_info)); > + return; > + } > + /* Only one of udpcksum and tcpcksum can be set */ > + BUG_ON(udpcksm && tcpcksm); > + > + /* jumbo frames could come in multiple fragments */ > + BUG_ON(numfrags != ((pktsize + (frag_sz - 1)) / frag_sz)); > + n = min(pktsize, frag_sz); > + nresid = pktsize - n; /* will be useful for jumbo pkts */ > + idx = 0; > + > + va = page_address(rx_page_info->page) + rx_page_info->page_offset; > + prefetch(va); > + rx_frags[idx].page = rx_page_info->page; > + rx_frags[idx].page_offset = (rx_page_info->page_offset); > + rx_frags[idx].size = n; > + memset(rx_page_info, 0, sizeof(struct be_rx_page_info)); > + > + 
/* If we got multiple fragments, we have more data. */ > + while (nresid) { > + idx++; > + index_advance(&fi, pnob->rx_q_len); > + > + rx_page_info = (struct be_rx_page_info *)pnob->rx_ctxt[fi]; > + pnob->rx_ctxt[fi] = (void *)NULL; > + if ((rx_page_info->page_offset) || > + (OSM_NOB(pnob)->rx_pg_shared == FALSE)) { > + pci_unmap_page(adapter->pdev, > + pci_unmap_addr(rx_page_info, bus), > + frag_sz, PCI_DMA_FROMDEVICE); > + } > + > + n = min(nresid, frag_sz); > + rx_frags[idx].page = rx_page_info->page; > + rx_frags[idx].page_offset = (rx_page_info->page_offset); > + rx_frags[idx].size = n; > + > + nresid -= n; > + memset(rx_page_info, 0, sizeof(struct be_rx_page_info)); > + atomic_dec(&pnob->rx_q_posted); > + } > + > + if (likely(!(vlanf && vtm))) { > + lro_receive_frags(&OSM_NOB(pnob)->lro_mgr, rx_frags, > + pktsize, pktsize, > + (void *)(unsigned long)csum, csum); > + } else { > + /* Vlan tag present in pkt and BE found > + * that the tag matched an entry in VLAN table > + */ > + if (unlikely(!(OSM_NOB(pnob)->vlan_grp) || > + OSM_NOB(pnob)->num_vlans == 0)) { > + /* But we have no VLANs configured. > + * This should never happen. Drop the packet. > + */ > + printk(KERN_ERR "BladeEngine: Unexpected" > + " vlan tagged packet\n"); > + return; > + } > + /* pass the VLAN packet to stack */ > + lro_vlan_hwaccel_receive_frags(&OSM_NOB(pnob)->lro_mgr, > + rx_frags, pktsize, pktsize, > + OSM_NOB(pnob)->vlan_grp, vlant, > + (void *)(unsigned long)csum, > + csum); > + } > + > + adapter->be_stat.bes_rx_coal++; > +} > + > +static void process_ucast_rx_completion(struct bni_net_object *pnob) > +{ > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + struct ETH_RX_COMPL_AMAP *rxcp; > + u32 nc = 0; > + unsigned int pktsize; > + int rearm = 1; > + > +#ifdef CONFIG_BENET_NAPI > + if (OSM_NOB(pnob)->work_quota == 0) > + /* > + * We were called from process_events without quota > + * because the device is not open yet. Give ourselves > + * a large quota. 
> + */ > + OSM_NOB(pnob)->work_quota = 128; > + while ((OSM_NOB(pnob)->work_quota) && (rxcp = bni_get_ucrx_cmpl(pnob))) > +#else > + while ((rxcp = bni_get_ucrx_cmpl(pnob))) > +#endif > + { > + prefetch(rxcp); > + pktsize = AMAP_GET_BITS_PTR(ETH_RX_COMPL, pktsize, rxcp); > + process_nic_rx_completion_lro(pnob, rxcp); > + adapter->eth_rx_bytes += pktsize; > + > + /* RX rate calculation. */ > + update_rx_rate(adapter); > + nc++; /* number of cq entries that we have processed */ > + adapter->be_stat.bes_ucrx_compl++; > +#ifdef CONFIG_BENET_NAPI > + OSM_NOB(pnob)->work_quota--; > +#endif > + } > + if (likely(adapter->max_rx_coal > 1)) { > + adapter->be_stat.bes_rx_flush++; > + lro_flush_all(&OSM_NOB(pnob)->lro_mgr); > + } > + > +#ifdef CONFIG_BENET_NAPI > + if (OSM_NOB(pnob)->work_quota == 0) { > + /* we ran out of work budget */ > + rearm = 0; > + } else { > + /* we finished all work. We are in interrupt mode */ > + rearm = 1; > + } > +#endif > + /* > + * we call notfiy completions even when nc is zero, since > + * rearm value needs to take effect > + */ > + bni_notify_cmpl(pnob, nc, pnob->ucrx_cq_id, rearm); > +} > + > +/* > + * Process broadcast and multicat completions > + */ > +static void process_bcast_rx_completion(struct bni_net_object *pnob) > +{ > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + struct ETH_RX_COMPL_AMAP *rxcp; > + > + u32 nc = 0; > + > + adapter->be_stat.bes_bcrx_events++; > + > + while ((rxcp = (bni_get_bcrx_cmpl(pnob)))) { > + process_nic_rx_completion(pnob, rxcp); > + nc++; > + adapter->be_stat.bes_bcrx_compl++; > + } > + bni_notify_cmpl(pnob, nc, pnob->bcrx_cq_id, 1); > + > +} > + > +/* Process NIC TX COMPLETIONS */ > +static void process_nic_tx_completions(struct bni_net_object *pnob) > +{ > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + struct ETH_TX_COMPL_AMAP *txcp; /* Eth Tx completion entry */ > + struct net_device *netdev = (struct net_device *) > + OSM_NOB(pnob)->netdev; > + int num_processed = 0, cur_index, 
tx_wrbs_completed = 0, exp_index; > + struct sk_buff *skb; > + u64 busaddr, pa, pa_lo, pa_hi; > + struct ETH_WRB_AMAP *curr_wrb; > + u32 frag_len, wrb_index; > + > + adapter->be_stat.bes_tx_events++; > + /* > + * there is no need to take an SMP lock here since currently > + * we have only one instance of the tasklet that does completion > + * processing. > + */ > + > + /* process each valid completion entry */ > + while ((txcp = bni_get_tx_cmpl(pnob))) { > + /* Get the expected completion index */ > + exp_index = (pnob->tx_q_tl + > + ((int)pnob->tx_ctxt[pnob->tx_q_tl] - 1)) > + & (pnob->tx_q_len - 1); > + pnob->tx_ctxt[pnob->tx_q_tl] = NULL; > + wrb_index = AMAP_GET_BITS_PTR(ETH_TX_COMPL, wrb_index, txcp); > + if (exp_index != wrb_index) { > + printk(KERN_ERR "Expected Wrb Index (=%d) does not" > + "match with completion Wrb Index (=%d)\n", > + exp_index, wrb_index); > + } > + /* > + * All reqs in the TX ring from the current tail index upto > + * the one indicated in this completion entry's wrb_index > + * are now completed. > + */ > + do { > + cur_index = pnob->tx_q_tl; > + > + curr_wrb = &pnob->tx_q[cur_index]; > + pa_hi = AMAP_GET_BITS_PTR(ETH_WRB, frag_pa_hi, > + curr_wrb); > + pa_lo = AMAP_GET_BITS_PTR(ETH_WRB, frag_pa_lo, > + curr_wrb); > + frag_len = AMAP_GET_BITS_PTR(ETH_WRB, frag_len, > + curr_wrb); > + busaddr = (pa_hi << 32) | pa_lo; > + if (busaddr != 0) { > + pa = le64_to_cpu(busaddr); > + pci_unmap_single(adapter->pdev, pa, > + frag_len, PCI_DMA_TODEVICE); > + } > + /* > + * this Tx request is complete. The OSM context > + * we stored is the skb address. free this skb. 
> + */ > + skb = (struct sk_buff *)pnob->tx_ctxt[cur_index]; > + if (skb) { > + unsigned int j; > + > + for (j = 0; j < skb_shinfo(skb)->nr_frags; > + j++) { > + struct skb_frag_struct *frag; > + frag = &skb_shinfo(skb)->frags[j]; > + pci_unmap_page(adapter->pdev, > + (ulong) frag->page, > + frag->size, > + PCI_DMA_TODEVICE); > + } > + kfree_skb(skb); > + pnob->tx_ctxt[cur_index] = NULL; > + } > + > + tx_wrbs_completed++; > + bni_adv_txq_tl(pnob); > + } while (cur_index != wrb_index); > + > + num_processed++; > + adapter->be_stat.bes_tx_compl++; > + } > + atomic_sub(tx_wrbs_completed, &pnob->tx_q_used); > + bni_notify_cmpl(pnob, num_processed, pnob->tx_cq_id, 1); > + /* > + * We got Tx completions and have usable WRBs. > + * If the netdev's queue has been stopped > + * because we had run out of WRBs, wake it now. > + */ > + spin_lock(&adapter->txq_lock); > + if (netif_queue_stopped(netdev) > + && atomic_read(&pnob->tx_q_used) < pnob->tx_q_len / 2) { > + netif_wake_queue(netdev); > + } > + spin_unlock(&adapter->txq_lock); > +} > + > +/* > + * posts receive buffers to the Eth receive queue. > + */ > +void be_post_eth_rx_buffs(struct bni_net_object *pnob) > +{ > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + u32 num_bufs, r; > + u64 busaddr = 0, tmp_pa; > + u32 max_bufs, pg_hd; > + u32 frag_size; > + struct bni_recv_buffer *rxbp; > + struct list_head rxbl; > + struct be_rx_page_info *rx_page_info; > + struct page *page = NULL; > + u32 page_order = 0; > + gfp_t alloc_flags = GFP_ATOMIC; > + > + BUG_ON(!adapter); > + > + max_bufs = 64; /* should be even # <= 255. */ > + > + frag_size = pnob->rx_buf_size; > + page_order = get_order(frag_size); > + > + if (frag_size == 8192) > + alloc_flags |= (gfp_t) __GFP_COMP; > + /* > + * Form a linked list of RECV_BUFFFER structure to be be posted. > + * We will post even number of buffer so that pages can be > + * shared. 
> + */ > + INIT_LIST_HEAD(&rxbl); > + > + for (num_bufs = 0; num_bufs < max_bufs; ++num_bufs) { > + > + rxbp = &(OSM_NOB(pnob)->eth_rx_bufs[num_bufs]); > + pg_hd = OSM_NOB(pnob)->rx_pg_info_hd; > + rx_page_info = &OSM_NOB(pnob)->rx_page_info[pg_hd]; > + > + if (!page) { > + /* > + * before we allocate a page make sure that we > + * have space in the RX queue to post the buffer. > + * We check for two vacant slots since with > + * 2K frags, we will need two slots. > + */ > + if ((pnob->rx_ctxt[(pnob->rx_q_hd + num_bufs) & > + (pnob->rx_q_len - 1)] != NULL) > + || (pnob->rx_ctxt[(pnob->rx_q_hd + num_bufs + 1) % > + pnob->rx_q_len] != NULL)) { > + break; > + } > + page = alloc_pages(alloc_flags, page_order); > + if (unlikely(page == NULL)) { > + adapter->be_stat.bes_ethrx_post_fail++; > + OSM_NOB(pnob)->rxbuf_post_fail++; > + break; > + } > + OSM_NOB(pnob)->rxbuf_post_fail = 0; > + busaddr = pci_map_page(adapter->pdev, page, 0, > + frag_size, PCI_DMA_FROMDEVICE); > + rx_page_info->page_offset = 0; > + rx_page_info->page = page; > + /* > + * If we are sharing a page among two skbs, > + * alloc a new one on the next iteration > + */ > + if (OSM_NOB(pnob)->rx_pg_shared == FALSE) > + page = NULL; > + } else { > + get_page(page); > + rx_page_info->page_offset += frag_size; > + rx_page_info->page = page; > + /* > + * We are finished with the alloced page, > + * Alloc a new one on the next iteration > + */ > + page = NULL; > + } > + rxbp->rxb_ctxt = (void *)rx_page_info; > + index_advance(&OSM_NOB(pnob)->rx_pg_info_hd, pnob->rx_q_len); > + > + pci_unmap_addr_set(rx_page_info, bus, busaddr); > + tmp_pa = busaddr + rx_page_info->page_offset; > + rxbp->rxb_pa_lo = (tmp_pa & 0xFFFFFFFF); > + rxbp->rxb_pa_hi = (tmp_pa >> 32); > + rxbp->rxb_len = frag_size; > + list_add_tail(&rxbp->rxb_list, &rxbl); > + } /* End of for */ > + > + r = bni_post_rx_buffs(pnob, &rxbl); > + BUG_ON(r != num_bufs); > + return; > +} > + > +/* > + * Interrupt service for network function. 
We just schedule the > + * tasklet which does all completion processing. > + */ > +irqreturn_t be_int(int irq, void *dev) > +{ > + struct net_device *netdev = dev; > + struct bni_net_object *pnob = (struct bni_net_object *)(netdev->priv); > + struct be_adapter *adapter = (struct be_adapter *) > + OSM_NOB(pnob)->adapter; > + u32 isr; > + > + /* > + * If not our interrupt, just return. > + */ > + isr = bni_get_isr(pnob); > + if (unlikely(!isr)) > + return 0; > + > + spin_lock(&adapter->int_lock); > + adapter->isr |= isr; > + spin_unlock(&adapter->int_lock); > + > + adapter->be_stat.bes_ints++; > + > + tasklet_schedule(&adapter->sts_handler); > + return 1; > +} > + > +#ifdef CONFIG_BENET_NAPI > +/* > + * Poll function called by NAPI with a work budget. > + * We process as many UC. BC and MC receive completions > + * as the budget allows and return the actual number of > + * RX ststutses processed. > + */ > +int be_poll(struct napi_struct *napi, int budget) > +{ > + struct net_device *netdev = napi->dev; > + struct bni_net_object *pnob = (struct bni_net_object *)netdev->priv; > + struct be_adapter *adapter = (struct be_adapter *) > + OSM_NOB(pnob)->adapter; > + u32 work_done; > + > + adapter->be_stat.bes_polls++; > + OSM_NOB(pnob)->work_quota = budget; > + process_ucast_rx_completion(pnob); > + process_bcast_rx_completion(pnob); > + if (atomic_read(&pnob->rx_q_posted) < 900) > + be_post_eth_rx_buffs(pnob); > + > + work_done = (budget - OSM_NOB(pnob)->work_quota); > + > + if (OSM_NOB(pnob)->work_quota) { > + netif_rx_complete(netdev, napi); > + > + /* If another rx was attempted while we were in poll, > + * schedule again */ > + spin_lock_bh(&OSM_NOB(pnob)->rx_lock); > + if (OSM_NOB(pnob)->rx_sched) { > + OSM_NOB(pnob)->rx_sched = FALSE; > + if (netif_rx_schedule_prep(netdev, napi)) > + __netif_rx_schedule(netdev, napi); > + } > + spin_unlock_bh(&OSM_NOB(pnob)->rx_lock); > + } > + return work_done; > +} > + > +static inline void napi_rx_schedule(struct bni_net_object 
*no, > + struct net_device *nd) > +{ > + spin_lock_bh(&OSM_NOB(no)->rx_lock); > + if (netif_rx_schedule_prep(nd, &OSM_NOB(no)->napi)) { > + __netif_rx_schedule(nd, &OSM_NOB(no)->napi); > + OSM_NOB(no)->rx_sched = FALSE; > + } else { > + OSM_NOB(no)->rx_sched = TRUE; > + } > + spin_unlock_bh(&OSM_NOB(no)->rx_lock); > +} > +#endif If you didn't keep rx_sched, you would not need the additional lock round trip. > + > +/* > + * Processes all valid events in the event ring associated with given > + * NetObject. Also, notifies BE the number of events processed. > + */ > +static inline u32 process_events(struct bni_net_object *pnob) > +{ > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + struct EQ_ENTRY_AMAP *eqp; > + u32 rid, num_events = 0; > + > +#ifdef CONFIG_BENET_NAPI > + struct net_device *netdev = OSM_NOB(pnob)->netdev; > +#endif > + > + while ((eqp = bni_get_event(pnob)) != NULL) { > + adapter->be_stat.bes_events++; > + rid = AMAP_GET_BITS_PTR(EQ_ENTRY, ResourceID, eqp); > + > + if (rid == pnob->ucrx_cq_id) { > + adapter->be_stat.bes_ucrx_events++; > +#ifdef CONFIG_BENET_NAPI > + if (adapter->dev_state == BE_DEV_STATE_OPEN) > + napi_rx_schedule(pnob, netdev); > + else > +#endif > + process_ucast_rx_completion(pnob); > + } else if (rid == pnob->bcrx_cq_id) { > + adapter->be_stat.bes_bcrx_events++; > +#ifdef CONFIG_BENET_NAPI > + if (adapter->dev_state == BE_DEV_STATE_OPEN) > + napi_rx_schedule(pnob, netdev); > + else > +#endif > + process_bcast_rx_completion(pnob); > + } else if (rid == pnob->tx_cq_id) { > + process_nic_tx_completions(pnob); > + } else if (rid == pnob->mcc_cq_id) { > + bni_process_mcc_cmpl(&pnob->mcc_q_obj); > + } else { > + printk("Invalid EQ ResourceID %d\n", rid); > + } > + AMAP_SET_BITS_PTR(EQ_ENTRY, Valid, eqp, 0); > + AMAP_SET_BITS_PTR(EQ_ENTRY, ResourceID, eqp, 0); > + num_events++; > + } > + return (num_events); > +} > + > +/* > + * Called from the tasklet scheduled by ISR. All real interrupt processing > + * is done here. 
> + */ > +void be_process_intr(unsigned long context) > +{ > + struct be_adapter *adapter = (struct be_adapter *)context; > + struct bni_net_object *pnob; > + u32 isr, n; > + ulong flags = 0; > + > + isr = adapter->isr; > + > + /* > + * we create only one NIC event queue in Linux. Event is > + * expected only in the first event queue > + */ > + BUG_ON(isr & 0xfffffffe); > + if ((isr & 1) == 0) > + return; /* not our interrupt */ > + pnob = adapter->net_obj; > + n = process_events(pnob); > + /* > + * Clear the event bit. adapter->isr is set by > + * hard interrupt. Prevent race with lock. > + */ > + spin_lock_irqsave(&adapter->int_lock, flags); > + adapter->isr &= ~1; > + spin_unlock_irqrestore(&adapter->int_lock, flags); > + bni_notify_event(pnob, n, 1); > + > +#ifdef CONFIG_BENET_NAPI > + /* > + * In NAPI, posting of rx bufs is normally done > + * in poll. However, if the device is not open > + * or if previous allocation attempts had failed and > + * BE has used up all posted buffers, we need to > + * post here, since be_poll may never be called. > + */ > + if ((adapter->dev_state != BE_DEV_STATE_OPEN && > + atomic_read(&pnob->rx_q_posted) < 900) || > + (OSM_NOB(pnob)->rxbuf_post_fail && > + atomic_read(&pnob->rx_q_posted) == 0)) { > + be_post_eth_rx_buffs(pnob); > + } > +#else > + if (atomic_read(&pnob->rx_q_posted) < 900) > + be_post_eth_rx_buffs(pnob); > +#endif > + update_eqd(adapter, pnob); > + return; > +} > diff --git a/drivers/net/benet/be_netif.c b/drivers/net/benet/be_netif.c > new file mode 100644 > index 0000000..dbd6895 > --- /dev/null > +++ b/drivers/net/benet/be_netif.c > @@ -0,0 +1,693 @@ > +/* > + * Copyright (C) 2005 - 2008 ServerEngines > + * All rights reserved. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License version 2 > + * as published by the Free Software Foundation. 
The full GNU General > + * Public License is included in this distribution in the file called COPYING. > + * > + * Contact Information: > + * linux-drivers@...verengines.com > + * > + * ServerEngines > + * 209 N. Fair Oaks Ave > + * Sunnyvale, CA 94085 > + */ > +/* > + * be_netif.c > + * > + * This file contains various entry points of drivers seen by tcp/ip stack. > + */ > + > +#include <linux/pci.h> > +#include <linux/if_vlan.h> > +#include <linux/in.h> > +#include "benet.h" > +#include <linux/ip.h> > +#include <linux/inet_lro.h> > + > +/* Strings to print Link properties */ > +static char *link_speed[] = { static const char *link_speed[] > + "Invalid link Speed Value", > + "10 Mbps", > + "100 Mbps", > + "1 Gbps", > + "10 Gbps" > +}; > + > +static char *link_duplex[] = { > + "Invalid Duplex Value", > + "Half Duplex", > + "Full Duplex" > +}; > + > +static char *link_state[] = { > + "", > + "(active)" > +}; > + > + > +void be_print_link_info(struct BE_LINK_STATUS *lnk_status) > +{ > + u16 si, di, ai; > + > + /* Port 0 */ > + if (lnk_status->mac0_speed && lnk_status->mac0_duplex) { > + /* Port is up and running */ > + si = (lnk_status->mac0_speed < 5) ? > + lnk_status->mac0_speed : 0; > + di = (lnk_status->mac0_duplex < 3) ? > + lnk_status->mac0_duplex : 0; > + ai = (lnk_status->active_port == 0) ? 1 : 0; > + printk(KERN_INFO "PortNo. 0: Speed - %s %s %s\n", > + link_speed[si], link_duplex[di], link_state[ai]); > + } else > + printk(KERN_INFO "PortNo. 0: Down\n"); > + > + /* Port 1 */ > + if (lnk_status->mac1_speed && lnk_status->mac1_duplex) { > + /* Port is up and running */ > + si = (lnk_status->mac1_speed < 5) ? > + lnk_status->mac1_speed : 0; > + di = (lnk_status->mac1_duplex < 3) ? > + lnk_status->mac1_duplex : 0; > + ai = (lnk_status->active_port == 0) ? 1 : 0; > + printk(KERN_INFO "PortNo. 1: Speed - %s %s %s\n", > + link_speed[si], link_duplex[di], link_state[ai]); > + } else > + printk(KERN_INFO "PortNo. 
1: Down\n"); > + > + return; > +} > + > +static int > +be_get_frag_header(struct skb_frag_struct *frag, void **mac_hdr, > + void **ip_hdr, void **tcpudp_hdr, > + u64 *hdr_flags, void *priv) > +{ > + struct ethhdr *eh; > + struct vlan_ethhdr *veh; > + struct iphdr *iph; > + u8 *va = page_address(frag->page) + frag->page_offset; > + unsigned long ll_hlen; > + > + /* find the mac header, abort if not IPv4 */ > + > + prefetch(va); > + eh = (struct ethhdr *)va; > + *mac_hdr = eh; > + ll_hlen = ETH_HLEN; > + if (eh->h_proto != htons(ETH_P_IP)) { > + if (eh->h_proto == htons(ETH_P_8021Q)) { > + veh = (struct vlan_ethhdr *)va; > + if (veh->h_vlan_encapsulated_proto != htons(ETH_P_IP)) > + return -1; > + > + ll_hlen += VLAN_HLEN; > + > + } else { > + return -1; > + } > + } > + *hdr_flags = LRO_IPV4; > + > + iph = (struct iphdr *)(va + ll_hlen); > + *ip_hdr = iph; > + if (iph->protocol != IPPROTO_TCP) > + return -1; > + *hdr_flags |= LRO_TCP; > + *tcpudp_hdr = (u8 *) (*ip_hdr) + (iph->ihl << 2); > + > + return 0; > +} > + > +static int benet_open(struct net_device *netdev) > +{ > + struct bni_net_object *pnob = (struct bni_net_object *) netdev->priv; > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + struct net_lro_mgr *lro_mgr; > + > + if (adapter->dev_state < BE_DEV_STATE_INIT) > + return -EAGAIN; > + > + lro_mgr = &OSM_NOB(pnob)->lro_mgr; > + lro_mgr->dev = netdev; > + > +#ifdef CONFIG_BENET_NAPI > + lro_mgr->features = LRO_F_NAPI; > +#endif > + lro_mgr->ip_summed = CHECKSUM_UNNECESSARY; > + lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY; > + lro_mgr->max_desc = BE_MAX_LRO_DESCRIPTORS; > + lro_mgr->lro_arr = OSM_NOB(pnob)->lro_desc; > + lro_mgr->get_frag_header = be_get_frag_header; > + lro_mgr->max_aggr = adapter->max_rx_coal; > + lro_mgr->frag_align_pad = 2; > + if (lro_mgr->max_aggr > MAX_SKB_FRAGS) > + lro_mgr->max_aggr = MAX_SKB_FRAGS; > + > + be_update_link_status(adapter); > + > + /* > + * Set carrier on only if Physical Link up > + * Either of the port 
link status up signifies this > + */ > + if ((adapter->port0_link_sts == BE_PORT_LINK_UP) || > + (adapter->port1_link_sts == BE_PORT_LINK_UP)) { > + netif_start_queue(netdev); > + netif_carrier_on(netdev); > + } > + > + bni_enable_eq_intr(pnob); > + adapter->dev_state = BE_DEV_STATE_OPEN; > + > +#ifdef CONFIG_BENET_NAPI > + napi_enable(&OSM_NOB(pnob)->napi); > +#endif > + return 0; > +} > + > +static int benet_close(struct net_device *netdev) > +{ > + struct bni_net_object *pnob = (struct bni_net_object *) netdev->priv; > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + > + /* Stop Transmitting */ > + netif_stop_queue(netdev); > + > + synchronize_irq(netdev->irq); > + > + /* Wait until no more pending transmits */ > + be_wait_nic_tx_cmplx_cmpl(pnob); > + > + adapter->dev_state = BE_DEV_STATE_INIT; > + > + netif_carrier_off(netdev); > + > + adapter->port0_link_sts = BE_PORT_LINK_DOWN; > + adapter->port1_link_sts = BE_PORT_LINK_DOWN; > + > +#ifdef CONFIG_BENET_NAPI > + napi_disable(&OSM_NOB(pnob)->napi); > +#endif > + return 0; > +} > + > +/* > + * Setting a Mac Address for BE > + * Takes netdev and a void pointer as arguments. > + * The pointer holds the new addres to be used. > + */ > +static int benet_set_mac_addr(struct net_device *netdev, void *p) > +{ > + struct sockaddr *addr = p; > + struct bni_net_object *pnob; > + > + pnob = (struct bni_net_object *) netdev->priv; > + > + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); > + bni_set_uc_mac_adr(pnob, 0, 0, OSM_NOB(pnob)->devno, > + netdev->dev_addr, NULL, NULL); > + /* > + * Since we are doing Active-Passive failover, both > + * ports should have matching MAC addresses everytime. 
> + */ > + bni_set_uc_mac_adr(pnob, 1, 0, OSM_NOB(pnob)->devno, > + netdev->dev_addr, NULL, NULL); > + > + return 0; > +} > + > +void be_get_stats_timer_handler(unsigned long context) > +{ > + struct be_timer_ctxt *ctxt = (struct be_timer_ctxt *) context; > + > + if (atomic_read(&ctxt->get_stat_flag)) { > + atomic_dec(&ctxt->get_stat_flag); > + up((void *) ctxt->get_stat_sem_addr); > + } > + del_timer(&ctxt->get_stats_timer); > + return; > +} > + > +void be_get_stat_cb(void *context, BESTATUS status, > + struct MCC_WRB_AMAP *optional_wrb) > +{ > + struct be_timer_ctxt *ctxt = (struct be_timer_ctxt *) context; > + /* > + * just up the semaphore if the get_stat_flag > + * reads 1. so that the waiter can continue. > + * If it is 0, then it was handled by the timer handler. > + */ > + del_timer(&ctxt->get_stats_timer); > + if (atomic_read(&ctxt->get_stat_flag)) { > + atomic_dec(&ctxt->get_stat_flag); > + up((void *) ctxt->get_stat_sem_addr); > + } > +} > + > +struct net_device_stats *benet_get_stats(struct net_device *dev) > +{ > + struct bni_net_object *pnob = dev->priv; > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + u64 pa; > + struct be_timer_ctxt *ctxt = &adapter->timer_ctxt; > + > + if (adapter->dev_state != BE_DEV_STATE_OPEN) { > + /* Return previously read stats */ > + return &(adapter->benet_stats); > + } > + /* Get Physical Addr */ > + pa = pci_map_single(adapter->pdev, adapter->eth_statsp, > + sizeof(struct FWCMD_ETH_GET_STATISTICS), > + PCI_DMA_FROMDEVICE); > + ctxt->get_stat_sem_addr = (unsigned long)&adapter->get_eth_stat_sem; > + atomic_inc(&ctxt->get_stat_flag); > + bni_get_stats(adapter->net_obj, adapter->eth_statsp, > + cpu_to_le64(pa), be_get_stat_cb, (void *) ctxt); > + ctxt->get_stats_timer.data = (unsigned long)ctxt; > + mod_timer(&ctxt->get_stats_timer, (jiffies + (HZ * 2))); > + down((void *) ctxt->get_stat_sem_addr); /* callback will unblock us */ > + > + /* Adding port0 and port1 stats. 
*/ > + adapter->benet_stats.rx_packets = > + adapter->eth_statsp->params.response.p0recvdtotalframes + > + adapter->eth_statsp->params.response.p1recvdtotalframes; > + adapter->benet_stats.tx_packets = > + adapter->eth_statsp->params.response.p0xmitunicastframes + > + adapter->eth_statsp->params.response.p1xmitunicastframes; > + adapter->benet_stats.tx_bytes = > + adapter->eth_statsp->params.response.p0xmitbyteslsd + > + adapter->eth_statsp->params.response.p1xmitbyteslsd; > + adapter->benet_stats.rx_errors = > + adapter->eth_statsp->params.response.p0crcerrors + > + adapter->eth_statsp->params.response.p1crcerrors; > + adapter->benet_stats.rx_errors += > + adapter->eth_statsp->params.response.p0alignmentsymerrs + > + adapter->eth_statsp->params.response.p1alignmentsymerrs; > + adapter->benet_stats.rx_errors += > + adapter->eth_statsp->params.response.p0inrangelenerrors + > + adapter->eth_statsp->params.response.p1inrangelenerrors; > + adapter->benet_stats.rx_bytes = > + adapter->eth_statsp->params.response.p0recvdtotalbytesLSD + > + adapter->eth_statsp->params.response.p1recvdtotalbytesLSD; > + adapter->benet_stats.rx_crc_errors = > + adapter->eth_statsp->params.response.p0crcerrors + > + adapter->eth_statsp->params.response.p1crcerrors; > + > + adapter->benet_stats.tx_packets += > + adapter->eth_statsp->params.response.p0xmitmulticastframes + > + adapter->eth_statsp->params.response.p1xmitmulticastframes; > + adapter->benet_stats.tx_packets += > + adapter->eth_statsp->params.response.p0xmitbroadcastframes + > + adapter->eth_statsp->params.response.p1xmitbroadcastframes; > + adapter->benet_stats.tx_errors = 0; > + > + adapter->benet_stats.multicast = > + adapter->eth_statsp->params.response.p0xmitmulticastframes + > + adapter->eth_statsp->params.response.p1xmitmulticastframes; > + > + adapter->benet_stats.rx_fifo_errors = > + adapter->eth_statsp->params.response.p0rxfifooverflowdropped + > + adapter->eth_statsp->params.response.p1rxfifooverflowdropped; > + 
adapter->benet_stats.rx_frame_errors = > + adapter->eth_statsp->params.response.p0alignmentsymerrs + > + adapter->eth_statsp->params.response.p1alignmentsymerrs; > + adapter->benet_stats.rx_length_errors = > + adapter->eth_statsp->params.response.p0inrangelenerrors + > + adapter->eth_statsp->params.response.p1inrangelenerrors; > + adapter->benet_stats.rx_length_errors += > + adapter->eth_statsp->params.response.p0outrangeerrors + > + adapter->eth_statsp->params.response.p1outrangeerrors; > + adapter->benet_stats.rx_length_errors += > + adapter->eth_statsp->params.response.p0frametoolongerrors + > + adapter->eth_statsp->params.response.p1frametoolongerrors; > + > + pci_unmap_single(adapter->pdev, (ulong) adapter->eth_statsp, > + sizeof(struct FWCMD_ETH_GET_STATISTICS), > + PCI_DMA_FROMDEVICE); > + return &(adapter->benet_stats); > + > +} > + > +/* Transmit Function */ > +int betx_ether_frame(struct be_adapter *adapter, struct bni_net_object *pnob, > + struct sk_buff *skb, u8 proto, u8 forward, > + u16 lso_mss) > +{ > + unsigned int nfrags = 0, j, frame_size = 0; > + struct bni_tx_frag_list tx_frag_list[BE_MAX_TX_FRAG_COUNT]; > + unsigned int tx_flags; > + void *ctxtp; > + unsigned short vlant = 0; > + unsigned short tx_mss = 0; > + u64 busaddr; > + int status; > + > + tx_flags = ETHCOMPLETE; > + > + if (OSM_NOB(pnob)->vlan_grp && vlan_tx_tag_present(skb)) { > + tx_flags |= ETHVLAN; > + vlant = vlan_tx_tag_get(skb); > + } > + ctxtp = (void *)skb; > + > + if (proto == IPPROTO_TCP) > + tx_flags |= TCPCS; > + > + if (proto == IPPROTO_UDP) > + tx_flags |= UDPCS; > + > + if (forward) { > + tx_flags |= FORWARD; > + adapter->be_stat.bes_fwd_reqs++; > + } > + > + if (lso_mss) { > + tx_flags |= LSO; > + tx_mss = lso_mss; > + } > + > + adapter->be_stat.bes_tx_reqs++; > + /* populate the fragment (SG) list for this request */ > + while (skb) { > + /* > + * Check whether Fragment count goes above > + * BE_MAX_TX_FRAG_COUNT > + */ > + if ((nfrags + 1) > BE_MAX_TX_FRAG_COUNT) > + 
goto max_tx_frag_error; > + > + /* > + * Get required info from main fragment of skb > + * First get Quad Address > + */ > + busaddr = pci_map_single(adapter->pdev, skb->data, > + (skb->len - skb->data_len), > + PCI_DMA_TODEVICE); > + busaddr = cpu_to_le64(busaddr); > + tx_frag_list[nfrags].txb_pa_lo = (busaddr & 0xFFFFFFFF); > + tx_frag_list[nfrags].txb_pa_hi = busaddr >> 32; > + /* Next get Length */ > + tx_frag_list[nfrags].txb_len = skb->len - skb->data_len; > + frame_size += tx_frag_list[nfrags].txb_len; > + nfrags++; > + > + /* For all the data fragments in this skb */ > + for (j = 0; j < skb_shinfo(skb)->nr_frags; j++) { > + struct skb_frag_struct *frag; > + /* > + * Check whether Fragment count goes > + * above BE_MAX_TX_FRAG_COUNT > + */ > + if ((nfrags + 1) > BE_MAX_TX_FRAG_COUNT) > + goto max_tx_frag_error; > + > + /* For each fragment get required info */ > + frag = &skb_shinfo(skb)->frags[j]; > + /* First get Quad Address */ > + busaddr = pci_map_page(adapter->pdev, > + frag->page, > + frag->page_offset, > + frag->size, > + PCI_DMA_TODEVICE); > + busaddr = cpu_to_le64(busaddr); > + tx_frag_list[nfrags].txb_pa_lo = busaddr & 0xFFFFFFFF; > + tx_frag_list[nfrags].txb_pa_hi = busaddr >> 32; > + /* Next get Length */ > + tx_frag_list[nfrags].txb_len = frag->size; > + frame_size += tx_frag_list[nfrags].txb_len; > + nfrags++; > + } > + > + /* > + * If the skb shared info points to another > + * sk_buff then traverse this pointed > + * skbuff in the same way till the end of the list > + */ > + skb = skb_shinfo(skb)->frag_list; > + } > + > + spin_lock_bh(&adapter->txq_lock); > + > + /* Transmit the packet */ > + status = bni_tx_pkt(pnob, tx_frag_list, > + tx_flags, vlant, tx_mss, ctxtp, nfrags); > + if (status != BE_SUCCESS) { > + /* Tell the stack that Tx failed. 
*/ > + netif_stop_queue((struct net_device *) > + OSM_NOB(pnob)->netdev); > + adapter->be_stat.bes_tx_fails++; > + spin_unlock_bh(&adapter->txq_lock); > + return BE_ETH_TX_ERROR; > + } > + adapter->eth_tx_bytes += frame_size; /* for rate calculation */ > + /* > + * TX rate calculation. If one second has passed since > + * last calculation update the rate now. > + */ > + update_tx_rate(adapter); > + if (nfrags & 1) > + nfrags++; > + > + adapter->be_stat.bes_tx_wrbs += nfrags; > + > + /* Ring the send doorbell */ > + bni_start_tx(pnob, nfrags); > + spin_unlock_bh(&adapter->txq_lock); > + > + return BE_SUCCESS; > + > +max_tx_frag_error: > + /* > + * This skb cannot be transmitted since it exceeds max tx frag count > + * Return with appropriate error > + */ > + printk(KERN_WARNING "%s: Exceeds Max Tx Frags\n", __func__); > + return BE_ETH_TX_ERROR; > +} > + > +/* > + * function called by the stack for transmitting an ether frame > + */ > +static int benet_xmit(struct sk_buff *skb, struct net_device *netdev) > +{ > + struct bni_net_object *pnob = netdev->priv; > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + u8 proto; > + struct iphdr *ip; > + u16 lso_mss; > + u32 segs; > + > + lso_mss = skb_shinfo(skb)->gso_size; > + segs = skb_shinfo(skb)->gso_segs; > + /* > + * bug# 3356. > + * If a LSO request translates into a single segment, > + * it should be posted as a ethernet WRB with no LSO. > + */ > + if (segs == 1) > + lso_mss = 0; > + > + if (skb->ip_summed == CHECKSUM_PARTIAL) { > + ip = (struct iphdr *)ip_hdr(skb); > + proto = ip->protocol; > + } else { > + proto = 0; > + } > + > + if (betx_ether_frame(adapter, pnob, skb, proto, 0, lso_mss) != > + BE_SUCCESS) { > + return NETDEV_TX_BUSY; > + } > + > + netdev->trans_start = jiffies; > + return NETDEV_TX_OK; > + > +} > + > +/* > + * This is the driver entry point to change the mtu of the device > + * Returns 0 for success and errno for failure. 
> + */ > +static int benet_change_mtu(struct net_device *netdev, int new_mtu) > +{ > + /* > + * BE supports jumbo frame size upto 9000 bytes including the link layer > + * header. Considering the different variants of frame formats possible > + * like VLAN, SNAP/LLC, the maximum possible value for MTU is 8974 bytes > + */ > + > + if (new_mtu < (ETH_ZLEN + ETH_FCS_LEN) || (new_mtu > BE_MAX_MTU)) { > + printk(KERN_WARNING "Invalid MTU requested. " > + "Must be between %d and %d bytes\n", > + (ETH_ZLEN+ETH_FCS_LEN), BE_MAX_MTU); > + return -EINVAL; > + } > + printk(KERN_INFO "MTU changed from %d to %d\n", netdev->mtu, > + new_mtu); > + netdev->mtu = new_mtu; > + return 0; > +} > + > +/* > + * This is the driver entry point to register a vlan with the device > + */ > +static void benet_vlan_register(struct net_device *netdev, > + struct vlan_group *grp) > +{ > + struct bni_net_object *pnob = netdev->priv; > + > + bni_disable_eq_intr(pnob); > + OSM_NOB(pnob)->vlan_grp = grp; > + OSM_NOB(pnob)->num_vlans = 0; > + bni_enable_eq_intr(pnob); > +} > + > +/* > + * This is the driver entry point to add a vlan vlan_id > + * with the device netdev > + */ > +static void benet_vlan_add_vid(struct net_device *netdev, u16 vlan_id) > +{ > + struct bni_net_object *pnob = netdev->priv; > + > + if (OSM_NOB(pnob)->num_vlans == (BE_NUM_VLAN_SUPPORTED-1)) { > + /* no way to return an error */ > + printk(KERN_ERR > + "BladeEngine: Cannot configure more than %d Vlans\n", > + BE_NUM_VLAN_SUPPORTED); > + return; > + } > + /*The new vlan tag will be in the slot indicated by num_vlans. 
*/ > + OSM_NOB(pnob)->vlan_tag[OSM_NOB(pnob)->num_vlans++] = vlan_id; > + bni_config_vlan(pnob, OSM_NOB(pnob)->vlan_tag, > + OSM_NOB(pnob)->num_vlans, NULL, NULL, 0); > +} > + > +/* > + * This is the driver entry point to remove a vlan vlan_id > + * with the device netdev > + */ > +static void benet_vlan_rem_vid(struct net_device *netdev, u16 vlan_id) > +{ > + struct bni_net_object *pnob = netdev->priv; > + > + u32 i, value; > + > + /* > + * In Blade Engine, we support 32 vlan tag filters across both ports. > + * To program a vlan tag, the RXF_RTPR_CSR register is used. > + * Each 32-bit value of RXF_RTDR_CSR can address 2 vlan tag entries. > + * The Vlan table is of depth 16. thus we support 32 tags. > + */ > + > + value = vlan_id | VLAN_VALID_BIT; > + for (i = 0; i < BE_NUM_VLAN_SUPPORTED; i++) { > + if (OSM_NOB(pnob)->vlan_tag[i] == vlan_id) > + break; > + } > + > + if (i == BE_NUM_VLAN_SUPPORTED) > + return; > + /* Now compact the vlan tag array by removing hole created. */ > + while ((i + 1) < BE_NUM_VLAN_SUPPORTED) { > + OSM_NOB(pnob)->vlan_tag[i] = OSM_NOB(pnob)->vlan_tag[i + 1]; > + i++; > + } > + if ((i + 1) == BE_NUM_VLAN_SUPPORTED) > + OSM_NOB(pnob)->vlan_tag[i] = (u16) 0x0; > + OSM_NOB(pnob)->num_vlans--; > + bni_config_vlan(pnob, OSM_NOB(pnob)->vlan_tag, > + OSM_NOB(pnob)->num_vlans, NULL, NULL, 0); > +} > + > +/* > + * This function is called to program multicast > + * address in the multicast filter of the ASIC. 
> + */ > +static void be_set_multicast_filter(struct net_device *netdev) > +{ > + struct bni_net_object *pnob = netdev->priv; > + struct dev_mc_list *mc_ptr; > + u8 mac_addr[32][ETH_ALEN]; > + int i; > + > + if (netdev->flags & IFF_ALLMULTI) { > + /* set BE in Multicast promiscuous */ > + bni_set_mc_filter(pnob, 0, TRUE, NULL, NULL, NULL); > + return; > + } > + > + for (mc_ptr = netdev->mc_list, i = 0; mc_ptr; > + mc_ptr = mc_ptr->next, i++) { > + memcpy(&mac_addr[i][0], mc_ptr->dmi_addr, ETH_ALEN); > + } > + /* reset the promiscuous mode also. */ > + bni_set_mc_filter(pnob, i, FALSE, &mac_addr[0][0], NULL, NULL); > + > +} > + > +/* > + * This is the driver entry point to set multicast list > + * with the device netdev. This function will be used to > + * set promiscuous mode or multicast promiscuous mode > + * or multicast mode.... > + */ > +static void benet_set_multicast_list(struct net_device *netdev) > +{ > + struct bni_net_object *pnob = netdev->priv; > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + > + if (netdev->flags & IFF_PROMISC) { > + bni_set_promisc(adapter->net_obj); > + > + } else if (netdev->flags & IFF_ALLMULTI) { > + bni_reset_promisc(adapter->net_obj); > + be_set_multicast_filter(netdev); > + } else { > + bni_reset_promisc(adapter->net_obj); > + be_set_multicast_filter(netdev); > + } > +} > + > + > +/* > + * standard entry point functions for all Linux network interface drivers > + */ > +int benet_probe(struct net_device *netdev) > +{ > + struct bni_net_object *pnob = netdev->priv; > + struct be_adapter *adapter = OSM_NOB(pnob)->adapter; > + > + ether_setup(netdev); > + > + netdev->open = &benet_open; > + netdev->stop = &benet_close; > + netdev->hard_start_xmit = &benet_xmit; > + > + netdev->get_stats = &benet_get_stats; > + > + netdev->set_multicast_list = &benet_set_multicast_list; > + > + netdev->change_mtu = &benet_change_mtu; > + netdev->set_mac_address = &benet_set_mac_addr; > + > + netdev->vlan_rx_register = 
benet_vlan_register; > + netdev->vlan_rx_add_vid = benet_vlan_add_vid; > + netdev->vlan_rx_kill_vid = benet_vlan_rem_vid; > + > + netdev->features = > + NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_RX | NETIF_F_TSO | > + NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_FILTER | NETIF_F_IP_CSUM; > + > + netdev->flags |= IFF_MULTICAST; > + > + /* If device is DAC Capable, set the HIGHDMA flag for netdevice. */ > + if (adapter->dma_64bit_cap) > + netdev->features |= NETIF_F_HIGHDMA; > + > + SET_ETHTOOL_OPS(netdev, &be_ethtool_ops); > + return 0; > +} -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists