[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <21593.1298070371@death>
Date: Fri, 18 Feb 2011 15:06:11 -0800
From: Jay Vosburgh <fubar@...ibm.com>
To: Jiri Pirko <jpirko@...hat.com>
cc: David Miller <davem@...emloft.net>, kaber@...sh.net,
eric.dumazet@...il.com, netdev@...r.kernel.org,
shemminger@...ux-foundation.org, nicolas.2p.debian@...il.com,
andy@...yhouse.net
Subject: Re: [patch net-next-2.6 V2] net: convert bonding to use rx_handler
Jiri Pirko <jpirko@...hat.com> wrote:
>This patch converts bonding to use rx_handler. Results in cleaner
>__netif_receive_skb() with much less exceptions needed. Also bond-specific
>work is moved into bond code.
>
>Signed-off-by: Jiri Pirko <jpirko@...hat.com>
>
>v1->v2:
> using skb_iif instead of new input_dev to remember original device
>
>---
> drivers/net/bonding/bond_main.c | 75 ++++++++++++++++++++++++++-
> net/core/dev.c | 111 ++++++++-------------------------------
> 2 files changed, 97 insertions(+), 89 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 77e3c6a..a856a11 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -1423,6 +1423,68 @@ static void bond_setup_by_slave(struct net_device *bond_dev,
> bond->setup_by_slave = 1;
> }
>
>+/* On bonding slaves other than the currently active slave, suppress
>+ * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
>+ * ARP on active-backup slaves with arp_validate enabled.
>+ */
>+static bool bond_should_deliver_exact_match(struct sk_buff *skb,
>+ struct net_device *slave_dev,
>+ struct net_device *bond_dev)
>+{
>+ if (slave_dev->priv_flags & IFF_SLAVE_INACTIVE) {
>+ if (slave_dev->priv_flags & IFF_SLAVE_NEEDARP &&
>+ skb->protocol == __cpu_to_be16(ETH_P_ARP))
>+ return false;
>+
>+ if (bond_dev->priv_flags & IFF_MASTER_ALB &&
>+ skb->pkt_type != PACKET_BROADCAST &&
>+ skb->pkt_type != PACKET_MULTICAST)
>+ return false;
>+
>+ if (bond_dev->priv_flags & IFF_MASTER_8023AD &&
>+ skb->protocol == __cpu_to_be16(ETH_P_SLOW))
>+ return false;
Since this is all in the bonding code now, it should be possible
to do away with using priv_flags for all (or at least most) of this.
Perhaps in a follow-on patch.
>+
>+ return true;
>+ }
>+ return false;
>+}
>+
>+static struct sk_buff *bond_handle_frame(struct sk_buff *skb)
>+{
>+ struct net_device *slave_dev;
>+ struct net_device *bond_dev;
>+
>+ skb = skb_share_check(skb, GFP_ATOMIC);
>+ if (unlikely(!skb))
>+ return NULL;
>+ slave_dev = skb->dev;
>+ bond_dev = ACCESS_ONCE(slave_dev->master);
>+ if (unlikely(!bond_dev))
>+ return skb;
>+
>+ if (bond_dev->priv_flags & IFF_MASTER_ARPMON)
>+ slave_dev->last_rx = jiffies;
The last_rx field could probably move into bonding as well,
although it looks like there are a couple of drivers using last_rx for
something (more than just setting it).
>+ if (bond_should_deliver_exact_match(skb, slave_dev, bond_dev)) {
>+ skb->deliver_no_wcard = 1;
>+ return skb;
>+ }
>+
>+ skb->dev = bond_dev;
>+
>+ if (bond_dev->priv_flags & IFF_MASTER_ALB &&
>+ bond_dev->priv_flags & IFF_BRIDGE_PORT &&
>+ skb->pkt_type == PACKET_HOST) {
>+ u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
>+
>+ memcpy(dest, bond_dev->dev_addr, ETH_ALEN);
>+ }
>+
>+ netif_rx(skb);
>+ return NULL;
>+}
>+
> /* enslave device <slave> to bond device <master> */
> int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
> {
>@@ -1599,11 +1661,17 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
> pr_debug("Error %d calling netdev_set_bond_master\n", res);
> goto err_restore_mac;
> }
>+ res = netdev_rx_handler_register(slave_dev, bond_handle_frame, NULL);
>+ if (res) {
>+ pr_debug("Error %d calling netdev_rx_handler_register\n", res);
>+ goto err_unset_master;
>+ }
>+
> /* open the slave since the application closed it */
> res = dev_open(slave_dev);
> if (res) {
> pr_debug("Opening slave %s failed\n", slave_dev->name);
>- goto err_unset_master;
>+ goto err_unreg_rxhandler;
> }
>
> new_slave->dev = slave_dev;
>@@ -1811,6 +1879,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
> err_close:
> dev_close(slave_dev);
>
>+err_unreg_rxhandler:
>+ netdev_rx_handler_unregister(slave_dev);
>+
> err_unset_master:
> netdev_set_bond_master(slave_dev, NULL);
>
>@@ -1992,6 +2063,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
> netif_addr_unlock_bh(bond_dev);
> }
>
>+ netdev_rx_handler_unregister(slave_dev);
> netdev_set_bond_master(slave_dev, NULL);
>
> #ifdef CONFIG_NET_POLL_CONTROLLER
>@@ -2114,6 +2186,7 @@ static int bond_release_all(struct net_device *bond_dev)
> netif_addr_unlock_bh(bond_dev);
> }
>
>+ netdev_rx_handler_unregister(slave_dev);
> netdev_set_bond_master(slave_dev, NULL);
>
> /* close slave before restoring its mac address */
>diff --git a/net/core/dev.c b/net/core/dev.c
>index 4f69439..580cff1 100644
>--- a/net/core/dev.c
>+++ b/net/core/dev.c
>@@ -3092,63 +3092,31 @@ void netdev_rx_handler_unregister(struct net_device *dev)
> }
> EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
>
>-static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
>- struct net_device *master)
>+static void vlan_on_bond_hook(struct sk_buff *skb)
> {
>- if (skb->pkt_type == PACKET_HOST) {
>- u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
>-
>- memcpy(dest, master->dev_addr, ETH_ALEN);
>- }
>-}
>-
>-/* On bonding slaves other than the currently active slave, suppress
>- * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
>- * ARP on active-backup slaves with arp_validate enabled.
>- */
>-static int __skb_bond_should_drop(struct sk_buff *skb,
>- struct net_device *master)
>-{
>- struct net_device *dev = skb->dev;
>-
>- if (master->priv_flags & IFF_MASTER_ARPMON)
>- dev->last_rx = jiffies;
>-
>- if ((master->priv_flags & IFF_MASTER_ALB) &&
>- (master->priv_flags & IFF_BRIDGE_PORT)) {
>- /* Do address unmangle. The local destination address
>- * will be always the one master has. Provides the right
>- * functionality in a bridge.
>- */
>- skb_bond_set_mac_by_master(skb, master);
>- }
>-
>- if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
>- if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
>- skb->protocol == __cpu_to_be16(ETH_P_ARP))
>- return 0;
>-
>- if (master->priv_flags & IFF_MASTER_ALB) {
>- if (skb->pkt_type != PACKET_BROADCAST &&
>- skb->pkt_type != PACKET_MULTICAST)
>- return 0;
>- }
>- if (master->priv_flags & IFF_MASTER_8023AD &&
>- skb->protocol == __cpu_to_be16(ETH_P_SLOW))
>- return 0;
>+ /*
>+ * Make sure ARP frames received on VLAN interfaces stacked on
>+ * bonding interfaces still make their way to any base bonding
>+ * device that may have registered for a specific ptype.
>+ */
>+ if (skb->dev->priv_flags & IFF_802_1Q_VLAN &&
>+ vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING &&
>+ skb->protocol == htons(ETH_P_ARP)) {
>+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
>
>- return 1;
>+ if (!skb2)
>+ return;
>+ skb2->dev = vlan_dev_real_dev(skb->dev);
>+ netif_rx(skb2);
> }
>- return 0;
> }
>
> static int __netif_receive_skb(struct sk_buff *skb)
> {
> struct packet_type *ptype, *pt_prev;
> rx_handler_func_t *rx_handler;
>+ struct net_device *null_or_dev;
> struct net_device *orig_dev;
>- struct net_device *null_or_orig;
>- struct net_device *orig_or_bond;
> int ret = NET_RX_DROP;
> __be16 type;
>
>@@ -3164,30 +3132,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
> if (!skb->skb_iif)
> skb->skb_iif = skb->dev->ifindex;
>
>- /*
>- * bonding note: skbs received on inactive slaves should only
>- * be delivered to pkt handlers that are exact matches. Also
>- * the deliver_no_wcard flag will be set. If packet handlers
>- * are sensitive to duplicate packets these skbs will need to
>- * be dropped at the handler.
>- */
>- null_or_orig = NULL;
>- orig_dev = skb->dev;
>- if (skb->deliver_no_wcard)
>- null_or_orig = orig_dev;
>- else if (netif_is_bond_slave(orig_dev)) {
>- struct net_device *bond_master = ACCESS_ONCE(orig_dev->master);
>-
>- if (likely(bond_master)) {
>- if (__skb_bond_should_drop(skb, bond_master)) {
>- skb->deliver_no_wcard = 1;
>- /* deliver only exact match */
>- null_or_orig = orig_dev;
>- } else
>- skb->dev = bond_master;
>- }
>- }
>-
> __this_cpu_inc(softnet_data.processed);
> skb_reset_network_header(skb);
> skb_reset_transport_header(skb);
>@@ -3196,6 +3140,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
> pt_prev = NULL;
>
> rcu_read_lock();
>+ orig_dev = dev_get_by_index_rcu(dev_net(skb->dev), skb->skb_iif);
Aren't most packets going to have orig_dev == skb->dev at this
point? Can this be combined with the skb_iif test a few lines above
this in __netif_receive_skb, looking something like:
if (!skb->skb_iif) {
skb->skb_iif = skb->dev->ifindex;
orig_dev = skb->dev;
else {
orig_dev = dev_get_by_index_rcu(...);
}
Presumably moving the whole thing down inside the rcu_read_lock.
VLAN packets should come through here twice, but the first time
through is before the call to vlan_hwaccel_do_receive, so skb->dev
hasn't been set to the VLAN's dev yet.
Unless, of course, you find a place to store the orig_dev.
-J
> #ifdef CONFIG_NET_CLS_ACT
> if (skb->tc_verd & TC_NCLS) {
>@@ -3205,8 +3150,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
> #endif
>
> list_for_each_entry_rcu(ptype, &ptype_all, list) {
>- if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
>- ptype->dev == orig_dev) {
>+ if (!ptype->dev || ptype->dev == skb->dev) {
> if (pt_prev)
> ret = deliver_skb(skb, pt_prev, orig_dev);
> pt_prev = ptype;
>@@ -3220,7 +3164,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
> ncls:
> #endif
>
>- /* Handle special case of bridge or macvlan */
> rx_handler = rcu_dereference(skb->dev->rx_handler);
> if (rx_handler) {
> if (pt_prev) {
>@@ -3244,24 +3187,16 @@ ncls:
> goto out;
> }
>
>- /*
>- * Make sure frames received on VLAN interfaces stacked on
>- * bonding interfaces still make their way to any base bonding
>- * device that may have registered for a specific ptype. The
>- * handler may have to adjust skb->dev and orig_dev.
>- */
>- orig_or_bond = orig_dev;
>- if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
>- (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
>- orig_or_bond = vlan_dev_real_dev(skb->dev);
>- }
>+ vlan_on_bond_hook(skb);
>+
>+ /* deliver only exact match when indicated */
>+ null_or_dev = skb->deliver_no_wcard ? skb->dev : NULL;
>
> type = skb->protocol;
> list_for_each_entry_rcu(ptype,
> &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
>- if (ptype->type == type && (ptype->dev == null_or_orig ||
>- ptype->dev == skb->dev || ptype->dev == orig_dev ||
>- ptype->dev == orig_or_bond)) {
>+ if (ptype->type == type &&
>+ (ptype->dev == null_or_dev || ptype->dev == skb->dev)) {
> if (pt_prev)
> ret = deliver_skb(skb, pt_prev, orig_dev);
> pt_prev = ptype;
>--
>1.7.3.4
>
---
-Jay Vosburgh, IBM Linux Technology Center, fubar@...ibm.com
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists