[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20110701124850.GA2313@minipsycho.brq.redhat.com>
Date: Fri, 1 Jul 2011 14:48:51 +0200
From: Jiri Pirko <jpirko@...hat.com>
To: netdev@...r.kernel.org
Cc: davem@...emloft.net, shemminger@...ux-foundation.org,
kaber@...sh.net, fubar@...ibm.com, eric.dumazet@...il.com,
nicolas.2p.debian@...il.com, andy@...yhouse.net
Subject: Re: [RFC patch net-next-2.6] net: allow multiple rx_handler
registration
btw I measured the performance using LNST and recipe added by following
commit:
http://git.fedorahosted.org/git/?p=lnst.git;a=commitdiff;h=d3293bf2d0da59a2f7956d3356f4bb0f0ea9cd33
Thu, Jun 30, 2011 at 05:16:49PM CEST, jpirko@...hat.com wrote:
>For some net topos it is necessary to have multiple "soft-net-devices"
>hooked on one netdev. For example very common is to have
>eth<->(br+vlan). Vlan is not using rh_handler (yet) but also for example
>macvlan would be useful to have hooked on same netdev as br.
>
>This patch introduces rx_handler list. size struct net_device stays
>intact. Measured performance regression on eth-br topo is ~1% (on received
>pkts generated by pktgen) and on eth-bond topo it is ~0.25%
>
>On br I think that the performance can be brought back maybe by using per-cpu
>variables to store port in rx_path (I must check this)
>
>Please comment.
>
>Signed-off-by: Jiri Pirko <jpirko@...hat.com>
>---
> drivers/net/bonding/bond_main.c | 14 ++++---
> drivers/net/bonding/bonding.h | 9 +++-
> drivers/net/macvlan.c | 35 +++++++++++-----
> include/linux/netdevice.h | 63 +++++++++++++++++++++++++---
> net/bridge/br_if.c | 5 +-
> net/bridge/br_input.c | 5 +-
> net/bridge/br_private.h | 28 ++++++++++---
> net/core/dev.c | 87 +++++++++++++++++++++++++++++++--------
> 8 files changed, 193 insertions(+), 53 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 61265f7..f18af47 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -1482,7 +1482,8 @@ static bool bond_should_deliver_exact_match(struct sk_buff *skb,
> return false;
> }
>
>-static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
>+static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb,
>+ struct rx_handler *rx_handler)
> {
> struct sk_buff *skb = *pskb;
> struct slave *slave;
>@@ -1494,7 +1495,7 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
>
> *pskb = skb;
>
>- slave = bond_slave_get_rcu(skb->dev);
>+ slave = bond_slave_get(rx_handler);
> bond = slave->bond;
>
> if (bond->params.arp_interval)
>@@ -1897,8 +1898,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
> if (res)
> goto err_close;
>
>- res = netdev_rx_handler_register(slave_dev, bond_handle_frame,
>- new_slave);
>+ res = netdev_rx_handler_register(slave_dev, &new_slave->rx_handler,
>+ bond_handle_frame,
>+ RX_HANDLER_PRIO_BOND);
> if (res) {
> pr_debug("Error %d calling netdev_rx_handler_register\n", res);
> goto err_dest_symlinks;
>@@ -1988,7 +1990,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
> /* unregister rx_handler early so bond_handle_frame wouldn't be called
> * for this slave anymore.
> */
>- netdev_rx_handler_unregister(slave_dev);
>+ netdev_rx_handler_unregister(slave_dev, &slave->rx_handler);
> write_unlock_bh(&bond->lock);
> synchronize_net();
> write_lock_bh(&bond->lock);
>@@ -2189,7 +2191,7 @@ static int bond_release_all(struct net_device *bond_dev)
> /* unregister rx_handler early so bond_handle_frame wouldn't
> * be called for this slave anymore.
> */
>- netdev_rx_handler_unregister(slave_dev);
>+ netdev_rx_handler_unregister(slave_dev, &slave->rx_handler);
> synchronize_net();
>
> if (bond_is_lb(bond)) {
>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>index 2936171..e732e16 100644
>--- a/drivers/net/bonding/bonding.h
>+++ b/drivers/net/bonding/bonding.h
>@@ -172,6 +172,7 @@ struct vlan_entry {
>
> struct slave {
> struct net_device *dev; /* first - useful for panic debug */
>+ struct rx_handler rx_handler;
> struct slave *next;
> struct slave *prev;
> struct bonding *bond; /* our master */
>@@ -196,6 +197,11 @@ struct slave {
> #endif
> };
>
>+#define bond_slave_get(rx_handler) \
>+ netdev_rx_handler_get_priv(rx_handler, \
>+ struct slave, \
>+ rx_handler)
>+
> /*
> * Link pseudo-state only used internally by monitors
> */
>@@ -253,9 +259,6 @@ struct bonding {
> #endif /* CONFIG_DEBUG_FS */
> };
>
>-#define bond_slave_get_rcu(dev) \
>- ((struct slave *) rcu_dereference(dev->rx_handler_data))
>-
> /**
> * Returns NULL if the net_device does not belong to any of the bond's slaves
> *
>diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
>index cc67cbe..49ca58b 100644
>--- a/drivers/net/macvlan.c
>+++ b/drivers/net/macvlan.c
>@@ -34,19 +34,28 @@
> #define MACVLAN_HASH_SIZE (1 << BITS_PER_BYTE)
>
> struct macvlan_port {
>+ struct rx_handler rx_handler;
> struct net_device *dev;
> struct hlist_head vlan_hash[MACVLAN_HASH_SIZE];
> struct list_head vlans;
> struct rcu_head rcu;
>- bool passthru;
>+ bool passthru;
> int count;
> };
>
>+#define macvlan_port_get(rx_handler) \
>+ netdev_rx_handler_get_priv(rx_handler, \
>+ struct macvlan_port, \
>+ rx_handler)
>+
>+#define macvlan_port_get_by_dev(dev) \
>+ netdev_rx_handler_get_priv_by_prio(dev, \
>+ RX_HANDLER_PRIO_MACVLAN, \
>+ struct macvlan_port, \
>+ rx_handler)
>+
> static void macvlan_port_destroy(struct net_device *dev);
>
>-#define macvlan_port_get_rcu(dev) \
>- ((struct macvlan_port *) rcu_dereference(dev->rx_handler_data))
>-#define macvlan_port_get(dev) ((struct macvlan_port *) dev->rx_handler_data)
> #define macvlan_port_exists(dev) (dev->priv_flags & IFF_MACVLAN_PORT)
>
> static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port,
>@@ -156,7 +165,8 @@ static void macvlan_broadcast(struct sk_buff *skb,
> }
>
> /* called under rcu_read_lock() from netif_receive_skb */
>-static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
>+static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb,
>+ struct rx_handler *rx_handler)
> {
> struct macvlan_port *port;
> struct sk_buff *skb = *pskb;
>@@ -167,7 +177,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
> unsigned int len = 0;
> int ret = NET_RX_DROP;
>
>- port = macvlan_port_get_rcu(skb->dev);
>+ port = macvlan_port_get(rx_handler);
> if (is_multicast_ether_addr(eth->h_dest)) {
> src = macvlan_hash_lookup(port, eth->h_source);
> if (!src)
>@@ -617,7 +627,9 @@ static int macvlan_port_create(struct net_device *dev)
> for (i = 0; i < MACVLAN_HASH_SIZE; i++)
> INIT_HLIST_HEAD(&port->vlan_hash[i]);
>
>- err = netdev_rx_handler_register(dev, macvlan_handle_frame, port);
>+ err = netdev_rx_handler_register(dev, &port->rx_handler,
>+ macvlan_handle_frame,
>+ RX_HANDLER_PRIO_MACVLAN);
> if (err)
> kfree(port);
> else
>@@ -627,10 +639,11 @@ static int macvlan_port_create(struct net_device *dev)
>
> static void macvlan_port_destroy(struct net_device *dev)
> {
>- struct macvlan_port *port = macvlan_port_get(dev);
>+ struct macvlan_dev *vlan = netdev_priv(dev);
>+ struct macvlan_port *port = vlan->port;
>
> dev->priv_flags &= ~IFF_MACVLAN_PORT;
>- netdev_rx_handler_unregister(dev);
>+ netdev_rx_handler_unregister(dev, &port->rx_handler);
> kfree_rcu(port, rcu);
> }
>
>@@ -696,7 +709,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
> if (err < 0)
> return err;
> }
>- port = macvlan_port_get(lowerdev);
>+ port = macvlan_port_get_by_dev(lowerdev);
>
> /* Only 1 macvlan device can be created in passthru mode */
> if (port->passthru)
>@@ -818,7 +831,7 @@ static int macvlan_device_event(struct notifier_block *unused,
> if (!macvlan_port_exists(dev))
> return NOTIFY_DONE;
>
>- port = macvlan_port_get(dev);
>+ port = macvlan_port_get_by_dev(dev);
>
> switch (event) {
> case NETDEV_CHANGE:
>diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>index 011eb89..126cd07 100644
>--- a/include/linux/netdevice.h
>+++ b/include/linux/netdevice.h
>@@ -437,7 +437,51 @@ enum rx_handler_result {
> RX_HANDLER_PASS,
> };
> typedef enum rx_handler_result rx_handler_result_t;
>-typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
>+
>+struct rx_handler;
>+typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb,
>+ struct rx_handler *rx_handler);
>+
>+enum rx_handler_prio {
>+ RX_HANDLER_PRIO_BRIDGE,
>+ RX_HANDLER_PRIO_BOND,
>+ RX_HANDLER_PRIO_MACVLAN,
>+};
>+
>+/*
>+ * struct rx_handler should be embedded into
>+ * private struct used by rx_handler
>+ */
>+struct rx_handler {
>+ struct list_head list;
>+ rx_handler_func_t *callback;
>+ unsigned int prio;
>+};
>+
>+/**
>+ * netdev_rx_handler_get_priv - get containing private structure of given
>+ * receive handler
>+ * @rx_handler: receive_handler
>+ * @type: the type of the container struct this is embedded in
>+ * @member: the name of the member within the struct
>+ */
>+#define netdev_rx_handler_get_priv(rx_handler, type, member) \
>+ container_of(rx_handler, type, member)
>+
>+/**
>+ * netdev_rx_handler_get_priv_by_prio, netdev_rx_handler_get_priv_by_prio_rcu
>+ * - get containing private structure of given receive handler priority
>+ * @dev: netdevice
>+ * @type: the type of the container struct this is embedded in
>+ * @member: the name of the member within the struct
>+ */
>+#define netdev_rx_handler_get_priv_by_prio(dev, prio, type, member) \
>+ netdev_rx_handler_get_priv(netdev_rx_handler_get_by_prio(dev, prio), \
>+ type, member)
>+
>+#define netdev_rx_handler_get_priv_by_prio_rcu(dev, prio, type, member) \
>+ netdev_rx_handler_get_priv(netdev_rx_handler_get_by_prio_rcu(dev, prio),\
>+ type, member)
>
> extern void __napi_schedule(struct napi_struct *n);
>
>@@ -1238,8 +1282,7 @@ struct net_device {
> #endif
> #endif
>
>- rx_handler_func_t __rcu *rx_handler;
>- void __rcu *rx_handler_data;
>+ struct list_head rx_handler_list;
>
> struct netdev_queue __rcu *ingress_queue;
>
>@@ -2082,10 +2125,18 @@ static inline void napi_free_frags(struct napi_struct *napi)
> napi->skb = NULL;
> }
>
>+extern struct rx_handler *
>+netdev_rx_handler_get_by_prio(const struct net_device *dev,
>+ unsigned int prio);
>+extern struct rx_handler *
>+netdev_rx_handler_get_by_prio_rcu(const struct net_device *dev,
>+ unsigned int prio);
> extern int netdev_rx_handler_register(struct net_device *dev,
>- rx_handler_func_t *rx_handler,
>- void *rx_handler_data);
>-extern void netdev_rx_handler_unregister(struct net_device *dev);
>+ struct rx_handler *rx_handler,
>+ rx_handler_func_t *callback,
>+ unsigned int prio);
>+extern void netdev_rx_handler_unregister(struct net_device *dev,
>+ struct rx_handler *rx_handler);
>
> extern int dev_valid_name(const char *name);
> extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
>diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
>index 1bacca4..4ee5d78 100644
>--- a/net/bridge/br_if.c
>+++ b/net/bridge/br_if.c
>@@ -146,7 +146,7 @@ static void del_nbp(struct net_bridge_port *p)
>
> dev->priv_flags &= ~IFF_BRIDGE_PORT;
>
>- netdev_rx_handler_unregister(dev);
>+ netdev_rx_handler_unregister(dev, &p->rx_handler);
> synchronize_net();
>
> netdev_set_master(dev, NULL);
>@@ -365,7 +365,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
> if (err)
> goto err3;
>
>- err = netdev_rx_handler_register(dev, br_handle_frame, p);
>+ err = netdev_rx_handler_register(dev, &p->rx_handler, br_handle_frame,
>+ RX_HANDLER_PRIO_BRIDGE);
> if (err)
> goto err4;
>
>diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
>index f3ac1e8..5f396d8 100644
>--- a/net/bridge/br_input.c
>+++ b/net/bridge/br_input.c
>@@ -140,7 +140,8 @@ static inline int is_link_local(const unsigned char *dest)
> * Return NULL if skb is handled
> * note: already called with rcu_read_lock
> */
>-rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
>+rx_handler_result_t br_handle_frame(struct sk_buff **pskb,
>+ struct rx_handler *rx_handler)
> {
> struct net_bridge_port *p;
> struct sk_buff *skb = *pskb;
>@@ -157,7 +158,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
> if (!skb)
> return RX_HANDLER_CONSUMED;
>
>- p = br_port_get_rcu(skb->dev);
>+ p = br_port_get(rx_handler);
>
> if (unlikely(is_link_local(dest))) {
> /* Pause frames shouldn't be passed up by driver anyway */
>diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
>index 54578f2..1a1ea40 100644
>--- a/net/bridge/br_private.h
>+++ b/net/bridge/br_private.h
>@@ -108,6 +108,7 @@ struct net_bridge_mdb_htable
>
> struct net_bridge_port
> {
>+ struct rx_handler rx_handler;
> struct net_bridge *br;
> struct net_device *dev;
> struct list_head list;
>@@ -152,18 +153,32 @@ struct net_bridge_port
> #endif
> };
>
>+#define br_port_get(rx_handler) \
>+ netdev_rx_handler_get_priv(rx_handler, \
>+ struct net_bridge_port, \
>+ rx_handler)
>+
> #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
>
>-static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
>+static inline struct net_bridge_port *
>+br_port_get_rcu(const struct net_device *dev)
> {
>- struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data);
>- return br_port_exists(dev) ? port : NULL;
>+ if (unlikely(!br_port_exists(dev)))
>+ return NULL;
>+ return netdev_rx_handler_get_priv_by_prio_rcu(dev,
>+ RX_HANDLER_PRIO_BRIDGE,
>+ struct net_bridge_port,
>+ rx_handler);
> }
>
> static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev)
> {
>- return br_port_exists(dev) ?
>- rtnl_dereference(dev->rx_handler_data) : NULL;
>+ if (unlikely(!br_port_exists(dev)))
>+ return NULL;
>+ return netdev_rx_handler_get_priv_by_prio(dev,
>+ RX_HANDLER_PRIO_BRIDGE,
>+ struct net_bridge_port,
>+ rx_handler);
> }
>
> struct br_cpu_netstats {
>@@ -382,7 +397,8 @@ extern u32 br_features_recompute(struct net_bridge *br, u32 features);
>
> /* br_input.c */
> extern int br_handle_frame_finish(struct sk_buff *skb);
>-extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
>+extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb,
>+ struct rx_handler *rx_handler);
>
> /* br_ioctl.c */
> extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
>diff --git a/net/core/dev.c b/net/core/dev.c
>index 6b6ef14..92d9007 100644
>--- a/net/core/dev.c
>+++ b/net/core/dev.c
>@@ -3043,10 +3043,55 @@ out:
> #endif
>
> /**
>+ * netdev_rx_handler_get_by_prio - get receive handler struct by priority
>+ * @dev: net device
>+ * @prio: receive handler priority
>+ *
>+ * Find and return receive handler for given priority.
>+ *
>+ * The caller must hold the rtnl_mutex.
>+ */
>+struct rx_handler *
>+netdev_rx_handler_get_by_prio(const struct net_device *dev, unsigned int prio)
>+{
>+ struct rx_handler *rx_handler;
>+
>+ ASSERT_RTNL();
>+ list_for_each_entry(rx_handler, &dev->rx_handler_list, list)
>+ if (rx_handler->prio == prio)
>+ return rx_handler;
>+ return NULL;
>+}
>+EXPORT_SYMBOL_GPL(netdev_rx_handler_get_by_prio);
>+
>+/**
>+ * netdev_rx_handler_get_by_prio_rcu - get receive handler struct by priority
>+ * @dev: net device
>+ * @prio: receive handler priority
>+ *
>+ * RCU variant to find and return receive handler for given priority.
>+ *
>+ * The caller must hold the rcu_read_lock.
>+ */
>+struct rx_handler *
>+netdev_rx_handler_get_by_prio_rcu(const struct net_device *dev,
>+ unsigned int prio)
>+{
>+ struct rx_handler *rx_handler;
>+
>+ list_for_each_entry_rcu(rx_handler, &dev->rx_handler_list, list)
>+ if (rx_handler->prio == prio)
>+ return rx_handler;
>+ return NULL;
>+}
>+EXPORT_SYMBOL_GPL(netdev_rx_handler_get_by_prio_rcu);
>+
>+/**
> * netdev_rx_handler_register - register receive handler
> * @dev: device to register a handler for
>- * @rx_handler: receive handler to register
>- * @rx_handler_data: data pointer that is used by rx handler
>+ * @rx_handler: receive handler structure to register
>+ * @callback: receive handler callback function to register
>+ * @prio: receive handler priority
> *
> * Register a receive hander for a device. This handler will then be
> * called from __netif_receive_skb. A negative errno code is returned
>@@ -3057,17 +3102,24 @@ out:
> * For a general description of rx_handler, see enum rx_handler_result.
> */
> int netdev_rx_handler_register(struct net_device *dev,
>- rx_handler_func_t *rx_handler,
>- void *rx_handler_data)
>+ struct rx_handler *rx_handler,
>+ rx_handler_func_t *callback, unsigned int prio)
> {
>- ASSERT_RTNL();
>+ struct list_head *pos;
>
>- if (dev->rx_handler)
>+ ASSERT_RTNL();
>+ if (netdev_rx_handler_get_by_prio(dev, prio))
> return -EBUSY;
>+ list_for_each(pos, &dev->rx_handler_list) {
>+ struct rx_handler *entry;
>
>- rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
>- rcu_assign_pointer(dev->rx_handler, rx_handler);
>-
>+ entry = list_entry(pos, struct rx_handler, list);
>+ if (prio > entry->prio)
>+ break;
>+ }
>+ rx_handler->callback = callback;
>+ rx_handler->prio = prio;
>+ list_add_rcu(&rx_handler->list, pos);
> return 0;
> }
> EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
>@@ -3075,24 +3127,24 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
> /**
> * netdev_rx_handler_unregister - unregister receive handler
> * @dev: device to unregister a handler from
>+ * @prio: handler priority
> *
> * Unregister a receive hander from a device.
> *
> * The caller must hold the rtnl_mutex.
> */
>-void netdev_rx_handler_unregister(struct net_device *dev)
>+void netdev_rx_handler_unregister(struct net_device *dev,
>+ struct rx_handler *rx_handler)
> {
>-
> ASSERT_RTNL();
>- rcu_assign_pointer(dev->rx_handler, NULL);
>- rcu_assign_pointer(dev->rx_handler_data, NULL);
>+ list_del_rcu(&rx_handler->list);
> }
> EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
>
> static int __netif_receive_skb(struct sk_buff *skb)
> {
> struct packet_type *ptype, *pt_prev;
>- rx_handler_func_t *rx_handler;
>+ struct rx_handler *rx_handler;
> struct net_device *orig_dev;
> struct net_device *null_or_dev;
> bool deliver_exact = false;
>@@ -3152,13 +3204,12 @@ another_round:
> ncls:
> #endif
>
>- rx_handler = rcu_dereference(skb->dev->rx_handler);
>- if (rx_handler) {
>+ list_for_each_entry_rcu(rx_handler, &skb->dev->rx_handler_list, list) {
> if (pt_prev) {
> ret = deliver_skb(skb, pt_prev, orig_dev);
> pt_prev = NULL;
> }
>- switch (rx_handler(&skb)) {
>+ switch (rx_handler->callback(&skb, rx_handler)) {
> case RX_HANDLER_CONSUMED:
> goto out;
> case RX_HANDLER_ANOTHER:
>@@ -5870,6 +5921,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
> INIT_LIST_HEAD(&dev->napi_list);
> INIT_LIST_HEAD(&dev->unreg_list);
> INIT_LIST_HEAD(&dev->link_watch_list);
>+ INIT_LIST_HEAD(&dev->rx_handler_list);
>+
> dev->priv_flags = IFF_XMIT_DST_RELEASE;
> setup(dev);
>
>--
>1.7.5.4
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists