lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1309447009-8898-1-git-send-email-jpirko@redhat.com>
Date:	Thu, 30 Jun 2011 17:16:49 +0200
From:	Jiri Pirko <jpirko@...hat.com>
To:	netdev@...r.kernel.org
Cc:	davem@...emloft.net, shemminger@...ux-foundation.org,
	kaber@...sh.net, fubar@...ibm.com, eric.dumazet@...il.com,
	nicolas.2p.debian@...il.com, andy@...yhouse.net
Subject: [RFC patch net-next-2.6] net: allow multiple rx_handler registration

For some network topologies it is necessary to have multiple
"soft-net-devices" hooked on one netdev. For example, it is very common to
have eth<->(br+vlan). Vlan is not using rx_handler (yet), but it would also
be useful to have, for example, macvlan hooked on the same netdev as br.

This patch introduces an rx_handler list. The size of struct net_device stays
unchanged. The measured performance regression on the eth-br topology is ~1%
(on received pkts generated by pktgen) and on the eth-bond topology it is ~0.25%.

For br, I think the performance can probably be recovered by using per-cpu
variables to store the port in the rx path (I must check this).

Please comment.

Signed-off-by: Jiri Pirko <jpirko@...hat.com>
---
 drivers/net/bonding/bond_main.c |   14 ++++---
 drivers/net/bonding/bonding.h   |    9 +++-
 drivers/net/macvlan.c           |   35 +++++++++++-----
 include/linux/netdevice.h       |   63 +++++++++++++++++++++++++---
 net/bridge/br_if.c              |    5 +-
 net/bridge/br_input.c           |    5 +-
 net/bridge/br_private.h         |   28 ++++++++++---
 net/core/dev.c                  |   87 +++++++++++++++++++++++++++++++--------
 8 files changed, 193 insertions(+), 53 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 61265f7..f18af47 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1482,7 +1482,8 @@ static bool bond_should_deliver_exact_match(struct sk_buff *skb,
 	return false;
 }
 
-static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
+static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb,
+					     struct rx_handler *rx_handler)
 {
 	struct sk_buff *skb = *pskb;
 	struct slave *slave;
@@ -1494,7 +1495,7 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
 
 	*pskb = skb;
 
-	slave = bond_slave_get_rcu(skb->dev);
+	slave = bond_slave_get(rx_handler);
 	bond = slave->bond;
 
 	if (bond->params.arp_interval)
@@ -1897,8 +1898,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	if (res)
 		goto err_close;
 
-	res = netdev_rx_handler_register(slave_dev, bond_handle_frame,
-					 new_slave);
+	res = netdev_rx_handler_register(slave_dev, &new_slave->rx_handler,
+					 bond_handle_frame,
+					 RX_HANDLER_PRIO_BOND);
 	if (res) {
 		pr_debug("Error %d calling netdev_rx_handler_register\n", res);
 		goto err_dest_symlinks;
@@ -1988,7 +1990,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 	/* unregister rx_handler early so bond_handle_frame wouldn't be called
 	 * for this slave anymore.
 	 */
-	netdev_rx_handler_unregister(slave_dev);
+	netdev_rx_handler_unregister(slave_dev, &slave->rx_handler);
 	write_unlock_bh(&bond->lock);
 	synchronize_net();
 	write_lock_bh(&bond->lock);
@@ -2189,7 +2191,7 @@ static int bond_release_all(struct net_device *bond_dev)
 		/* unregister rx_handler early so bond_handle_frame wouldn't
 		 * be called for this slave anymore.
 		 */
-		netdev_rx_handler_unregister(slave_dev);
+		netdev_rx_handler_unregister(slave_dev, &slave->rx_handler);
 		synchronize_net();
 
 		if (bond_is_lb(bond)) {
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 2936171..e732e16 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -172,6 +172,7 @@ struct vlan_entry {
 
 struct slave {
 	struct net_device *dev; /* first - useful for panic debug */
+	struct rx_handler rx_handler;
 	struct slave *next;
 	struct slave *prev;
 	struct bonding *bond; /* our master */
@@ -196,6 +197,11 @@ struct slave {
 #endif
 };
 
+#define bond_slave_get(rx_handler)			\
+	netdev_rx_handler_get_priv(rx_handler,		\
+				   struct slave,	\
+				   rx_handler)
+
 /*
  * Link pseudo-state only used internally by monitors
  */
@@ -253,9 +259,6 @@ struct bonding {
 #endif /* CONFIG_DEBUG_FS */
 };
 
-#define bond_slave_get_rcu(dev) \
-	((struct slave *) rcu_dereference(dev->rx_handler_data))
-
 /**
  * Returns NULL if the net_device does not belong to any of the bond's slaves
  *
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index cc67cbe..49ca58b 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -34,19 +34,28 @@
 #define MACVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
 
 struct macvlan_port {
+	struct rx_handler	rx_handler;
 	struct net_device	*dev;
 	struct hlist_head	vlan_hash[MACVLAN_HASH_SIZE];
 	struct list_head	vlans;
 	struct rcu_head		rcu;
-	bool 			passthru;
+	bool			passthru;
 	int			count;
 };
 
+#define macvlan_port_get(rx_handler)				\
+	netdev_rx_handler_get_priv(rx_handler,			\
+				   struct macvlan_port,		\
+				   rx_handler)
+
+#define macvlan_port_get_by_dev(dev)					\
+	netdev_rx_handler_get_priv_by_prio(dev,				\
+					   RX_HANDLER_PRIO_MACVLAN,	\
+					   struct macvlan_port,		\
+					   rx_handler)
+
 static void macvlan_port_destroy(struct net_device *dev);
 
-#define macvlan_port_get_rcu(dev) \
-	((struct macvlan_port *) rcu_dereference(dev->rx_handler_data))
-#define macvlan_port_get(dev) ((struct macvlan_port *) dev->rx_handler_data)
 #define macvlan_port_exists(dev) (dev->priv_flags & IFF_MACVLAN_PORT)
 
 static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port,
@@ -156,7 +165,8 @@ static void macvlan_broadcast(struct sk_buff *skb,
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
+static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb,
+						struct rx_handler *rx_handler)
 {
 	struct macvlan_port *port;
 	struct sk_buff *skb = *pskb;
@@ -167,7 +177,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 	unsigned int len = 0;
 	int ret = NET_RX_DROP;
 
-	port = macvlan_port_get_rcu(skb->dev);
+	port = macvlan_port_get(rx_handler);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
@@ -617,7 +627,9 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 
-	err = netdev_rx_handler_register(dev, macvlan_handle_frame, port);
+	err = netdev_rx_handler_register(dev, &port->rx_handler,
+					 macvlan_handle_frame,
+					 RX_HANDLER_PRIO_MACVLAN);
 	if (err)
 		kfree(port);
 	else
@@ -627,10 +639,11 @@ static int macvlan_port_create(struct net_device *dev)
 
 static void macvlan_port_destroy(struct net_device *dev)
 {
-	struct macvlan_port *port = macvlan_port_get(dev);
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct macvlan_port *port = vlan->port;
 
 	dev->priv_flags &= ~IFF_MACVLAN_PORT;
-	netdev_rx_handler_unregister(dev);
+	netdev_rx_handler_unregister(dev, &port->rx_handler);
 	kfree_rcu(port, rcu);
 }
 
@@ -696,7 +709,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 		if (err < 0)
 			return err;
 	}
-	port = macvlan_port_get(lowerdev);
+	port = macvlan_port_get_by_dev(lowerdev);
 
 	/* Only 1 macvlan device can be created in passthru mode */
 	if (port->passthru)
@@ -818,7 +831,7 @@ static int macvlan_device_event(struct notifier_block *unused,
 	if (!macvlan_port_exists(dev))
 		return NOTIFY_DONE;
 
-	port = macvlan_port_get(dev);
+	port = macvlan_port_get_by_dev(dev);
 
 	switch (event) {
 	case NETDEV_CHANGE:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 011eb89..126cd07 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -437,7 +437,51 @@ enum rx_handler_result {
 	RX_HANDLER_PASS,
 };
 typedef enum rx_handler_result rx_handler_result_t;
-typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
+
+struct rx_handler;
+typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb,
+					      struct rx_handler *rx_handler);
+
+enum rx_handler_prio {
+	RX_HANDLER_PRIO_BRIDGE,
+	RX_HANDLER_PRIO_BOND,
+	RX_HANDLER_PRIO_MACVLAN,
+};
+
+/*
+ * struct rx_handler should be embedded into
+ * private struct used by rx_handler
+ */
+struct rx_handler {
+	struct list_head	list;
+	rx_handler_func_t	*callback;
+	unsigned int		prio;
+};
+
+/**
+ * netdev_rx_handler_get_priv - get containing private structure of given
+ *				receive handler
+ * @rx_handler: receive_handler
+ * @type: the type of the container struct this is embedded in
+ * @member: the name of the member within the struct
+ */
+#define netdev_rx_handler_get_priv(rx_handler, type, member) \
+	container_of(rx_handler, type, member)
+
+/**
+ * netdev_rx_handler_get_priv_by_prio, netdev_rx_handler_get_priv_by_prio_rcu
+ *	- get containing private structure of given receive handler priority
+ * @dev: netdevice
+ * @type: the type of the container struct this is embedded in
+ * @member: the name of the member within the struct
+ */
+#define netdev_rx_handler_get_priv_by_prio(dev, prio, type, member)		\
+	netdev_rx_handler_get_priv(netdev_rx_handler_get_by_prio(dev, prio),	\
+				   type, member)
+
+#define netdev_rx_handler_get_priv_by_prio_rcu(dev, prio, type, member)		\
+	netdev_rx_handler_get_priv(netdev_rx_handler_get_by_prio_rcu(dev, prio),\
+				   type, member)
 
 extern void __napi_schedule(struct napi_struct *n);
 
@@ -1238,8 +1282,7 @@ struct net_device {
 #endif
 #endif
 
-	rx_handler_func_t __rcu	*rx_handler;
-	void __rcu		*rx_handler_data;
+	struct list_head	rx_handler_list;
 
 	struct netdev_queue __rcu *ingress_queue;
 
@@ -2082,10 +2125,18 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
+extern struct rx_handler *
+netdev_rx_handler_get_by_prio(const struct net_device *dev,
+			      unsigned int prio);
+extern struct rx_handler *
+netdev_rx_handler_get_by_prio_rcu(const struct net_device *dev,
+				  unsigned int prio);
 extern int netdev_rx_handler_register(struct net_device *dev,
-				      rx_handler_func_t *rx_handler,
-				      void *rx_handler_data);
-extern void netdev_rx_handler_unregister(struct net_device *dev);
+				      struct rx_handler *rx_handler,
+			              rx_handler_func_t *callback,
+				      unsigned int prio);
+extern void netdev_rx_handler_unregister(struct net_device *dev,
+					 struct rx_handler *rx_handler);
 
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 1bacca4..4ee5d78 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -146,7 +146,7 @@ static void del_nbp(struct net_bridge_port *p)
 
 	dev->priv_flags &= ~IFF_BRIDGE_PORT;
 
-	netdev_rx_handler_unregister(dev);
+	netdev_rx_handler_unregister(dev, &p->rx_handler);
 	synchronize_net();
 
 	netdev_set_master(dev, NULL);
@@ -365,7 +365,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (err)
 		goto err3;
 
-	err = netdev_rx_handler_register(dev, br_handle_frame, p);
+	err = netdev_rx_handler_register(dev, &p->rx_handler, br_handle_frame,
+					 RX_HANDLER_PRIO_BRIDGE);
 	if (err)
 		goto err4;
 
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index f3ac1e8..5f396d8 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -140,7 +140,8 @@ static inline int is_link_local(const unsigned char *dest)
  * Return NULL if skb is handled
  * note: already called with rcu_read_lock
  */
-rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
+rx_handler_result_t br_handle_frame(struct sk_buff **pskb,
+				    struct rx_handler *rx_handler)
 {
 	struct net_bridge_port *p;
 	struct sk_buff *skb = *pskb;
@@ -157,7 +158,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 	if (!skb)
 		return RX_HANDLER_CONSUMED;
 
-	p = br_port_get_rcu(skb->dev);
+	p = br_port_get(rx_handler);
 
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 54578f2..1a1ea40 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -108,6 +108,7 @@ struct net_bridge_mdb_htable
 
 struct net_bridge_port
 {
+	struct rx_handler		rx_handler;
 	struct net_bridge		*br;
 	struct net_device		*dev;
 	struct list_head		list;
@@ -152,18 +153,32 @@ struct net_bridge_port
 #endif
 };
 
+#define br_port_get(rx_handler)					\
+	netdev_rx_handler_get_priv(rx_handler,			\
+				   struct net_bridge_port,	\
+				   rx_handler)
+
 #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
 
-static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
+static inline struct net_bridge_port *
+br_port_get_rcu(const struct net_device *dev)
 {
-	struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data);
-	return br_port_exists(dev) ? port : NULL;
+	if (unlikely(!br_port_exists(dev)))
+		return NULL;
+	return netdev_rx_handler_get_priv_by_prio_rcu(dev,
+						      RX_HANDLER_PRIO_BRIDGE,
+						      struct net_bridge_port,
+						      rx_handler);
 }
 
 static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev)
 {
-	return br_port_exists(dev) ?
-		rtnl_dereference(dev->rx_handler_data) : NULL;
+	if (unlikely(!br_port_exists(dev)))
+		return NULL;
+	return netdev_rx_handler_get_priv_by_prio(dev,
+						  RX_HANDLER_PRIO_BRIDGE,
+						  struct net_bridge_port,
+						  rx_handler);
 }
 
 struct br_cpu_netstats {
@@ -382,7 +397,8 @@ extern u32 br_features_recompute(struct net_bridge *br, u32 features);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
+extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb,
+					   struct rx_handler *rx_handler);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6b6ef14..92d9007 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3043,10 +3043,55 @@ out:
 #endif
 
 /**
+ *	netdev_rx_handler_get_by_prio - get receive handler struct by priority
+ *	@dev: net device
+ *	@prio: receive handler priority
+ *
+ *	Find and return receive handler for given priority.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+struct rx_handler *
+netdev_rx_handler_get_by_prio(const struct net_device *dev, unsigned int prio)
+{
+	struct rx_handler *rx_handler;
+
+	ASSERT_RTNL();
+	list_for_each_entry(rx_handler, &dev->rx_handler_list, list)
+		if (rx_handler->prio == prio)
+			return rx_handler;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_get_by_prio);
+
+/**
+ *	netdev_rx_handler_get_by_prio_rcu - get receive handler struct by priority
+ *	@dev: net device
+ *	@prio: receive handler priority
+ *
+ *	RCU variant to find and return receive handler for given priority.
+ *
+ *	The caller must hold the rcu_read_lock.
+ */
+struct rx_handler *
+netdev_rx_handler_get_by_prio_rcu(const struct net_device *dev,
+				  unsigned int prio)
+{
+	struct rx_handler *rx_handler;
+
+	list_for_each_entry_rcu(rx_handler, &dev->rx_handler_list, list)
+		if (rx_handler->prio == prio)
+			return rx_handler;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_get_by_prio_rcu);
+
+/**
  *	netdev_rx_handler_register - register receive handler
  *	@dev: device to register a handler for
- *	@rx_handler: receive handler to register
- *	@rx_handler_data: data pointer that is used by rx handler
+ *	@rx_handler: receive handler structure to register
+ *	@callback: receive handler callback function to register
+ *	@prio: receive handler priority
  *
  *	Register a receive hander for a device. This handler will then be
  *	called from __netif_receive_skb. A negative errno code is returned
@@ -3057,17 +3102,24 @@ out:
  *	For a general description of rx_handler, see enum rx_handler_result.
  */
 int netdev_rx_handler_register(struct net_device *dev,
-			       rx_handler_func_t *rx_handler,
-			       void *rx_handler_data)
+			       struct rx_handler *rx_handler,
+			       rx_handler_func_t *callback, unsigned int prio)
 {
-	ASSERT_RTNL();
+	struct list_head *pos;
 
-	if (dev->rx_handler)
+	ASSERT_RTNL();
+	if (netdev_rx_handler_get_by_prio(dev, prio))
 		return -EBUSY;
+	list_for_each(pos, &dev->rx_handler_list) {
+		struct rx_handler *entry;
 
-	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
-	rcu_assign_pointer(dev->rx_handler, rx_handler);
-
+		entry = list_entry(pos, struct rx_handler, list);
+		if (prio > entry->prio)
+			break;
+	}
+	rx_handler->callback = callback;
+	rx_handler->prio = prio;
+	list_add_rcu(&rx_handler->list, pos);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
@@ -3075,24 +3127,24 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
 /**
  *	netdev_rx_handler_unregister - unregister receive handler
  *	@dev: device to unregister a handler from
+ *	@rx_handler: receive handler structure to unregister
  *
  *	Unregister a receive hander from a device.
  *
  *	The caller must hold the rtnl_mutex.
  */
-void netdev_rx_handler_unregister(struct net_device *dev)
+void netdev_rx_handler_unregister(struct net_device *dev,
+				  struct rx_handler *rx_handler)
 {
-
 	ASSERT_RTNL();
-	rcu_assign_pointer(dev->rx_handler, NULL);
-	rcu_assign_pointer(dev->rx_handler_data, NULL);
+	list_del_rcu(&rx_handler->list);
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
-	rx_handler_func_t *rx_handler;
+	struct rx_handler *rx_handler;
 	struct net_device *orig_dev;
 	struct net_device *null_or_dev;
 	bool deliver_exact = false;
@@ -3152,13 +3204,12 @@ another_round:
 ncls:
 #endif
 
-	rx_handler = rcu_dereference(skb->dev->rx_handler);
-	if (rx_handler) {
+	list_for_each_entry_rcu(rx_handler, &skb->dev->rx_handler_list, list) {
 		if (pt_prev) {
 			ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
-		switch (rx_handler(&skb)) {
+		switch (rx_handler->callback(&skb, rx_handler)) {
 		case RX_HANDLER_CONSUMED:
 			goto out;
 		case RX_HANDLER_ANOTHER:
@@ -5870,6 +5921,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
+	INIT_LIST_HEAD(&dev->rx_handler_list);
+
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 
-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ