lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1278098421-21296-7-git-send-email-sebastian@breakpoint.cc>
Date:	Fri,  2 Jul 2010 21:20:19 +0200
From:	Sebastian Andrzej Siewior <sebastian@...akpoint.cc>
To:	netdev@...r.kernel.org
Cc:	tglx@...utronix.de,
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Subject: [PATCH 6/8] net: implement emergency pools

From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>

This patch implements emergency pools which are bound to a specific
network device. They can be activated via the socket interface and used
for a specific socket.
The pools are built on top of rx-recycling. The socket interface allows
setting the number of skbs in the pool and activating the pool.
The size of the skbs which are accepted / added to the pool cannot be
changed. It is set by the network driver and gets altered on MTU change.
This requires dropping the current pool and re-allocating it. If the driver
does not set the skb size, the emergency pools cannot be used.
Once the emergency pools are activated, all rx-skb allocations by the
network driver are taken from the pool. tx-skbs are allocated from the
emergency pool only for the relevant socket, i.e. the one which
activated the emergency mode.
Since the socket _and_ the driver can add/remove skbs to/from the pool,
the list operations now use skb_queue_head() and skb_dequeue().
There is a patch later in the series which tries to bring the old unlocked
behavior back if the emergency pools are not used by the user.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
 arch/alpha/include/asm/socket.h   |    4 +
 arch/arm/include/asm/socket.h     |    3 +
 arch/avr32/include/asm/socket.h   |    3 +
 arch/cris/include/asm/socket.h    |    5 +-
 arch/frv/include/asm/socket.h     |    4 +-
 arch/h8300/include/asm/socket.h   |    3 +
 arch/ia64/include/asm/socket.h    |    3 +
 arch/m32r/include/asm/socket.h    |    3 +
 arch/m68k/include/asm/socket.h    |    3 +
 arch/mips/include/asm/socket.h    |    3 +
 arch/mn10300/include/asm/socket.h |    3 +
 arch/parisc/include/asm/socket.h  |    3 +
 arch/powerpc/include/asm/socket.h |    3 +
 arch/s390/include/asm/socket.h    |    3 +
 arch/sparc/include/asm/socket.h   |    3 +
 arch/xtensa/include/asm/socket.h  |    3 +
 include/asm-generic/socket.h      |    4 +
 include/linux/netdevice.h         |   52 +++++++------
 include/linux/skbuff.h            |    1 +
 include/net/sock.h                |    2 +
 net/core/skbuff.c                 |    8 ++
 net/core/sock.c                   |  142 +++++++++++++++++++++++++++++++++++++
 22 files changed, 234 insertions(+), 27 deletions(-)

diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h
index 06edfef..ea49db3 100644
--- a/arch/alpha/include/asm/socket.h
+++ b/arch/alpha/include/asm/socket.h
@@ -69,6 +69,10 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN		41
+#define SO_EPOOL_SIZE		42
+#define SO_EPOOL_MODE		43
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h
index 90ffd04..b827010 100644
--- a/arch/arm/include/asm/socket.h
+++ b/arch/arm/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN		41
+#define SO_EPOOL_SIZE		42
+#define SO_EPOOL_MODE		43
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h
index c8d1fae..64a7d45 100644
--- a/arch/avr32/include/asm/socket.h
+++ b/arch/avr32/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN		41
+#define SO_EPOOL_SIZE		42
+#define SO_EPOOL_MODE		43
 #endif /* __ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h
index 1a4a619..9b8e7ed 100644
--- a/arch/cris/include/asm/socket.h
+++ b/arch/cris/include/asm/socket.h
@@ -64,6 +64,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN		41
+#define SO_EPOOL_SIZE		42
+#define SO_EPOOL_MODE		43
 #endif /* _ASM_SOCKET_H */
-
-
diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h
index a6b2688..15a262f 100644
--- a/arch/frv/include/asm/socket.h
+++ b/arch/frv/include/asm/socket.h
@@ -62,5 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN		41
+#define SO_EPOOL_SIZE		42
+#define SO_EPOOL_MODE		43
 #endif /* _ASM_SOCKET_H */
-
diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h
index 04c0f45..d46d64e 100644
--- a/arch/h8300/include/asm/socket.h
+++ b/arch/h8300/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN		41
+#define SO_EPOOL_SIZE		42
+#define SO_EPOOL_MODE		43
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h
index 51427ea..04983aa 100644
--- a/arch/ia64/include/asm/socket.h
+++ b/arch/ia64/include/asm/socket.h
@@ -71,4 +71,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN		41
+#define SO_EPOOL_SIZE		42
+#define SO_EPOOL_MODE		43
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h
index 469787c..a0e5431 100644
--- a/arch/m32r/include/asm/socket.h
+++ b/arch/m32r/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN		41
+#define SO_EPOOL_SIZE		42
+#define SO_EPOOL_MODE		43
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h
index 9bf49c8..7018ceb 100644
--- a/arch/m68k/include/asm/socket.h
+++ b/arch/m68k/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN		41
+#define SO_EPOOL_SIZE		42
+#define SO_EPOOL_MODE		43
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h
index 9de5190..9f9d93a 100644
--- a/arch/mips/include/asm/socket.h
+++ b/arch/mips/include/asm/socket.h
@@ -82,6 +82,9 @@ To add: #define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN		41
+#define SO_EPOOL_SIZE		42
+#define SO_EPOOL_MODE		43
 #ifdef __KERNEL__
 
 /** sock_type - Socket types
diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h
index 4e60c42..70476eb 100644
--- a/arch/mn10300/include/asm/socket.h
+++ b/arch/mn10300/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN		41
+#define SO_EPOOL_SIZE		42
+#define SO_EPOOL_MODE		43
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h
index 225b7d6..a4706d0 100644
--- a/arch/parisc/include/asm/socket.h
+++ b/arch/parisc/include/asm/socket.h
@@ -61,6 +61,9 @@
 
 #define SO_RXQ_OVFL             0x4021
 
+#define SO_EPOOL_QLEN		0x4022
+#define SO_EPOOL_SIZE		0x4023
+#define SO_EPOOL_MODE		0x4024
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h
index 866f760..dce10f9 100644
--- a/arch/powerpc/include/asm/socket.h
+++ b/arch/powerpc/include/asm/socket.h
@@ -69,4 +69,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN           41
+#define SO_EPOOL_SIZE           42
+#define SO_EPOOL_MODE           43
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h
index fdff1e9..73d0117 100644
--- a/arch/s390/include/asm/socket.h
+++ b/arch/s390/include/asm/socket.h
@@ -70,4 +70,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN           41
+#define SO_EPOOL_SIZE           42
+#define SO_EPOOL_MODE           43
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h
index 9d3fefc..39eea91 100644
--- a/arch/sparc/include/asm/socket.h
+++ b/arch/sparc/include/asm/socket.h
@@ -58,6 +58,9 @@
 
 #define SO_RXQ_OVFL             0x0024
 
+#define SO_EPOOL_QLEN           0x0025
+#define SO_EPOOL_SIZE           0x0026
+#define SO_EPOOL_MODE           0x0027
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h
index cbdf2ff..161a2e5 100644
--- a/arch/xtensa/include/asm/socket.h
+++ b/arch/xtensa/include/asm/socket.h
@@ -73,4 +73,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_EPOOL_QLEN           41
+#define SO_EPOOL_SIZE           42
+#define SO_EPOOL_MODE           43
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h
index 9a6115e..fa9ccbb 100644
--- a/include/asm-generic/socket.h
+++ b/include/asm-generic/socket.h
@@ -64,4 +64,8 @@
 #define SO_DOMAIN		39
 
 #define SO_RXQ_OVFL             40
+
+#define SO_EPOOL_QLEN		41
+#define SO_EPOOL_SIZE		42
+#define SO_EPOOL_MODE		43
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4fa400b..fa7e951 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1095,6 +1095,28 @@ struct net_device {
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
+/**
+ *	dev_put - release reference to device
+ *	@dev: network device
+ *
+ * Release reference to device to allow it to be freed.
+ */
+static inline void dev_put(struct net_device *dev)
+{
+	atomic_dec(&dev->refcnt);
+}
+
+/**
+ *	dev_hold - get reference to device
+ *	@dev: network device
+ *
+ * Hold reference to device to keep it from being freed.
+ */
+static inline void dev_hold(struct net_device *dev)
+{
+	atomic_inc(&dev->refcnt);
+}
+
 static inline void net_recycle_init(struct net_device *dev, u32 qlen, u32 size)
 {
 	dev->rx_rec_skbs_max = qlen;
@@ -1118,9 +1140,13 @@ static inline void net_recycle_cleanup(struct net_device *dev)
 
 static inline void net_recycle_add(struct net_device *dev, struct sk_buff *skb)
 {
+	if (skb->emerg_dev) {
+		dev_put(skb->emerg_dev);
+		skb->emerg_dev = NULL;
+	}
 	if (skb_queue_len(&dev->rx_recycle) < dev->rx_rec_skbs_max &&
 			skb_recycle_check(skb, dev->rx_rec_skb_size))
-		__skb_queue_head(&dev->rx_recycle, skb);
+		skb_queue_head(&dev->rx_recycle, skb);
 	else
 		dev_kfree_skb_any(skb);
 }
@@ -1129,7 +1155,7 @@ static inline struct sk_buff *net_recycle_get(struct net_device *dev)
 {
 	struct sk_buff *skb;
 
-	skb = __skb_dequeue(&dev->rx_recycle);
+	skb = skb_dequeue(&dev->rx_recycle);
 	if (skb)
 		return skb;
 	return netdev_alloc_skb(dev, dev->rx_rec_skb_size);
@@ -1783,28 +1809,6 @@ extern int		netdev_budget;
 /* Called by rtnetlink.c:rtnl_unlock() */
 extern void netdev_run_todo(void);
 
-/**
- *	dev_put - release reference to device
- *	@dev: network device
- *
- * Release reference to device to allow it to be freed.
- */
-static inline void dev_put(struct net_device *dev)
-{
-	atomic_dec(&dev->refcnt);
-}
-
-/**
- *	dev_hold - get reference to device
- *	@dev: network device
- *
- * Hold reference to device to keep it from being freed.
- */
-static inline void dev_hold(struct net_device *dev)
-{
-	atomic_inc(&dev->refcnt);
-}
-
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
  * and _off may be called from IRQ context, but it is caller
  * who is responsible for serialization of these calls.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ac74ee0..caee62c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -319,6 +319,7 @@ struct sk_buff {
 
 	struct sock		*sk;
 	struct net_device	*dev;
+	struct net_device	*emerg_dev;
 
 	/*
 	 * This is the control buffer. It is free to use for every
diff --git a/include/net/sock.h b/include/net/sock.h
index 4f26f2f..3f3518a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -314,6 +314,8 @@ struct sock {
 #endif
 	__u32			sk_mark;
 	u32			sk_classid;
+	u32			emerg_en;
+	/* XXX 4 bytes hole on 64 bit */
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk, int bytes);
 	void			(*sk_write_space)(struct sock *sk);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 34432b4..f02737d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -425,6 +425,13 @@ static void skb_release_all(struct sk_buff *skb)
 
 void __kfree_skb(struct sk_buff *skb)
 {
+	struct net_device *ndev = skb->emerg_dev;
+
+	if (ndev) {
+		net_recycle_add(ndev, skb);
+		return;
+	}
+
 	skb_release_all(skb);
 	kfree_skbmem(skb);
 }
@@ -563,6 +570,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 {
 #define C(x) n->x = skb->x
 
+	n->emerg_dev = NULL;
 	n->next = n->prev = NULL;
 	n->sk = NULL;
 	__copy_skb_header(n, skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index fef2434..33aa1a5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -472,6 +472,71 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
 		sock_reset_flag(sk, bit);
 }
 
+static int sock_epool_set_qlen(struct sock *sk, int val)
+{
+	struct net *net = sock_net(sk);
+	struct net_device *dev;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!sk->sk_bound_dev_if)
+		return -ENODEV;
+	dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+	if (!dev)
+		return -ENODEV;
+
+	net_recycle_qlen(dev, val);
+	dev_put(dev);
+	return 0;
+}
+
+static int sock_epool_set_mode(struct sock *sk, int val)
+{
+	int ret;
+	struct net *net = sock_net(sk);
+	struct net_device *dev;
+
+	if (!val) {
+		sk->emerg_en = 0;
+		return 0;
+	}
+	if (sk->emerg_en && val)
+		return -EBUSY;
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+	if (!sk->sk_bound_dev_if)
+		return -ENODEV;
+	dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+	if (!dev)
+		return -ENODEV;
+	ret = -ENODEV;
+	if (!dev->rx_rec_skb_size)
+		goto out;
+
+	do {
+		struct sk_buff *skb;
+
+		if (skb_queue_len(&dev->rx_recycle) >= dev->rx_rec_skbs_max) {
+			ret = 0;
+			break;
+		}
+
+		skb = __netdev_alloc_skb(dev, dev->rx_rec_skb_size, GFP_KERNEL);
+		if (!skb) {
+			ret = -ENOMEM;
+			break;
+		}
+		net_recycle_add(dev, skb);
+	} while (1);
+
+	if (!ret)
+		sk->emerg_en = 1;
+out:
+	dev_put(dev);
+	return ret;
+}
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
@@ -740,6 +805,15 @@ set_rcvbuf:
 		else
 			sock_reset_flag(sk, SOCK_RXQ_OVFL);
 		break;
+	case SO_EPOOL_QLEN:
+		ret = sock_epool_set_qlen(sk, val);
+		break;
+	case SO_EPOOL_SIZE:
+		ret = -EINVAL;
+		break;
+	case SO_EPOOL_MODE:
+		ret = sock_epool_set_mode(sk, valbool);
+		break;
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -961,6 +1035,35 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
 		break;
 
+	case SO_EPOOL_QLEN:
+	{
+		struct net *net = sock_net(sk);
+		struct net_device *dev;
+
+		if (!sk->sk_bound_dev_if)
+			return -ENODEV;
+		dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+		if (!dev)
+			return -ENODEV;
+		v.val = dev->rx_rec_skbs_max;
+		break;
+	}
+	case SO_EPOOL_SIZE:
+	{
+		struct net *net = sock_net(sk);
+		struct net_device *dev;
+
+		if (!sk->sk_bound_dev_if)
+			return -ENODEV;
+		dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+		if (!dev)
+			return -ENODEV;
+		v.val = dev->rx_rec_skb_size;
+		break;
+	}
+	case SO_EPOOL_MODE:
+		v.val = sk->emerg_en;
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1459,6 +1562,37 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
 	return timeo;
 }
 
+static struct sk_buff *alloc_emerg_skb(struct sock *sk, unsigned int skb_len)
+{
+	struct net *net = sock_net(sk);
+	struct net_device *dev;
+	int err;
+	struct sk_buff *skb;
+
+	err = -ENODEV;
+	if (!sk->sk_bound_dev_if)
+		return ERR_PTR(err);
+	dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+	if (!dev)
+		return ERR_PTR(err);
+	err = -EINVAL;
+	if (dev->rx_rec_skb_size < skb_len) {
+		dev_put(dev);
+		return ERR_PTR(err);
+	}
+	skb = skb_dequeue(&dev->rx_recycle);
+	if (!skb) {
+		dev_put(dev);
+		err = -ENOBUFS;
+		return ERR_PTR(err);
+	}
+	/*
+	 * dev will be put once the skb is back from
+	 * its journey.
+	 */
+	skb->emerg_dev = dev;
+	return skb;
+}
 
 /*
  *	Generic send/receive buffer handlers
@@ -1488,6 +1622,14 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
 			goto failure;
 
 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+			if (sk->emerg_en) {
+				skb = alloc_emerg_skb(sk, header_len + data_len);
+				if (IS_ERR(skb)) {
+					err = PTR_ERR(skb);
+					goto failure;
+				}
+				break;
+			}
 			skb = alloc_skb(header_len, gfp_mask);
 			if (skb) {
 				int npages;
-- 
1.6.6.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ