lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1432658049-3400132-4-git-send-email-tom@herbertland.com>
Date:	Tue, 26 May 2015 09:34:09 -0700
From:	Tom Herbert <tom@...bertland.com>
To:	<davem@...emloft.net>, <netdev@...r.kernel.org>
Subject: [PATCH v2 net-next 3/3] net: Add incoming CPU mask to sockets

Added matching of CPU to a socket CPU mask. This is useful for TCP
listeners and unconnected UDP. This works with SO_REUSEPORT to steer
packets to listener sockets based on CPU affinity.

In this patch:
 - Add SO_INCOMING_CPU_MASK
 - Add a CPU mask pointer to struct sock
 - Get/setsockopt to get/set the mask on a socket
 - Compat functions for the sockopts
 - Add sk_match_incoming_cpu_mask to check whether the running CPU is in
   the mask for a socket
 - Call sk_match_incoming_cpu_mask from inet compute_score and UDP
   functions for IPv4 and IPv6

Signed-off-by: Tom Herbert <tom@...bertland.com>
---
 arch/alpha/include/uapi/asm/socket.h   |  2 +
 arch/avr32/include/uapi/asm/socket.h   |  2 +
 arch/cris/include/uapi/asm/socket.h    |  2 +
 arch/frv/include/uapi/asm/socket.h     |  2 +
 arch/ia64/include/uapi/asm/socket.h    |  2 +
 arch/m32r/include/uapi/asm/socket.h    |  2 +
 arch/mips/include/uapi/asm/socket.h    |  2 +
 arch/mn10300/include/uapi/asm/socket.h |  2 +
 arch/parisc/include/uapi/asm/socket.h  |  2 +
 arch/powerpc/include/uapi/asm/socket.h |  2 +
 arch/s390/include/uapi/asm/socket.h    |  2 +
 arch/sparc/include/uapi/asm/socket.h   |  2 +
 arch/xtensa/include/uapi/asm/socket.h  |  2 +
 include/net/sock.h                     | 31 +++++++++++++
 include/uapi/asm-generic/socket.h      |  2 +
 net/compat.c                           | 56 ++++++++++++++++++++++++
 net/core/sock.c                        | 80 ++++++++++++++++++++++++++++++++++
 net/ipv4/inet_hashtables.c             |  3 ++
 net/ipv4/udp.c                         |  6 +++
 net/ipv6/inet6_hashtables.c            |  3 ++
 net/ipv6/udp.c                         |  3 ++
 21 files changed, 210 insertions(+)

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 9a20821..eae65a2 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -92,4 +92,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	51
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 2b65ed6..89515e3 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -85,4 +85,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	51
+
 #endif /* _UAPI__ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
index e2503d9f..65fcf0e 100644
--- a/arch/cris/include/uapi/asm/socket.h
+++ b/arch/cris/include/uapi/asm/socket.h
@@ -87,6 +87,8 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	51
+
 #endif /* _ASM_SOCKET_H */
 
 
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index 4823ad1..1af3b78 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -85,5 +85,7 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	51
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 59be3d8..7ef59d3 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -94,4 +94,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	51
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 7bc4cb2..53a697c 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -85,4 +85,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	51
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index dec3c85..063d59d 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -103,4 +103,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	51
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index cab7d6d..3c9f8e9 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -85,4 +85,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	51
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index a5cd40c..557a09b 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -84,4 +84,6 @@
 #define SO_ATTACH_BPF		0x402B
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	0x402C
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index c046666..a72fac6 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -92,4 +92,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	51
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 296942d..b901044 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -91,4 +91,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	51
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index e6a16c4..95835a1 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -81,6 +81,8 @@
 #define SO_ATTACH_BPF		0x0034
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	0x0035
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 4120af0..0167812 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -96,4 +96,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	51
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index bcf6114..8407c3b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -123,6 +123,11 @@ typedef struct {
 #endif
 } socket_lock_t;
 
+/**
+ * struct rcu_cpumask - CPU bitmap that can be freed after an RCU grace period
+ * @rcu: RCU callback head, handed to kfree_rcu() when the mask is released
+ * @cpumask: trailing bitmap storage; the allocator adds cpumask_size() bytes
+ *	for it and readers view it through to_cpumask()
+ */
+struct rcu_cpumask {
+	struct rcu_head rcu;
+	unsigned long cpumask[0];
+};
+
 struct sock;
 struct proto;
 struct net;
@@ -150,6 +155,7 @@ typedef __u64 __bitwise __addrpair;
  *	@skc_node: main hash linkage for various protocol lookup tables
  *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
  *	@skc_tx_queue_mapping: tx queue number for this connection
+ *	@skc_incoming_cpu_mask: CPU mask for listeners
  *	@skc_refcnt: reference count
  *
  *	This is the minimal network layer representation of sockets, the header
@@ -212,9 +218,12 @@ struct sock_common {
 		struct hlist_nulls_node skc_nulls_node;
 	};
 
+	struct rcu_cpumask __rcu *skc_incoming_cpu_mask;
+
 	/* Cachelines above this point are read mostly and are used in socket
 	 * lookup.
 	 */
+
 	int			skc_tx_queue_mapping
 				____cacheline_aligned_in_smp;
 
@@ -314,6 +323,7 @@ struct sock {
 #define sk_node			__sk_common.skc_node
 #define sk_nulls_node		__sk_common.skc_nulls_node
 #define sk_refcnt		__sk_common.skc_refcnt
+#define sk_incoming_cpu_mask	__sk_common.skc_incoming_cpu_mask
 #define sk_tx_queue_mapping	__sk_common.skc_tx_queue_mapping
 
 #define sk_dontcopy_begin	__sk_common.skc_dontcopy_begin
@@ -2220,6 +2230,27 @@ static inline bool sk_fullsock(const struct sock *sk)
 	return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
 }
 
+/**
+ * sk_match_incoming_cpu_mask - test whether this CPU is in a socket's CPU mask
+ * @sk: socket to check
+ *
+ * Returns true only when @sk has an incoming CPU mask installed and the CPU
+ * executing this call is set in it. A socket without a mask never matches.
+ */
+static inline bool sk_match_incoming_cpu_mask(const struct sock *sk)
+{
+	struct rcu_cpumask *mask;
+	bool ret = false;
+
+	/* Cheap unlocked test so sockets without a mask skip the RCU section.
+	 * The pointer is only compared here, never dereferenced, which is
+	 * what rcu_access_pointer() is for.
+	 */
+	if (!rcu_access_pointer(sk->sk_incoming_cpu_mask))
+		return false;
+
+	rcu_read_lock();
+
+	/* The mask may have been cleared since the test above, so recheck */
+	mask = rcu_dereference(sk->sk_incoming_cpu_mask);
+	if (likely(mask))
+		ret = cpumask_test_cpu(raw_smp_processor_id(),
+				       to_cpumask(mask->cpumask));
+
+	rcu_read_unlock();
+
+	return ret;
+}
+
 void sock_enable_timestamp(struct sock *sk, int flag);
 int sock_get_timestamp(struct sock *, struct timeval __user *);
 int sock_get_timestampns(struct sock *, struct timespec __user *);
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 5c15c2a..d41c8b9 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -87,4 +87,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_INCOMING_CPU_MASK	51
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/compat.c b/net/compat.c
index 5cfd26a..f9fc5ce 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -351,6 +351,23 @@ static int do_set_sock_timeout(struct socket *sock, int level,
 	return err;
 }
 
+/* Compat path for setsockopt(SO_INCOMING_CPU_MASK): converts the caller's
+ * array of compat_ulong_t into a bitmap of cpumask_size() bytes and passes
+ * that bitmap on to sock_setsockopt().
+ *
+ * Returns the error from compat_get_user_cpu_mask() if the conversion
+ * fails, otherwise the result of sock_setsockopt().
+ *
+ * NOTE(review): @mask is obtained from compat_alloc_user_space() and is
+ * annotated __user, yet it is passed to compat_get_user_cpu_mask() as the
+ * conversion destination - confirm that helper accepts a user-space
+ * pointer there rather than a kernel struct cpumask.
+ */
+static int do_set_incoming_cpu_mask(struct socket *sock, int level,
+		int optname, char __user *optval, unsigned int optlen)
+{
+	compat_ulong_t __user *user_mask_ptr =
+	    (compat_ulong_t __user *)optval;
+	struct cpumask __user *mask = compat_alloc_user_space(cpumask_size());
+	int err;
+
+	err = compat_get_user_cpu_mask(user_mask_ptr, optlen, mask);
+	if (err)
+		return err;
+
+	/* The converted bitmap is always presented as a full-size mask */
+	return sock_setsockopt(sock, level, optname,
+			       (char __user *)cpumask_bits(mask),
+			       cpumask_size());
+}
+
 static int compat_sock_setsockopt(struct socket *sock, int level, int optname,
 				char __user *optval, unsigned int optlen)
 {
@@ -360,6 +377,10 @@ static int compat_sock_setsockopt(struct socket *sock, int level, int optname,
 	if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)
 		return do_set_sock_timeout(sock, level, optname, optval, optlen);
 
+	if (optname == SO_INCOMING_CPU_MASK)
+		return do_set_incoming_cpu_mask(sock, level, optname,
+						optval, optlen);
+
 	return sock_setsockopt(sock, level, optname, optval, optlen);
 }
 
@@ -419,11 +440,46 @@ static int do_get_sock_timeout(struct socket *sock, int level, int optname,
 	return err;
 }
 
+/* Compat path for getsockopt(SO_INCOMING_CPU_MASK): validates the caller's
+ * buffer length, lets sock_getsockopt() fill a staging bitmap of
+ * cpumask_size() bytes, then writes that bitmap back to the caller as an
+ * array of compat_ulong_t.
+ *
+ * Returns -EFAULT if *@optlen cannot be read or written or the bitmap
+ * cannot be stored, -EINVAL if the caller's length holds fewer than
+ * nr_cpu_ids bits or is not a multiple of sizeof(compat_ulong_t), otherwise
+ * the result of sock_getsockopt().
+ *
+ * NOTE(review): @mask is obtained from compat_alloc_user_space() and is
+ * annotated __user, yet cpumask_bits(mask) is passed to compat_put_bitmap()
+ * as the source bitmap - confirm that helper accepts a user-space pointer.
+ * NOTE(review): @optlen is declared unsigned int __user * here while the
+ * caller, compat_sock_getsockopt(), holds an int __user * - confirm the
+ * mismatch is intended.
+ */
+static int do_get_incoming_cpu_mask(struct socket *sock, int level,
+		int optname, char __user *optval, unsigned int __user *optlen)
+{
+	compat_ulong_t __user *user_mask_ptr =
+	    (compat_ulong_t __user *)optval;
+	struct cpumask __user *mask = compat_alloc_user_space(cpumask_size());
+	int len, err;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	/* The caller's buffer must have room for one bit per possible CPU id */
+	if ((len * BITS_PER_BYTE) < nr_cpu_ids)
+		return -EINVAL;
+	if (len & (sizeof(compat_ulong_t) - 1))
+		return -EINVAL;
+
+	/* Advertise the staging buffer's size to sock_getsockopt(); this
+	 * overwrites the caller's *optlen even if the call below fails.
+	 */
+	if (put_user(cpumask_size(), optlen))
+		return -EFAULT;
+
+	err = sock_getsockopt(sock, level, optname,
+			      (char __user *)cpumask_bits(mask), optlen);
+	/* Re-read the length reported back and convert that many bytes
+	 * (len * 8 bits) into the caller's compat layout.
+	 */
+	if (err == 0)
+		if (get_user(len, optlen) ||
+		    compat_put_bitmap(user_mask_ptr,
+				      cpumask_bits(mask), len * 8))
+			err = -EFAULT;
+
+	return err;
+}
+
 static int compat_sock_getsockopt(struct socket *sock, int level, int optname,
 				char __user *optval, int __user *optlen)
 {
 	if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)
 		return do_get_sock_timeout(sock, level, optname, optval, optlen);
+
+	/* The CPU mask is stored as an array of unsigned long, so it is
+	 * converted to the caller's compat_ulong_t layout before returning.
+	 */
+	if (optname == SO_INCOMING_CPU_MASK)
+		return do_get_incoming_cpu_mask(sock, level, optname,
+						optval, optlen);
+
 	return sock_getsockopt(sock, level, optname, optval, optlen);
 }
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 29124fc..25fc8a7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -672,6 +672,71 @@ bool sk_mc_loop(struct sock *sk)
 }
 EXPORT_SYMBOL(sk_mc_loop);
 
+/* Install, replace or clear the incoming CPU mask of @sk.
+ *
+ * @optval and @optlen describe a user-space bitmap made of unsigned longs.
+ * An @optlen of zero removes any existing mask. Otherwise at most
+ * cpumask_size() bytes are copied: shorter input leaves the remaining bits
+ * clear and longer input is truncated.
+ *
+ * Returns 0 on success, -EINVAL if @optlen is not a multiple of
+ * sizeof(unsigned long), -ENOMEM if the new mask cannot be allocated and
+ * -EFAULT if @optval cannot be read.
+ */
+static int do_set_incoming_cpu_mask(struct sock *sk, char __user *optval,
+				    unsigned int optlen)
+{
+	struct rcu_cpumask *new_mask, *old_mask;
+
+	old_mask = rcu_dereference_protected(sk->sk_incoming_cpu_mask,
+					     sock_owned_by_user(sk));
+
+	if (optlen == 0) {
+		/* Clearing finishes here whether or not a mask was set;
+		 * falling through would install an all-zero mask instead.
+		 */
+		RCU_INIT_POINTER(sk->sk_incoming_cpu_mask, NULL);
+		if (old_mask)
+			kfree_rcu(old_mask, rcu);
+		return 0;
+	}
+
+	if (optlen & (sizeof(unsigned long) - 1))
+		return -EINVAL;
+
+	new_mask = kzalloc(sizeof(*new_mask) + cpumask_size(), GFP_KERNEL);
+	if (!new_mask)
+		return -ENOMEM;
+
+	if (copy_from_user(new_mask->cpumask, optval,
+			   min_t(size_t, optlen, cpumask_size()))) {
+		/* Never published, so no grace period is needed to free it */
+		kfree(new_mask);
+		return -EFAULT;
+	}
+
+	rcu_assign_pointer(sk->sk_incoming_cpu_mask, new_mask);
+
+	/* Readers may still hold the old mask, so defer freeing it */
+	if (old_mask)
+		kfree_rcu(old_mask, rcu);
+
+	return 0;
+}
+
+/* Copy the incoming CPU mask of @sk to user space.
+ *
+ * @len is the size of the caller's buffer; it must be a multiple of
+ * sizeof(unsigned long) and at least cpumask_size() bytes. On success
+ * *@optlen is set to the number of bytes written to @optval:
+ * cpumask_size() when a mask is installed, or 0 when the socket has none
+ * (mirroring the zero-length setsockopt that clears the mask).
+ *
+ * Returns 0 on success, -EINVAL for an unusable @len, -ENOMEM if the
+ * staging buffer cannot be allocated and -EFAULT if user memory cannot be
+ * written.
+ */
+static int do_get_incoming_cpu_mask(struct sock *sk, char __user *optval,
+				    unsigned int __user *optlen,
+				    unsigned int len)
+{
+	struct rcu_cpumask *mask;
+	unsigned long *snapshot;
+	unsigned int out_len = 0;
+	int err = 0;
+
+	if (len < cpumask_size())
+		return -EINVAL;
+
+	if (len & (sizeof(unsigned long) - 1))
+		return -EINVAL;
+
+	/* Copies to user space may fault and sleep, which is not allowed
+	 * inside an RCU read-side critical section, so the bits are staged
+	 * in kernel memory first.
+	 */
+	snapshot = kmalloc(cpumask_size(), GFP_KERNEL);
+	if (!snapshot)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	mask = rcu_dereference(sk->sk_incoming_cpu_mask);
+	if (mask) {
+		/* Masks are allocated with cpumask_size() bytes of bitmap */
+		memcpy(snapshot, mask->cpumask, cpumask_size());
+		out_len = cpumask_size();
+	}
+	rcu_read_unlock();
+
+	/* With no mask installed nothing is copied and a length of 0 is
+	 * reported.
+	 */
+	if (out_len && copy_to_user(optval, snapshot, out_len))
+		err = -EFAULT;
+	else if (put_user(out_len, optlen))
+		err = -EFAULT;
+
+	kfree(snapshot);
+
+	return err;
+}
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
@@ -990,6 +1055,10 @@ set_rcvbuf:
 					 sk->sk_max_pacing_rate);
 		break;
 
+	case SO_INCOMING_CPU_MASK:
+		ret = do_set_incoming_cpu_mask(sk, optval, optlen);
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1250,6 +1319,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_incoming_cpu;
 		break;
 
+	case SO_INCOMING_CPU_MASK:
+		return do_get_incoming_cpu_mask(sk, optval, optlen, len);
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -1429,6 +1501,7 @@ EXPORT_SYMBOL(sk_alloc);
 static void __sk_free(struct sock *sk)
 {
 	struct sk_filter *filter;
+	struct rcu_cpumask *incoming_cpu_mask;
 
 	if (sk->sk_destruct)
 		sk->sk_destruct(sk);
@@ -1440,6 +1513,12 @@ static void __sk_free(struct sock *sk)
 		RCU_INIT_POINTER(sk->sk_filter, NULL);
 	}
 
+	incoming_cpu_mask = rcu_dereference(sk->sk_incoming_cpu_mask);
+	if (incoming_cpu_mask) {
+		kfree_rcu(incoming_cpu_mask, rcu);
+		RCU_INIT_POINTER(sk->sk_incoming_cpu_mask, NULL);
+	}
+
 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
 
 	if (atomic_read(&sk->sk_omem_alloc))
@@ -1543,6 +1622,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		newsk->sk_err	   = 0;
 		newsk->sk_priority = 0;
 		newsk->sk_incoming_cpu = raw_smp_processor_id();
+		RCU_INIT_POINTER(newsk->sk_incoming_cpu_mask, NULL);
 		atomic64_set(&newsk->sk_cookie, 0);
 		/*
 		 * Before updating sk_refcnt, we must commit prior changes to memory
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3766bdd..2e9a95f 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -184,6 +184,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
 				return -1;
 			score += 4;
 		}
+
+		if (sk_match_incoming_cpu_mask(sk))
+			score += 4;
 	}
 	return score;
 }
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d10b7e0..dc6a3da 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -375,6 +375,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
 		score += 4;
 	}
 
+	if (sk_match_incoming_cpu_mask(sk))
+		score += 4;
+
 	return score;
 }
 
@@ -418,6 +421,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 		score += 4;
 	}
 
+	if (sk_match_incoming_cpu_mask(sk))
+		score += 4;
+
 	return score;
 }
 
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 871641b..8cc4ba9 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -114,6 +114,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
 				return -1;
 			score++;
 		}
+
+		if (sk_match_incoming_cpu_mask(sk))
+			score += 4;
 	}
 	return score;
 }
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c2ec416..a0c9a80 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -182,6 +182,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
 		score++;
 	}
 
+	if (sk_match_incoming_cpu_mask(sk))
+		score++;
+
 	return score;
 }
 
-- 
1.8.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ