[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190517155625.117835-1-willemdebruijn.kernel@gmail.com>
Date: Fri, 17 May 2019 11:56:25 -0400
From: Willem de Bruijn <willemdebruijn.kernel@...il.com>
To: netdev@...r.kernel.org
Cc: Willem de Bruijn <willemb@...gle.com>
Subject: [PATCH net-next RFC] ipv6: elide flowlabel check if no exclusive leases exist
From: Willem de Bruijn <willemb@...gle.com>
Processes can request ipv6 flowlabels with cmsg IPV6_FLOWINFO.
If not set, by default an autogenerated flowlabel is selected.
Explicit flowlabels require a control operation per label plus a
datapath check on every connection (every datagram if unconnected).
This is particularly expensive on unconnected sockets with many
connections, such as QUIC.
In the common case, where no lease is exclusive, the check can be
safely elided, as both lease request and check trivially succeed.
Indeed, autoflowlabel does the same (even with exclusive leases).
Elide the check if no process has requested an exclusive lease.
This is an optimization. Robust applications still have to revert to
requesting leases if the fast path fails due to an exclusive lease.
This is decidedly an RFC patch:
- need to update all fl6_sock_lookup callers, not just udp
- behavior should be per-netns isolated
Other approaches considered:
- a single "get all flowlabels, non-exclusive" flowlabel get request
if set, elide fl6_sock_lookup and fail exclusive lease requests
- sysctls (only useful if on by default, with static_branch)
A) "non-exclusive mode", failing all exclusive lease requests:
processes already have to be robust against lease failure
B) just bypass check in fl6_sock_lookup, like autoflowlabel
Signed-off-by: Willem de Bruijn <willemb@...gle.com>
---
include/net/ipv6.h | 11 +++++++++++
net/ipv6/ip6_flowlabel.c | 6 ++++++
net/ipv6/udp.c | 8 ++++----
3 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index daf80863d3a50..8881cee572410 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -17,6 +17,7 @@
#include <linux/hardirq.h>
#include <linux/jhash.h>
#include <linux/refcount.h>
+#include <linux/jump_label.h>
#include <net/if_inet6.h>
#include <net/ndisc.h>
#include <net/flow.h>
@@ -343,7 +344,17 @@ static inline void txopt_put(struct ipv6_txoptions *opt)
kfree_rcu(opt, rcu);
}
+extern struct static_key_false ipv6_flowlabel_exclusive;
struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label);
+static inline struct ip6_flowlabel *fl6_sock_verify(struct sock *sk,
+ __be32 label)
+{
+ if (static_branch_unlikely(&ipv6_flowlabel_exclusive))
+ return fl6_sock_lookup(sk, label) ? : ERR_PTR(-ENOENT);
+
+ return NULL;
+}
+
struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
struct ip6_flowlabel *fl,
struct ipv6_txoptions *fopt);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index be5f3d7ceb966..d5f4233b04e0c 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -57,6 +57,8 @@ static DEFINE_SPINLOCK(ip6_fl_lock);
static DEFINE_SPINLOCK(ip6_sk_fl_lock);
+DEFINE_STATIC_KEY_FALSE(ipv6_flowlabel_exclusive);
+
#define for_each_fl_rcu(hash, fl) \
for (fl = rcu_dereference_bh(fl_ht[(hash)]); \
fl != NULL; \
@@ -98,6 +100,8 @@ static void fl_free_rcu(struct rcu_head *head)
{
struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
+ if (fl->share != IPV6_FL_S_NONE && fl->share != IPV6_FL_S_ANY)
+ static_branch_dec(&ipv6_flowlabel_exclusive);
if (fl->share == IPV6_FL_S_PROCESS)
put_pid(fl->owner.pid);
kfree(fl->opt);
@@ -423,6 +427,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
}
fl->dst = freq->flr_dst;
atomic_set(&fl->users, 1);
+ if (fl->share != IPV6_FL_S_ANY)
+ static_branch_inc(&ipv6_flowlabel_exclusive);
switch (fl->share) {
case IPV6_FL_S_EXCL:
case IPV6_FL_S_ANY:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 07fa579dfb96c..859a1cbd54906 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1331,8 +1331,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (np->sndflow) {
fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
- flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
- if (!flowlabel)
+ flowlabel = fl6_sock_verify(sk, fl6.flowlabel);
+ if (IS_ERR(flowlabel))
return -EINVAL;
}
}
@@ -1383,8 +1383,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return err;
}
if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
- flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
- if (!flowlabel)
+ flowlabel = fl6_sock_verify(sk, fl6.flowlabel);
+ if (IS_ERR(flowlabel))
return -EINVAL;
}
if (!(opt->opt_nflen|opt->opt_flen))
--
2.21.0.1020.gf2820cf01a-goog
Powered by blists - more mailing lists