[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1548699775-3015-6-git-send-email-tom@quantonium.net>
Date: Mon, 28 Jan 2019 10:22:55 -0800
From: Tom Herbert <tom@...bertland.com>
To: davem@...emloft.net, netdev@...r.kernel.org
Cc: Tom Herbert <tom@...ntonium.net>
Subject: [PATCH v2 net-next 5/5] ip6tlvs: API to set and remove individual TLVs from DO or HBH EH
Add functions and socket options that allow setting and removing
individual TLVs from Hop-by-Hop, Destination, or Routing Header
Destination options that are set in txoptions of a socket. When an
individual TLV option is set, it is merged into the existing options
at the position in the list described by the preferred order attribute in
the TLV parameters table.
This code is based in part on the TLV option handling in calipso.c.
Signed-off-by: Tom Herbert <tom@...ntonium.net>
---
include/net/ipv6.h | 13 ++
include/uapi/linux/in6.h | 9 +
net/ipv6/exthdrs_options.c | 516 +++++++++++++++++++++++++++++++++++++++++++++
net/ipv6/ipv6_sockglue.c | 80 +++++++
4 files changed, 618 insertions(+)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c5692d6..898f5e3 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -378,6 +378,8 @@ struct ipv6_txoptions *ipv6_renew_options(struct sock *sk,
struct ipv6_opt_hdr *newopt);
struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
struct ipv6_txoptions *opt);
+int ipv6_opt_update(struct sock *sk, struct ipv6_txoptions *opt,
+ int which, struct ipv6_opt_hdr *new);
int ipv6_opt_validate_tlvs(struct net *net, struct ipv6_opt_hdr *opt,
unsigned int optname, bool admin);
@@ -386,6 +388,17 @@ int ipv6_opt_validate_single_tlv(struct net *net, unsigned int optname,
bool deleting, bool admin);
int ipv6_opt_check_perm(struct net *net, struct sock *sk,
int optname, bool admin);
+
+int ipv6_opt_tlv_find(struct ipv6_opt_hdr *opt, unsigned char *targ_tlv,
+ unsigned int *start, unsigned int *end);
+struct ipv6_opt_hdr *ipv6_opt_tlv_insert(struct net *net,
+ struct ipv6_opt_hdr *opt,
+ int optname, unsigned char *tlv,
+ bool admin);
+struct ipv6_opt_hdr *ipv6_opt_tlv_delete(struct net *net,
+ struct ipv6_opt_hdr *opt,
+ unsigned char *tlv, bool admin);
+
struct tlv_tx_param {
unsigned char preferred_order;
unsigned char admin_perm : 2;
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index a54cf96..f6edf31 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -288,6 +288,15 @@ struct in6_flowlabel_req {
#define IPV6_RECVFRAGSIZE 77
#define IPV6_FREEBIND 78
+/* API to set single Destination or Hop-by-Hop options */
+
+#define IPV6_HOPOPTS_TLV 79
+#define IPV6_RTHDRDSTOPTS_TLV 80
+#define IPV6_DSTOPTS_TLV 81
+#define IPV6_HOPOPTS_DEL_TLV 82
+#define IPV6_RTHDRDSTOPTS_DEL_TLV 83
+#define IPV6_DSTOPTS_DEL_TLV 84
+
/*
* Multicast Routing:
* see include/uapi/linux/mroute6.h.
diff --git a/net/ipv6/exthdrs_options.c b/net/ipv6/exthdrs_options.c
index f334af5..673cd31 100644
--- a/net/ipv6/exthdrs_options.c
+++ b/net/ipv6/exthdrs_options.c
@@ -162,6 +162,35 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
}
EXPORT_SYMBOL_GPL(ipv6_fixup_options);
+/**
+ * ipv6_opt_update - Install a new extension header into a socket's options
+ * @sk: the socket
+ * @opt: TX options from socket
+ * @which: which set of options
+ * @new: new extension header for the options
+ *
+ * Description:
+ * Builds a renewed option set from @opt in which the header of type @which
+ * is @new (NULL leaves the socket with no options of that type), then
+ * installs it on @sk. Whatever ipv6_update_options() hands back
+ * (presumably the option set that was replaced) is uncharged from the
+ * socket's option memory and released.
+ *
+ * Returns 0 on success or the negative errno from ipv6_renew_options().
+ */
+int ipv6_opt_update(struct sock *sk, struct ipv6_txoptions *opt,
+		    int which, struct ipv6_opt_hdr *new)
+{
+	struct ipv6_txoptions *renewed, *stale;
+
+	renewed = ipv6_renew_options(sk, opt, which, new);
+	if (IS_ERR(renewed))
+		return PTR_ERR(renewed);
+
+	stale = ipv6_update_options(sk, renewed);
+	if (!stale)
+		return 0;
+
+	atomic_sub(stale->tot_len, &sk->sk_omem_alloc);
+	txopt_put(stale);
+
+	return 0;
+}
+EXPORT_SYMBOL(ipv6_opt_update);
+
/* TLV validation functions */
/* Validate a single non-padding TLV */
@@ -460,6 +489,493 @@ int ipv6_opt_check_perm(struct net *net, struct sock *sk, int optname,
}
EXPORT_SYMBOL(ipv6_opt_check_perm);
+/* Functions to manage individual TLVs */
+
+/**
+ * __ipv6_opt_tlv_find - Finds a particular TLV in an IPv6 options header
+ * (destination or hop-by-hop options). If TLV is not present, then the
+ * preferred insertion point is determined.
+ * @opt: the options header (an EH header followed by data)
+ * @targ_tlv: Prototype of TLV to find
+ * @start: on return holds the offset of any leading padding if option
+ * is present, or offset at which option is inserted.
+ * @end: on return holds the offset of the first non-pad TLV after option
+ * if the option was found, else points to the first TLV after
+ * padding at insertion point.
+ *
+ * Description:
+ * Finds the space occupied by particular option (including any leading and
+ * trailing padding), or the preferred position for insertion if the
+ * TLV is not present.
+ *
+ * If the option is found then @start and @end are set to the offsets within
+ * @opt of the start of padding before the first found option and the end of
+ * padding after the first found option. In this case the function returns
+ * the offset in @opt of the found option (a value >= 2 since the TLV
+ * must be after the option header).
+ *
+ * In the absence of the searched option, @start is set to offset in @opt at
+ * which the option may be inserted per the ordering and alignment rules
+ * in the TLV parameter table, and @end is set to the end + 1 of any
+ * padding at the @start offset. When the option is not found -ENOENT is
+ * returned.
+ *
+ * rcu_read_lock assumed held.
+ */
+static int __ipv6_opt_tlv_find(struct ipv6_opt_hdr *opt,
+ unsigned char *targ_tlv,
+ unsigned int *start, unsigned int *end)
+{
+ /* offset_s: offset of the non-pad TLV preceding the target or the
+ * insertion point (0 if there is none).
+ * offset_e: 0 until the target is found, then the offset of the last
+ * TLV (target or trailing pad) belonging to the target's span.
+ * last_s: offset of the most recently visited non-pad TLV.
+ * pad_e: end of the padding at the insertion point (not-found case).
+ */
+ unsigned int offset_s = 0, offset_e = 0, last_s = 0;
+ unsigned char *tlv = (unsigned char *)opt;
+ unsigned int pad_e = sizeof(*opt);
+ int ret_val = -ENOENT, tlv_len;
+ unsigned int opt_len, offset;
+ struct tlv_tx_param *tptx;
+ unsigned int targ_order;
+ bool found_cand = false;
+
+ opt_len = ipv6_optlen(opt);
+ offset = sizeof(*opt);
+
+ tptx = tlv_deref_tx_params(targ_tlv[0]);
+
+ targ_order = tptx->preferred_order;
+
+ while (offset < opt_len) {
+ switch (tlv[offset]) {
+ case IPV6_TLV_PAD1:
+ /* Only extend the span once the target has been found */
+ if (offset_e)
+ offset_e = offset;
+ tlv_len = 1;
+ break;
+ case IPV6_TLV_PADN:
+ /* Only extend the span once the target has been found */
+ if (offset_e)
+ offset_e = offset;
+ tlv_len = tlv[offset + 1] + 2;
+ break;
+ default:
+ /* Target already found: this non-pad TLV ends its
+ * trailing padding, so the scan is complete.
+ */
+ if (ret_val >= 0)
+ goto out;
+
+ /* Not found yet */
+
+ if (tlv[offset] == targ_tlv[0]) {
+ /* Found it */
+
+ ret_val = offset;
+ offset_e = offset;
+ offset_s = last_s;
+ found_cand = true;
+ } else {
+ struct tlv_tx_param *tptx1;
+
+ tptx1 = tlv_deref_tx_params(tlv[offset]);
+
+ if (targ_order < tptx1->preferred_order &&
+ !found_cand) {
+ /* Found candidate for insert location:
+ * the first TLV whose preferred order is
+ * greater than the target's.
+ */
+
+ pad_e = offset;
+ offset_s = last_s;
+ found_cand = true;
+ }
+ }
+
+ last_s = offset;
+ tlv_len = tlv[offset + 1] + 2;
+ break;
+ }
+
+ offset += tlv_len;
+ }
+
+ if (!found_cand) {
+ /* Not found and insert point is after all options */
+ offset_s = last_s;
+ pad_e = opt_len;
+ }
+
+out:
+ /* *start is the first byte after the preceding non-pad TLV, or the
+ * first byte after the header when there is no preceding TLV.
+ */
+ if (offset_s)
+ *start = offset_s +
+ (tlv[offset_s] ? tlv[offset_s + 1] + 2 : 1);
+ else
+ *start = sizeof(*opt);
+
+ /* When found, *end is one past the last TLV of the span; a zero type
+ * byte is treated as a one-byte TLV (presumably Pad1).
+ */
+ if (ret_val >= 0)
+ *end = offset_e +
+ (tlv[offset_e] ? tlv[offset_e + 1] + 2 : 1);
+ else
+ *end = pad_e;
+
+ return ret_val;
+}
+
+/* Exported wrapper: runs __ipv6_opt_tlv_find() inside an RCU read-side
+ * critical section and passes its result (and @start/@end) straight through.
+ */
+int ipv6_opt_tlv_find(struct ipv6_opt_hdr *opt, unsigned char *targ_tlv,
+		      unsigned int *start, unsigned int *end)
+{
+	int found_off;
+
+	rcu_read_lock();
+	found_off = __ipv6_opt_tlv_find(opt, targ_tlv, start, end);
+	rcu_read_unlock();
+
+	return found_off;
+}
+EXPORT_SYMBOL(ipv6_opt_tlv_find);
+
+/**
+ * ipv6_opt_tlv_pad_write - Writes pad bytes in TLV format
+ * @buf: the buffer
+ * @offset: offset from start of buffer to write padding
+ * @count: number of pad bytes to write
+ *
+ * Description:
+ * Write @count bytes of TLV padding into @buf starting at offset @offset:
+ * nothing for zero, a single Pad1 for one byte, otherwise one PadN whose
+ * data bytes are zeroed. @count should be less than 8 - see RFC 4942.
+ *
+ * Returns 0 on success, or -EINVAL (with a one-time warning) when @count
+ * is 8 or more; nothing is written in that case.
+ */
+static int ipv6_opt_tlv_pad_write(unsigned char *buf, unsigned int offset,
+				  unsigned int count)
+{
+	unsigned char *pad = buf + offset;
+
+	if (WARN_ON_ONCE(count >= 8))
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	if (count == 1) {
+		pad[0] = IPV6_TLV_PAD1;
+		return 0;
+	}
+
+	/* PadN: type byte, length byte, then count - 2 zeroed data bytes */
+	pad[0] = IPV6_TLV_PADN;
+	pad[1] = count - 2;
+	if (count > 2)
+		memset(pad + 2, 0, count - 2);
+
+	return 0;
+}
+
+/* Returns (mult - ((offset - moff) mod mult)) mod mult in unsigned
+ * arithmetic: the number of bytes to skip from @offset to reach the next
+ * position that is @moff past a multiple of @mult. Result is in
+ * [0, @mult - 1].
+ */
+static unsigned int compute_padding(unsigned int offset, unsigned int mult,
+				    unsigned int moff)
+{
+	unsigned int past = (offset - moff) % mult;
+
+	return (mult - past) % mult;
+}
+
+/* Skips Pad1/PadN TLVs starting at @offset and returns the offset of the
+ * first TLV of any other type. Returns @optlen when the walk reaches or
+ * passes the end of the options without finding one.
+ */
+static int tlv_find_next(unsigned char *tlv, unsigned int offset,
+			 unsigned int optlen)
+{
+	while (offset < optlen) {
+		unsigned char type = tlv[offset];
+
+		if (type == IPV6_TLV_PAD1)
+			offset++;
+		else if (type == IPV6_TLV_PADN)
+			offset += tlv[offset + 1] + 2;
+		else
+			return offset;
+	}
+
+	return optlen;
+}
+
+/* Sums the align_mult TX parameter of every non-pad TLV found from @offset
+ * up to @optlen. Callers use the sum as an upper bound on the padding that
+ * re-aligning those TLVs may require.
+ *
+ * __tlv_sum_alignment assumes rcu_read_lock is held
+ */
+static size_t __tlv_sum_alignment(unsigned char *tlv, unsigned int offset,
+				  unsigned int optlen)
+{
+	int total = 0;
+
+	for (offset = tlv_find_next(tlv, offset, optlen);
+	     offset < optlen;
+	     offset = tlv_find_next(tlv, offset + tlv[offset + 1] + 2, optlen))
+		total += tlv_deref_tx_params(tlv[offset])->align_mult;
+
+	return total;
+}
+
+/* Copies every non-pad TLV of @src in [@src_off, @optlen) into @dst starting
+ * at @dst_off. Source padding is dropped; before each TLV, fresh padding is
+ * written so that it lands on the alignment given by its TX parameters
+ * (align_mult + 1 and align_off). Returns the offset in @dst just past the
+ * last byte written, or @dst_off unchanged when @src is NULL.
+ *
+ * __copy_and_align_tlvs assumes rcu_read_lock is held
+ */
+static int __copy_and_align_tlvs(unsigned int src_off, unsigned char *src,
+				 unsigned int dst_off, unsigned char *dst,
+				 unsigned int optlen)
+{
+	if (!src)
+		return dst_off;
+
+	for (src_off = tlv_find_next(src, src_off, optlen);
+	     src_off < optlen;
+	     src_off = tlv_find_next(src, src_off, optlen)) {
+		struct tlv_tx_param *params;
+		unsigned int lead_pad, tlv_bytes;
+
+		params = tlv_deref_tx_params(src[src_off]);
+
+		lead_pad = compute_padding(dst_off, params->align_mult + 1,
+					   params->align_off);
+		ipv6_opt_tlv_pad_write(dst, dst_off, lead_pad);
+		dst_off += lead_pad;
+
+		tlv_bytes = src[src_off + 1] + 2;
+		memcpy(dst + dst_off, src + src_off, tlv_bytes);
+
+		dst_off += tlv_bytes;
+		src_off += tlv_bytes;
+	}
+
+	return dst_off;
+}
+
+/* Counts the TLVs in @opt that are neither Pad1 nor PadN. */
+static int count_tlvs(struct ipv6_opt_hdr *opt)
+{
+	unsigned char *data = (unsigned char *)opt;
+	unsigned int limit = ipv6_optlen(opt);
+	unsigned int pos = sizeof(*opt);
+	unsigned int total = 0;
+
+	while (pos < limit) {
+		unsigned char type = data[pos];
+
+		/* Pad1 is a lone type byte with no length field */
+		if (type == IPV6_TLV_PAD1) {
+			pos++;
+			continue;
+		}
+
+		if (type != IPV6_TLV_PADN)
+			total++;
+
+		pos += data[pos + 1] + 2;
+	}
+
+	return total;
+}
+
+#define IPV6_OPT_MAX_END_PAD 7
+
+/**
+ * ipv6_opt_tlv_insert - Inserts a TLV into an IPv6 destination options
+ *	or Hop-by-Hop options extension header.
+ *
+ * @net: Current net
+ * @opt: the original options extensions header
+ * @optname: IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, or IPV6_DSTOPTS
+ * @tlv: the new TLV being inserted
+ * @admin: Set for privileged user
+ *
+ * Description:
+ * Creates a new options header based on @opt with the specified option
+ * in @tlv added to it. If @opt already contains the same type of TLV,
+ * then that TLV is replaced; otherwise the new TLV is placed at the
+ * position selected by __ipv6_opt_tlv_find() from the preferred order in
+ * the TLV parameter table. If @opt is NULL then the new header
+ * will contain just the new option and any needed padding.
+ *
+ * Returns a newly kmalloc'ed header (to be released with kfree()), or an
+ * ERR_PTR(): -ENOMEM when allocation fails, -EMSGSIZE or -E2BIG when the
+ * resulting header exceeds the per-netns length or TLV count limit for
+ * @optname. No memory is left allocated on error.
+ *
+ * Assumes option has been validated.
+ */
+struct ipv6_opt_hdr *ipv6_opt_tlv_insert(struct net *net,
+					 struct ipv6_opt_hdr *opt,
+					 int optname, unsigned char *tlv,
+					 bool admin)
+{
+	unsigned int start = 0, end = 0, buf_len, pad, optlen, max_align;
+	size_t tlv_len = tlv[1] + 2;
+	struct tlv_tx_param *tptx;
+	struct ipv6_opt_hdr *new;
+	int ret_val, err = 0;
+	u8 perm;
+
+	rcu_read_lock();
+
+	if (opt) {
+		optlen = ipv6_optlen(opt);
+		ret_val = __ipv6_opt_tlv_find(opt, tlv, &start, &end);
+		if (ret_val < 0) {
+			if (ret_val != -ENOENT) {
+				rcu_read_unlock();
+				return ERR_PTR(ret_val);
+			}
+		} else if (((unsigned char *)opt)[ret_val + 1] == tlv[1]) {
+			unsigned int roff = ret_val + tlv[1] + 2;
+
+			/* Replace existing TLV with one of the same length,
+			 * we can fast path this.
+			 */
+
+			rcu_read_unlock();
+
+			new = kmalloc(optlen, GFP_ATOMIC);
+			if (!new)
+				return ERR_PTR(-ENOMEM);
+
+			memcpy((unsigned char *)new,
+			       (unsigned char *)opt, ret_val);
+			memcpy((unsigned char *)new + ret_val, tlv, tlv[1] + 2);
+			memcpy((unsigned char *)new + roff,
+			       (unsigned char *)opt + roff, optlen - roff);
+
+			return new;
+		}
+	} else {
+		optlen = 0;
+		start = sizeof(*opt);
+		end = 0;
+	}
+
+	tptx = tlv_deref_tx_params(tlv[0]);
+
+	/* Maximum buffer size we'll need including possible padding */
+	max_align = __tlv_sum_alignment((unsigned char *)opt, end, optlen) +
+		    tptx->align_mult + IPV6_OPT_MAX_END_PAD;
+
+	buf_len = optlen + start - end + tlv_len + max_align;
+	new = kmalloc(buf_len, GFP_ATOMIC);
+	if (!new) {
+		rcu_read_unlock();
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* From here on buf_len tracks the write position in @new */
+	buf_len = start;
+
+	/* Keep the header and everything ahead of the insertion point */
+	if (start > sizeof(*opt))
+		memcpy(new, opt, start);
+
+	pad = compute_padding(start, tptx->align_mult + 1, tptx->align_off);
+	ipv6_opt_tlv_pad_write((__u8 *)new, start, pad);
+	buf_len += pad;
+
+	memcpy((__u8 *)new + buf_len, tlv, tlv_len);
+	buf_len += tlv_len;
+
+	/* Re-align and append the TLVs that follow the insertion point */
+	buf_len = __copy_and_align_tlvs(end, (__u8 *)opt, buf_len,
+					(__u8 *)new, optlen);
+
+	perm = admin ? tptx->admin_perm : tptx->user_perm;
+
+	rcu_read_unlock();
+
+	/* Trailer pad to 8 byte alignment */
+	pad = (8 - (buf_len & 7)) & 7;
+	ipv6_opt_tlv_pad_write((__u8 *)new, buf_len, pad);
+	buf_len += pad;
+
+	/* Set header */
+	new->nexthdr = 0;
+	new->hdrlen = buf_len / 8 - 1;
+
+	if (perm != IPV6_TLV_PERM_NO_CHECK) {
+		switch (optname) {
+		case IPV6_HOPOPTS:
+			if (buf_len > net->ipv6.sysctl.max_hbh_opts_len)
+				err = -EMSGSIZE;
+			else if (count_tlvs(new) >
+				 net->ipv6.sysctl.max_hbh_opts_cnt)
+				err = -E2BIG;
+			break;
+		case IPV6_RTHDRDSTOPTS:
+		case IPV6_DSTOPTS:
+			if (buf_len > net->ipv6.sysctl.max_dst_opts_len)
+				err = -EMSGSIZE;
+			else if (count_tlvs(new) >
+				 net->ipv6.sysctl.max_dst_opts_cnt)
+				err = -E2BIG;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (err) {
+		/* The caller only receives the error code, so the header
+		 * built above has to be released here or it is leaked.
+		 */
+		kfree(new);
+		return ERR_PTR(err);
+	}
+
+	return new;
+}
+EXPORT_SYMBOL(ipv6_opt_tlv_insert);
+
+/* Builds a copy of @opt with the bytes in [@start, @end) removed. The TLVs
+ * that followed the removed span are re-aligned and the result is padded
+ * to a multiple of 8 bytes.
+ *
+ * Returns NULL when the removed span is everything after the option header
+ * (no other option remains), ERR_PTR(-ENOMEM) on allocation failure,
+ * otherwise a newly kmalloc'ed header.
+ *
+ * File-local helper, static like the other __ helpers in this file.
+ *
+ * rcu_read_lock assumed held
+ */
+static struct ipv6_opt_hdr *__ipv6_opt_tlv_delete(struct ipv6_opt_hdr *opt,
+						  unsigned int start,
+						  unsigned int end)
+{
+	unsigned int pad, optlen, buf_len;
+	struct ipv6_opt_hdr *new;
+	size_t max_align;
+
+	optlen = ipv6_optlen(opt);
+	if (start == sizeof(*opt) && end == optlen) {
+		/* There's no other option in the header so return NULL */
+		return NULL;
+	}
+
+	/* Worst case extra room: re-alignment padding for the TLVs that
+	 * follow the removed span, plus trailer padding.
+	 */
+	max_align = __tlv_sum_alignment((unsigned char *)opt, end, optlen) +
+		    IPV6_OPT_MAX_END_PAD;
+
+	new = kmalloc(optlen - (end - start) + max_align, GFP_ATOMIC);
+	if (!new)
+		return ERR_PTR(-ENOMEM);
+
+	/* Header and everything ahead of the removed span is kept as is */
+	memcpy(new, opt, start);
+
+	buf_len = __copy_and_align_tlvs(end, (__u8 *)opt, start,
+					(__u8 *)new, optlen);
+
+	/* Now set trailer padding, buf_len is at the end of the last TLV at
+	 * this point
+	 */
+	pad = (8 - (buf_len & 7)) & 7;
+	ipv6_opt_tlv_pad_write((__u8 *)new, buf_len, pad);
+	buf_len += pad;
+
+	/* Set new header length */
+	new->hdrlen = buf_len / 8 - 1;
+
+	return new;
+}
+
+/**
+ * ipv6_opt_tlv_delete - Removes the specified option from the destination
+ *	or Hop-by-Hop extension header.
+ * @net: Current net
+ * @opt: The original header
+ * @tlv: Prototype of TLV being removed
+ * @admin: Set for privileged user
+ *
+ * Description:
+ * Looks up the TLV type given by @tlv in @opt under rcu_read_lock and
+ * builds a new header without it, dropping as much padding as possible.
+ * If @opt doesn't contain the option, the error from the lookup
+ * (ERR_PTR(-ENOENT)) is returned. If @opt contains no other non-padding
+ * options, NULL is returned. Otherwise the result of
+ * __ipv6_opt_tlv_delete() is returned.
+ */
+struct ipv6_opt_hdr *ipv6_opt_tlv_delete(struct net *net,
+					 struct ipv6_opt_hdr *opt,
+					 unsigned char *tlv, bool admin)
+{
+	unsigned int span_start, span_end;
+	struct ipv6_opt_hdr *result;
+	int found_off;
+
+	rcu_read_lock();
+
+	found_off = __ipv6_opt_tlv_find(opt, tlv, &span_start, &span_end);
+	if (found_off >= 0)
+		result = __ipv6_opt_tlv_delete(opt, span_start, span_end);
+	else
+		result = ERR_PTR(found_off);
+
+	rcu_read_unlock();
+
+	return result;
+}
+EXPORT_SYMBOL(ipv6_opt_tlv_delete);
+
/* Destination options header */
#if IS_ENABLED(CONFIG_IPV6_MIP6)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 009c8a4..affa46c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -493,6 +493,86 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
}
+	case IPV6_HOPOPTS_TLV:
+	case IPV6_RTHDRDSTOPTS_TLV:
+	case IPV6_DSTOPTS_TLV:
+	case IPV6_HOPOPTS_DEL_TLV:
+	case IPV6_RTHDRDSTOPTS_DEL_TLV:
+	case IPV6_DSTOPTS_DEL_TLV:
+	{
+		/* Set or delete one TLV in the socket's Hop-by-Hop,
+		 * Routing Header Destination, or Destination options.
+		 * old: the socket's current header of the selected type
+		 * new: the rebuilt header returned by the insert/delete helper
+		 * new_opt: kernel copy of the user supplied TLV
+		 */
+		struct ipv6_opt_hdr *old = NULL, *new = NULL;
+		struct ipv6_txoptions *opt;
+		bool deleting = false;
+		void *new_opt = NULL;
+		int which = -1;
+		bool admin;
+
+		new_opt = memdup_user(optval, optlen);
+		if (IS_ERR(new_opt)) {
+			retv = PTR_ERR(new_opt);
+			break;
+		}
+
+		opt = rcu_dereference_protected(np->opt,
+						lockdep_sock_is_held(sk));
+
+		/* Map the per-TLV option name onto the header type it
+		 * edits and pick up that header from the socket, if any.
+		 */
+		switch (optname) {
+		case IPV6_HOPOPTS_DEL_TLV:
+			deleting = true;
+			/* Fallthrough */
+		case IPV6_HOPOPTS_TLV:
+			if (opt)
+				old = opt->hopopt;
+			which = IPV6_HOPOPTS;
+			break;
+		case IPV6_RTHDRDSTOPTS_DEL_TLV:
+			deleting = true;
+			/* Fallthrough */
+		case IPV6_RTHDRDSTOPTS_TLV:
+			if (opt)
+				old = opt->dst0opt;
+			which = IPV6_RTHDRDSTOPTS;
+			break;
+		case IPV6_DSTOPTS_DEL_TLV:
+			deleting = true;
+			/* Fallthrough */
+		case IPV6_DSTOPTS_TLV:
+			if (opt)
+				old = opt->dst1opt;
+			which = IPV6_DSTOPTS;
+			break;
+		}
+
+		admin = ns_capable(net->user_ns, CAP_NET_RAW);
+
+		retv = ipv6_opt_validate_single_tlv(net, which, new_opt, optlen,
+						    deleting, admin);
+
+		/* Bail out on a validation error, or when asked to delete
+		 * from a header that does not exist (retv is left as
+		 * returned by validation). The copied TLV must be freed on
+		 * both paths, otherwise it is leaked.
+		 */
+		if (retv < 0 || (deleting && !old)) {
+			kfree(new_opt);
+			break;
+		}
+
+		if (deleting)
+			new = ipv6_opt_tlv_delete(net, old, new_opt, admin);
+		else
+			new = ipv6_opt_tlv_insert(net, old, which, new_opt,
+						  admin);
+
+		kfree(new_opt);
+
+		if (IS_ERR(new)) {
+			retv = PTR_ERR(new);
+			break;
+		}
+
+		retv = ipv6_opt_update(sk, opt, which, new);
+
+		/* new is freed unconditionally after the update, so
+		 * ipv6_opt_update() presumably keeps its own copy.
+		 */
+		kfree(new);
+
+		break;
+	}
+
case IPV6_PKTINFO:
{
struct in6_pktinfo pkt;
--
2.7.4
Powered by blists - more mailing lists