[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a825aa13-6f82-e6c1-3c5c-7974b14f881e@blackwall.org>
Date: Thu, 25 Aug 2022 17:24:07 +0300
From: Nikolay Aleksandrov <razor@...ckwall.org>
To: Eyal Birger <eyal.birger@...il.com>, davem@...emloft.net,
edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com,
steffen.klassert@...unet.com, herbert@...dor.apana.org.au,
dsahern@...nel.org, contact@...elbtn.com, pablo@...filter.org,
nicolas.dichtel@...nd.com
Cc: netdev@...r.kernel.org, bpf@...r.kernel.org,
"daniel@...earbox.net" <daniel@...earbox.net>
Subject: Re: [PATCH ipsec-next,v2 2/3] xfrm: interface: support collect
metadata mode
On 25/08/2022 16:46, Eyal Birger wrote:
> This commit adds support for 'collect_md' mode on xfrm interfaces.
>
> Each net can have one collect_md device, created by providing the
> IFLA_XFRM_COLLECT_METADATA flag at creation. This device cannot be
> altered and has no if_id or link device attributes.
>
> On transmit to this device, the if_id is fetched from the attached dst
> metadata on the skb. If exists, the link property is also fetched from
> the metadata. The dst metadata type used is METADATA_XFRM which holds
> these properties.
>
> On the receive side, xfrmi_rcv_cb() populates a dst metadata for each
> packet received and attaches it to the skb. The if_id used in this case is
> fetched from the xfrm state, and the link is fetched from the incoming
> device. This information can later be used by upper layers such as tc,
> ebpf, and ip rules.
>
> Because the skb is scrubed in xfrmi_rcv_cb(), the attachment of the dst
> metadata is postponed until after scrubing. Similarly, xfrm_input() is
> adapted to avoid dropping metadata dsts by only dropping 'valid'
> (skb_valid_dst(skb) == true) dsts.
>
> Policy matching on packets arriving from collect_md xfrmi devices is
> done by using the xfrm state existing in the skb's sec_path.
> The xfrm_if_cb.decode_cb() interface implemented by xfrmi_decode_session()
> is changed to keep the details of the if_id extraction tucked away
> in xfrm_interface.c.
>
> Signed-off-by: Eyal Birger <eyal.birger@...il.com>
>
> ----
>
> v2:
> - add "link" property as suggested by Nicolas Dichtel
> - rename xfrm_if_decode_session_params to xfrm_if_decode_session_result
> ---
(+CC Daniel)
Hi,
Generally I really like the idea, but I missed to comment the first round. :)
A few comments below..
> include/net/xfrm.h | 11 +++-
> include/uapi/linux/if_link.h | 1 +
> net/xfrm/xfrm_input.c | 7 +-
> net/xfrm/xfrm_interface.c | 120 +++++++++++++++++++++++++++++------
> net/xfrm/xfrm_policy.c | 10 +--
> 5 files changed, 121 insertions(+), 28 deletions(-)
>
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 6e8fa98f786f..28b988577ed2 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -312,9 +312,15 @@ struct km_event {
> struct net *net;
> };
>
> +struct xfrm_if_decode_session_result {
> + struct net *net;
> + u32 if_id;
> +};
> +
> struct xfrm_if_cb {
> - struct xfrm_if *(*decode_session)(struct sk_buff *skb,
> - unsigned short family);
> + bool (*decode_session)(struct sk_buff *skb,
> + unsigned short family,
> + struct xfrm_if_decode_session_result *res);
> };
>
> void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
> @@ -985,6 +991,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
> struct xfrm_if_parms {
> int link; /* ifindex of underlying L2 interface */
> u32 if_id; /* interface identifyer */
> + bool collect_md;
> };
>
> struct xfrm_if {
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index e36d9d2c65a7..d96f13a42589 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -694,6 +694,7 @@ enum {
> IFLA_XFRM_UNSPEC,
> IFLA_XFRM_LINK,
> IFLA_XFRM_IF_ID,
> + IFLA_XFRM_COLLECT_METADATA,
> __IFLA_XFRM_MAX
> };
>
> diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
> index 144238a50f3d..25e822fb5771 100644
> --- a/net/xfrm/xfrm_input.c
> +++ b/net/xfrm/xfrm_input.c
> @@ -20,6 +20,7 @@
> #include <net/xfrm.h>
> #include <net/ip_tunnels.h>
> #include <net/ip6_tunnel.h>
> +#include <net/dst_metadata.h>
>
> #include "xfrm_inout.h"
>
> @@ -720,7 +721,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
> sp = skb_sec_path(skb);
> if (sp)
> sp->olen = 0;
> - skb_dst_drop(skb);
> + if (skb_valid_dst(skb))
> + skb_dst_drop(skb);
> gro_cells_receive(&gro_cells, skb);
> return 0;
> } else {
> @@ -738,7 +740,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
> sp = skb_sec_path(skb);
> if (sp)
> sp->olen = 0;
> - skb_dst_drop(skb);
> + if (skb_valid_dst(skb))
> + skb_dst_drop(skb);
> gro_cells_receive(&gro_cells, skb);
> return err;
> }
> diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
> index 5113fa0fbcee..389d8be12801 100644
> --- a/net/xfrm/xfrm_interface.c
> +++ b/net/xfrm/xfrm_interface.c
> @@ -41,6 +41,7 @@
> #include <net/addrconf.h>
> #include <net/xfrm.h>
> #include <net/net_namespace.h>
> +#include <net/dst_metadata.h>
> #include <net/netns/generic.h>
> #include <linux/etherdevice.h>
>
> @@ -56,6 +57,7 @@ static const struct net_device_ops xfrmi_netdev_ops;
> struct xfrmi_net {
> /* lists for storing interfaces in use */
> struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE];
> + struct xfrm_if __rcu *collect_md_xfrmi;
> };
>
> #define for_each_xfrmi_rcu(start, xi) \
> @@ -77,17 +79,23 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
> return xi;
> }
>
> + xi = rcu_dereference(xfrmn->collect_md_xfrmi);
> + if (xi && xi->dev->flags & IFF_UP)
minor nit: xi && (xi->dev->flags & IFF_UP)
> + return xi;
> +
> return NULL;
> }
>
> -static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
> - unsigned short family)
> +static bool xfrmi_decode_session(struct sk_buff *skb,
> + unsigned short family,
> + struct xfrm_if_decode_session_result *res)
> {
> struct net_device *dev;
> + struct xfrm_if *xi;
> int ifindex = 0;
>
> if (!secpath_exists(skb) || !skb->dev)
> - return NULL;
> + return false;
>
> switch (family) {
> case AF_INET6:
> @@ -107,11 +115,18 @@ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
> }
>
> if (!dev || !(dev->flags & IFF_UP))
> - return NULL;
> + return false;
> if (dev->netdev_ops != &xfrmi_netdev_ops)
> - return NULL;
> + return false;
>
> - return netdev_priv(dev);
> + xi = netdev_priv(dev);
> + res->net = xi->net;
> +
> + if (xi->p.collect_md)
> + res->if_id = xfrm_input_state(skb)->if_id;
> + else
> + res->if_id = xi->p.if_id;
> + return true;
> }
>
> static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
> @@ -157,7 +172,10 @@ static int xfrmi_create(struct net_device *dev)
> if (err < 0)
> goto out;
>
> - xfrmi_link(xfrmn, xi);
> + if (xi->p.collect_md)
> + rcu_assign_pointer(xfrmn->collect_md_xfrmi, xi);
> + else
> + xfrmi_link(xfrmn, xi);
>
> return 0;
>
> @@ -185,7 +203,10 @@ static void xfrmi_dev_uninit(struct net_device *dev)
> struct xfrm_if *xi = netdev_priv(dev);
> struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
>
> - xfrmi_unlink(xfrmn, xi);
> + if (xi->p.collect_md)
> + rcu_assign_pointer(xfrmn->collect_md_xfrmi, NULL);
RCU_INIT_POINTER()
> + else
> + xfrmi_unlink(xfrmn, xi);
> }
>
> static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
> @@ -214,6 +235,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
> struct xfrm_state *x;
> struct xfrm_if *xi;
> bool xnet;
> + int link;
>
> if (err && !secpath_exists(skb))
> return 0;
> @@ -224,6 +246,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
> if (!xi)
> return 1;
>
> + link = skb->dev->ifindex;
> dev = xi->dev;
> skb->dev = dev;
>
> @@ -254,6 +277,17 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
> }
>
> xfrmi_scrub_packet(skb, xnet);
> + if (xi->p.collect_md) {
> + struct metadata_dst *md_dst;
> +
> + md_dst = metadata_dst_alloc(0, METADATA_XFRM, GFP_ATOMIC);
> + if (!md_dst)
> + return -ENOMEM;
> +
> + md_dst->u.xfrm_info.if_id = x->if_id;
> + md_dst->u.xfrm_info.link = link;
> + skb_dst_set(skb, (struct dst_entry *)md_dst);
> + }
> dev_sw_netstats_rx_add(dev, skb->len);
>
> return 0;
> @@ -269,10 +303,24 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
> struct net_device *tdev;
> struct xfrm_state *x;
> int err = -1;
> + u32 if_id;
> int mtu;
>
> + if (xi->p.collect_md) {
> + struct xfrm_md_info *md_info = skb_xfrm_md_info(skb);
> +
> + if (unlikely(!md_info))
> + return -EINVAL;
> +
> + if_id = md_info->if_id;
> + if (md_info->link)
> + fl->flowi_oif = md_info->link;
> + } else {
> + if_id = xi->p.if_id;
> + }
> +
> dst_hold(dst);
> - dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id);
> + dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, if_id);
> if (IS_ERR(dst)) {
> err = PTR_ERR(dst);
> dst = NULL;
> @@ -283,7 +331,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
> if (!x)
> goto tx_err_link_failure;
>
> - if (x->if_id != xi->p.if_id)
> + if (x->if_id != if_id)
> goto tx_err_link_failure;
>
> tdev = dst->dev;
> @@ -633,6 +681,9 @@ static void xfrmi_netlink_parms(struct nlattr *data[],
>
> if (data[IFLA_XFRM_IF_ID])
> parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]);
> +
> + if (data[IFLA_XFRM_COLLECT_METADATA])
> + parms->collect_md = true;
> }
>
> static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
> @@ -645,14 +696,27 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
> int err;
>
> xfrmi_netlink_parms(data, &p);
> - if (!p.if_id) {
> - NL_SET_ERR_MSG(extack, "if_id must be non zero");
> - return -EINVAL;
> - }
> + if (p.collect_md) {
> + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
>
> - xi = xfrmi_locate(net, &p);
> - if (xi)
> - return -EEXIST;
> + if (p.link || p.if_id) {
> + NL_SET_ERR_MSG(extack, "link and if_id must be zero");
> + return -EINVAL;
> + }
> +
> + if (rtnl_dereference(xfrmn->collect_md_xfrmi))
> + return -EEXIST;
> +
> + } else {
> + if (!p.if_id) {
> + NL_SET_ERR_MSG(extack, "if_id must be non zero");
> + return -EINVAL;
> + }
> +
> + xi = xfrmi_locate(net, &p);
> + if (xi)
> + return -EEXIST;
> + }
>
> xi = netdev_priv(dev);
> xi->p = p;
> @@ -682,12 +746,19 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
> return -EINVAL;
> }
>
> + if (p.collect_md) {
> + NL_SET_ERR_MSG(extack, "collect_md can't be changed");
> + return -EINVAL;
> + }
> +
> xi = xfrmi_locate(net, &p);
> if (!xi) {
> xi = netdev_priv(dev);
> } else {
> if (xi->dev != dev)
> return -EEXIST;
> + if (xi->p.collect_md)
> + return -EINVAL;
> }
>
> return xfrmi_update(xi, &p);
> @@ -700,6 +771,8 @@ static size_t xfrmi_get_size(const struct net_device *dev)
> nla_total_size(4) +
> /* IFLA_XFRM_IF_ID */
> nla_total_size(4) +
> + /* IFLA_XFRM_COLLECT_METADATA */
> + nla_total_size(0) +
> 0;
> }
>
> @@ -711,6 +784,10 @@ static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
> if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) ||
> nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id))
> goto nla_put_failure;
> + if (xi->p.collect_md) {
> + if (nla_put_flag(skb, IFLA_XFRM_COLLECT_METADATA))
minor: these can be combined
> + goto nla_put_failure;
> + }
> return 0;
>
> nla_put_failure:
> @@ -725,8 +802,10 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev)
> }
>
> static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
> - [IFLA_XFRM_LINK] = { .type = NLA_U32 },
> - [IFLA_XFRM_IF_ID] = { .type = NLA_U32 },
> + [IFLA_XFRM_UNSPEC] = { .strict_start_type = IFLA_XFRM_COLLECT_METADATA },
> + [IFLA_XFRM_LINK] = { .type = NLA_U32 },
link is signed, so s32
> + [IFLA_XFRM_IF_ID] = { .type = NLA_U32 },
> + [IFLA_XFRM_COLLECT_METADATA] = { .type = NLA_FLAG },
> };
>
> static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
> @@ -762,6 +841,9 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
> xip = &xi->next)
> unregister_netdevice_queue(xi->dev, &list);
> }
> + xi = rcu_dereference(xfrmn->collect_md_xfrmi);
rtnl_dereference()
> + if (xi)
> + unregister_netdevice_queue(xi->dev, &list);
> }
> unregister_netdevice_many(&list);
> rtnl_unlock();
> diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
> index f1a0bab920a5..70376f3fe84a 100644
> --- a/net/xfrm/xfrm_policy.c
> +++ b/net/xfrm/xfrm_policy.c
> @@ -3516,17 +3516,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
> int xerr_idx = -1;
> const struct xfrm_if_cb *ifcb;
> struct sec_path *sp;
> - struct xfrm_if *xi;
> u32 if_id = 0;
>
> rcu_read_lock();
> ifcb = xfrm_if_get_cb();
>
> if (ifcb) {
> - xi = ifcb->decode_session(skb, family);
> - if (xi) {
> - if_id = xi->p.if_id;
> - net = xi->net;
> + struct xfrm_if_decode_session_result r;
> +
> + if (ifcb->decode_session(skb, family, &r)) {
> + if_id = r.if_id;
> + net = r.net;
> }
> }
> rcu_read_unlock();
Powered by blists - more mailing lists