netdev - Re: [RFC net-next 0/3] IP imposition of per-nh MPLS encap

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <556DAF9A.9050505@brocade.com>
Date:	Tue, 2 Jun 2015 14:28:58 +0100
From:	Robert Shearman <rshearma@...cade.com>
To:	Thomas Graf <tgraf@...g.ch>
CC:	<netdev@...r.kernel.org>,
	"Eric W. Biederman" <ebiederm@...ssion.com>,
	roopa <roopa@...ulusnetworks.com>
Subject: Re: [RFC net-next 0/3] IP imposition of per-nh MPLS encap

On 02/06/15 01:06, Thomas Graf wrote:
> On 06/01/15 at 05:46pm, Robert Shearman wrote:
>> In order to be able to function as a Label Edge Router in an MPLS
>> network, it is necessary to be able to take IP packets and impose an
>> MPLS encap and forward them out. The traditional approach of setting
>> up an interface for each "tunnel" endpoint doesn't scale for the
>> common MPLS use-cases where each IP route tends to be assigned a
>> different label as encap.
>>
>> The solution suggested here for further discussion is to provide the
>> facility to define encap data on a per-nexthop basis using a new
>> netlink attribute, RTA_ENCAP, which would be opaque to the IPv4/IPv6
>> forwarding code, but interpreted by the virtual interface assigned to
>> the nexthop.
>
> RTA_ENCAP is currently a binary blob specific to each encapsulation
> type interface. I guess this should be converted to a set of nested
> Netlink attributes for each type of encap to make it extendible in
> the future.

Nesting attributes inside the RTA_ENCAP blob should be supported by the 
patch series today. Something like this:

+enum rta_tunnel_t {
+	RTA_TUN_UNSPEC,
+	RTA_TUN_ID,
+	RTA_TUN_DST,
+	RTA_TUN_SRC,
+	RTA_TUN_TTL,
+	RTA_TUN_TOS,
+	RTA_TUN_SPORT,
+	RTA_TUN_DPORT,
+	RTA_TUN_FLAGS,
+	RTA_TUN_MAX,
+};
+
+/*
+ * Validation policy for the attributes nested inside RTA_ENCAP, indexed by
+ * RTA_TUN_* type. Each entry names the netlink attribute type expected for
+ * that attribute; it is handed to nla_parse_nested() by the parse and match
+ * callbacks.
+ */
+static const struct nla_policy tunnel_policy[RTA_TUN_MAX + 1] = {
+	[RTA_TUN_ID]		= { .type = NLA_U64 },
+	[RTA_TUN_DST]		= { .type = NLA_U32 },
+	[RTA_TUN_SRC]		= { .type = NLA_U32 },
+	[RTA_TUN_TTL]		= { .type = NLA_U8 },
+	[RTA_TUN_TOS]		= { .type = NLA_U8 },
+	[RTA_TUN_SPORT]		= { .type = NLA_U16 },
+	[RTA_TUN_DPORT]		= { .type = NLA_U16 },
+	[RTA_TUN_FLAGS]		= { .type = NLA_U16 },
+};
+
+/*
+ * Decode the RTA_TUN_* attributes nested in @nla into the ip_tunnel_info
+ * that @encap points to.
+ *
+ * When @encap is NULL nothing is parsed and only the size is returned. Key
+ * fields whose attribute is absent are left untouched; the tunnel options
+ * are always cleared.
+ *
+ * Returns sizeof(struct ip_tunnel_info) on success (including the
+ * NULL-@encap case), or the negative error from nla_parse_nested().
+ */
+static int vxlan_parse_encap(const struct net_device *dev,
+			     const struct nlattr *nla,
+			     void *encap)
+{
+	struct ip_tunnel_info *info = encap;
+	struct nlattr *attrs[RTA_TUN_MAX + 1];
+	int rc;
+
+	/* No destination buffer: just return the struct size. */
+	if (!info)
+		return sizeof(struct ip_tunnel_info);
+
+	rc = nla_parse_nested(attrs, RTA_TUN_MAX, nla, tunnel_policy);
+	if (rc < 0)
+		return rc;
+
+	if (attrs[RTA_TUN_ID])
+		info->key.tun_id = nla_get_u64(attrs[RTA_TUN_ID]);
+	if (attrs[RTA_TUN_DST])
+		info->key.ipv4_dst = nla_get_be32(attrs[RTA_TUN_DST]);
+	if (attrs[RTA_TUN_SRC])
+		info->key.ipv4_src = nla_get_be32(attrs[RTA_TUN_SRC]);
+	if (attrs[RTA_TUN_TTL])
+		info->key.ipv4_ttl = nla_get_u8(attrs[RTA_TUN_TTL]);
+	if (attrs[RTA_TUN_TOS])
+		info->key.ipv4_tos = nla_get_u8(attrs[RTA_TUN_TOS]);
+	if (attrs[RTA_TUN_SPORT])
+		info->key.tp_src = nla_get_be16(attrs[RTA_TUN_SPORT]);
+	if (attrs[RTA_TUN_DPORT])
+		info->key.tp_dst = nla_get_be16(attrs[RTA_TUN_DPORT]);
+	if (attrs[RTA_TUN_FLAGS])
+		info->key.tun_flags = nla_get_u16(attrs[RTA_TUN_FLAGS]);
+
+	info->options = NULL;
+	info->options_len = 0;
+
+	return sizeof(struct ip_tunnel_info);
+}
+
+/*
+ * Emit the tunnel key held in @encap as an RTA_ENCAP nest of RTA_TUN_*
+ * attributes appended to @skb.
+ *
+ * Returns 0 on success or -ENOMEM if the nest or any attribute could not be
+ * added. On failure the partially built nest is removed again, so @skb never
+ * carries a truncated RTA_ENCAP attribute.
+ */
+static int vxlan_fill_encap(const struct net_device *dev,
+			    struct sk_buff *skb, int encap_len,
+			    const void *encap)
+{
+	const struct ip_tunnel_info *tun_info = encap;
+	struct nlattr *encap_attr;
+
+	encap_attr = nla_nest_start(skb, RTA_ENCAP);
+	if (!encap_attr)
+		return -ENOMEM;
+
+	/*
+	 * The ports are written with the be16 helper to match the
+	 * nla_get_be16() used when they are parsed and matched.
+	 */
+	if (nla_put_u64(skb, RTA_TUN_ID, tun_info->key.tun_id) ||
+	    nla_put_be32(skb, RTA_TUN_DST, tun_info->key.ipv4_dst) ||
+	    nla_put_be32(skb, RTA_TUN_SRC, tun_info->key.ipv4_src) ||
+	    nla_put_u8(skb, RTA_TUN_TOS, tun_info->key.ipv4_tos) ||
+	    nla_put_u8(skb, RTA_TUN_TTL, tun_info->key.ipv4_ttl) ||
+	    nla_put_be16(skb, RTA_TUN_SPORT, tun_info->key.tp_src) ||
+	    nla_put_be16(skb, RTA_TUN_DPORT, tun_info->key.tp_dst) ||
+	    nla_put_u16(skb, RTA_TUN_FLAGS, tun_info->key.tun_flags))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, encap_attr);
+
+	return 0;
+
+nla_put_failure:
+	/* Drop the half-written nest instead of leaving it open in the skb. */
+	nla_nest_cancel(skb, encap_attr);
+	/*
+	 * NOTE(review): netlink fill callbacks usually report -EMSGSIZE here;
+	 * -ENOMEM is kept because the callers are not visible - confirm.
+	 */
+	return -ENOMEM;
+}
+
+/*
+ * Compare the RTA_TUN_* attributes nested in @nla against the tunnel key
+ * stored in @encap. Only attributes that are present take part in the
+ * comparison.
+ *
+ * Returns 0 when every supplied attribute equals the stored value, 1 on the
+ * first difference, or the negative error from nla_parse_nested().
+ */
+static int vxlan_match_encap(const struct net_device *dev,
+			     const struct nlattr *nla, int encap_len,
+			     const void *encap)
+{
+	const struct ip_tunnel_info *info = encap;
+	struct nlattr *attrs[RTA_TUN_MAX + 1];
+	int rc;
+
+	rc = nla_parse_nested(attrs, RTA_TUN_MAX, nla, tunnel_policy);
+	if (rc < 0)
+		return rc;
+
+	/* Short-circuits on the first mismatching attribute. */
+	if ((attrs[RTA_TUN_ID] &&
+	     nla_get_u64(attrs[RTA_TUN_ID]) != info->key.tun_id) ||
+	    (attrs[RTA_TUN_DST] &&
+	     nla_get_be32(attrs[RTA_TUN_DST]) != info->key.ipv4_dst) ||
+	    (attrs[RTA_TUN_SRC] &&
+	     nla_get_be32(attrs[RTA_TUN_SRC]) != info->key.ipv4_src) ||
+	    (attrs[RTA_TUN_TTL] &&
+	     nla_get_u8(attrs[RTA_TUN_TTL]) != info->key.ipv4_ttl) ||
+	    (attrs[RTA_TUN_TOS] &&
+	     nla_get_u8(attrs[RTA_TUN_TOS]) != info->key.ipv4_tos) ||
+	    (attrs[RTA_TUN_SPORT] &&
+	     nla_get_be16(attrs[RTA_TUN_SPORT]) != info->key.tp_src) ||
+	    (attrs[RTA_TUN_DPORT] &&
+	     nla_get_be16(attrs[RTA_TUN_DPORT]) != info->key.tp_dst) ||
+	    (attrs[RTA_TUN_FLAGS] &&
+	     nla_get_u16(attrs[RTA_TUN_FLAGS]) != info->key.tun_flags))
+		return 1;
+
+	return 0;
+}
+
  static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
  	.kind		= "vxlan",
  	.maxtype	= IFLA_VXLAN_MAX,
@@ -2893,6 +3093,9 @@ static struct rtnl_link_ops vxlan_link_ops 
__read_mostly = {
  	.get_size	= vxlan_get_size,
  	.fill_info	= vxlan_fill_info,
  	.get_link_net	= vxlan_get_link_net,
+	.parse_encap	= vxlan_parse_encap,
+	.fill_encap	= vxlan_fill_encap,
+	.match_encap	= vxlan_match_encap,
  };


> What is your plan regarding the receive side and on the matching of
> encap fields? Storing the receive parameters is what led me to
> storing it in skb_shared_info.

No plan for the receive side and it wouldn't easily fit in with my 
approach, so you'll need to implement that separately.

Thanks,
Rob
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html