[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CA+mtBx_gswubpaBF1bX-dRN580Xv2F5aJAJaKRp56pFeWzRWEA@mail.gmail.com>
Date: Thu, 2 Oct 2014 16:04:57 -0700
From: Tom Herbert <therbert@...gle.com>
To: Andy Zhou <azhou@...ira.com>
Cc: David Miller <davem@...emloft.net>,
Linux Netdev List <netdev@...r.kernel.org>,
Jesse Gross <jesse@...ira.com>
Subject: Re: [net-next 6/6] openvswitch: Add support for Geneve tunneling.
On Thu, Oct 2, 2014 at 1:04 AM, Andy Zhou <azhou@...ira.com> wrote:
> From: Jesse Gross <jesse@...ira.com>
>
> The Openvswitch implementation is completely agnostic to the options
> that are in use and can handle newly defined options without
> further work. It does this by simply matching on a byte array
> of options and allowing userspace to setup flows on this array.
>
> Signed-off-by: Jesse Gross <jesse@...ira.com>
> Signed-off-by: Andy Zhou <azhou@...ira.com>
> ---
> include/net/ip_tunnels.h | 21 ++--
> include/uapi/linux/openvswitch.h | 2 +
> net/openvswitch/Kconfig | 11 ++
> net/openvswitch/Makefile | 4 +
> net/openvswitch/datapath.c | 5 +-
> net/openvswitch/flow.c | 20 +++-
> net/openvswitch/flow.h | 20 +++-
> net/openvswitch/flow_netlink.c | 176 +++++++++++++++++++++++-----
> net/openvswitch/vport-geneve.c | 236 ++++++++++++++++++++++++++++++++++++++
> net/openvswitch/vport-gre.c | 2 +-
> net/openvswitch/vport-vxlan.c | 2 +-
> net/openvswitch/vport.c | 3 +
> net/openvswitch/vport.h | 1 +
> 13 files changed, 461 insertions(+), 42 deletions(-)
> create mode 100644 net/openvswitch/vport-geneve.c
>
> diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
> index a9ce155..5bc6ede 100644
> --- a/include/net/ip_tunnels.h
> +++ b/include/net/ip_tunnels.h
> @@ -86,17 +86,18 @@ struct ip_tunnel {
> struct gro_cells gro_cells;
> };
>
> -#define TUNNEL_CSUM __cpu_to_be16(0x01)
> -#define TUNNEL_ROUTING __cpu_to_be16(0x02)
> -#define TUNNEL_KEY __cpu_to_be16(0x04)
> -#define TUNNEL_SEQ __cpu_to_be16(0x08)
> -#define TUNNEL_STRICT __cpu_to_be16(0x10)
> -#define TUNNEL_REC __cpu_to_be16(0x20)
> -#define TUNNEL_VERSION __cpu_to_be16(0x40)
> -#define TUNNEL_NO_KEY __cpu_to_be16(0x80)
> +#define TUNNEL_CSUM __cpu_to_be16(0x01)
> +#define TUNNEL_ROUTING __cpu_to_be16(0x02)
> +#define TUNNEL_KEY __cpu_to_be16(0x04)
> +#define TUNNEL_SEQ __cpu_to_be16(0x08)
> +#define TUNNEL_STRICT __cpu_to_be16(0x10)
> +#define TUNNEL_REC __cpu_to_be16(0x20)
Is this hunk only changing whitespace in these definitions?
> +#define TUNNEL_VERSION __cpu_to_be16(0x40)
> +#define TUNNEL_NO_KEY __cpu_to_be16(0x80)
> #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
> -#define TUNNEL_OAM __cpu_to_be16(0x0200)
> -#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
> +#define TUNNEL_OAM __cpu_to_be16(0x0200)
> +#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
> +#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800)
>
> struct tnl_ptk_info {
> __be16 flags;
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index 6753032..435eabc 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -192,6 +192,7 @@ enum ovs_vport_type {
> OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
> OVS_VPORT_TYPE_GRE, /* GRE tunnel. */
> OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel. */
> + OVS_VPORT_TYPE_GENEVE, /* Geneve tunnel. */
> __OVS_VPORT_TYPE_MAX
> };
>
> @@ -310,6 +311,7 @@ enum ovs_tunnel_key_attr {
> OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */
> OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */
> OVS_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */
> + OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */
> __OVS_TUNNEL_KEY_ATTR_MAX
> };
>
> diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
> index 6ecf491..ba3bb82 100644
> --- a/net/openvswitch/Kconfig
> +++ b/net/openvswitch/Kconfig
> @@ -54,3 +54,14 @@ config OPENVSWITCH_VXLAN
> Say N to exclude this support and reduce the binary size.
>
> If unsure, say Y.
> +
> +config OPENVSWITCH_GENEVE
> + bool "Open vSwitch Geneve tunneling support"
> + depends on INET
> + depends on OPENVSWITCH
> + depends on GENEVE && !(OPENVSWITCH=y && GENEVE=m)
> + default y
> + ---help---
> + If you say Y here, then Open vSwitch will be able to create Geneve vports.
> +
> + Say N to exclude this support and reduce the binary size.
> diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
> index 3591cb5..9a33a27 100644
> --- a/net/openvswitch/Makefile
> +++ b/net/openvswitch/Makefile
> @@ -15,6 +15,10 @@ openvswitch-y := \
> vport-internal_dev.o \
> vport-netdev.o
>
> +ifneq ($(CONFIG_OPENVSWITCH_GENEVE),)
> +openvswitch-y += vport-geneve.o
> +endif
> +
> ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
> openvswitch-y += vport-vxlan.o
> endif
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index 010125c..2e31d9e 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -370,6 +370,7 @@ static size_t key_attr_size(void)
> + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
> + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
> + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
> + + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
> + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
> + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
> + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
> @@ -556,10 +557,12 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
>
> err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
> &flow->key, 0, &acts);
> - rcu_assign_pointer(flow->sf_acts, acts);
> if (err)
> goto err_flow_free;
>
> + rcu_assign_pointer(flow->sf_acts, acts);
> +
> + OVS_CB(packet)->egress_tun_info = NULL;
> OVS_CB(packet)->flow = flow;
> packet->priority = flow->key.phy.priority;
> packet->mark = flow->key.phy.skb_mark;
> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> index 2924cb3..62db02b 100644
> --- a/net/openvswitch/flow.c
> +++ b/net/openvswitch/flow.c
> @@ -448,6 +448,9 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
> int error;
> struct ethhdr *eth;
>
> + /* Flags are always used as part of stats */
> + key->tp.flags = 0;
> +
> skb_reset_mac_header(skb);
>
> /* Link layer. We are guaranteed to have at least the 14 byte Ethernet
> @@ -646,10 +649,23 @@ int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info,
> struct sk_buff *skb, struct sw_flow_key *key)
> {
> /* Extract metadata from packet. */
> - if (tun_info)
> + if (tun_info) {
> memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key));
> - else
> +
> + if (tun_info->options) {
> + BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
> + 8)) - 1
> + > sizeof(key->tun_opts));
> + memcpy(GENEVE_OPTS(key, tun_info->options_len),
> + tun_info->options, tun_info->options_len);
> + key->tun_opts_len = tun_info->options_len;
> + } else {
> + key->tun_opts_len = 0;
> + }
> + } else {
> + key->tun_opts_len = 0;
> memset(&key->tun_key, 0, sizeof(key->tun_key));
> + }
>
> key->phy.priority = skb->priority;
> key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
> diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
> index fe5a71b..7181331 100644
> --- a/net/openvswitch/flow.h
> +++ b/net/openvswitch/flow.h
> @@ -51,11 +51,24 @@ struct ovs_key_ipv4_tunnel {
>
> struct ovs_tunnel_info {
> struct ovs_key_ipv4_tunnel tunnel;
> + struct geneve_opt *options;
> + u8 options_len;
> };
>
> +/* Store options at the end of the array if they are less than the
> + * maximum size. This allows us to get the benefits of variable length
> + * matching for small options.
> + */
> +#define GENEVE_OPTS(flow_key, opt_len) \
> + ((struct geneve_opt *)((flow_key)->tun_opts + \
> + FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
> + opt_len))
> +
> static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
> const struct iphdr *iph,
> - __be64 tun_id, __be16 tun_flags)
> + __be64 tun_id, __be16 tun_flags,
> + struct geneve_opt *opts,
> + u8 opts_len)
> {
> tun_info->tunnel.tun_id = tun_id;
> tun_info->tunnel.ipv4_src = iph->saddr;
> @@ -67,9 +80,14 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
> /* clear struct padding. */
> memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0,
> sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
> +
> + tun_info->options = opts;
> + tun_info->options_len = opts_len;
> }
>
> struct sw_flow_key {
> + u8 tun_opts[255];
> + u8 tun_opts_len;
> struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */
> struct {
> u32 priority; /* Packet QoS priority. */
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index 5d6194d..368f233 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -42,6 +42,7 @@
> #include <linux/icmp.h>
> #include <linux/icmpv6.h>
> #include <linux/rculist.h>
> +#include <net/geneve.h>
> #include <net/ip.h>
> #include <net/ipv6.h>
> #include <net/ndisc.h>
> @@ -88,18 +89,20 @@ static void update_range__(struct sw_flow_match *match,
> } \
> } while (0)
>
> -#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
> - do { \
> - update_range__(match, offsetof(struct sw_flow_key, field), \
> - len, is_mask); \
> - if (is_mask) { \
> - if ((match)->mask) \
> - memcpy(&(match)->mask->key.field, value_p, len);\
> - } else { \
> - memcpy(&(match)->key->field, value_p, len); \
> - } \
> +#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
> + do { \
> + update_range__(match, offset, len, is_mask); \
> + if (is_mask) \
> + memcpy((u8 *)&(match)->mask->key + offset, value_p, \
> + len); \
> + else \
> + memcpy((u8 *)(match)->key + offset, value_p, len); \
> } while (0)
>
> +#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
> + SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
> + value_p, len, is_mask)
> +
> static u16 range_n_bytes(const struct sw_flow_key_range *range)
> {
> return range->end - range->start;
> @@ -335,6 +338,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
> int rem;
> bool ttl = false;
> __be16 tun_flags = 0;
> + unsigned long opt_key_offset;
>
> nla_for_each_nested(a, attr, rem) {
> int type = nla_type(a);
> @@ -347,6 +351,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
> [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
> [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
> [OVS_TUNNEL_KEY_ATTR_OAM] = 0,
> + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
> };
>
> if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
> @@ -355,7 +360,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
> return -EINVAL;
> }
>
> - if (ovs_tunnel_key_lens[type] != nla_len(a)) {
> + if (ovs_tunnel_key_lens[type] != nla_len(a) &&
> + ovs_tunnel_key_lens[type] != -1) {
> OVS_NLERR("IPv4 tunnel attribute type has unexpected "
> " length (type=%d, length=%d, expected=%d).\n",
> type, nla_len(a), ovs_tunnel_key_lens[type]);
> @@ -394,7 +400,60 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
> case OVS_TUNNEL_KEY_ATTR_OAM:
> tun_flags |= TUNNEL_OAM;
> break;
> + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
> + tun_flags |= TUNNEL_OPTIONS_PRESENT;
> + if (nla_len(a) > sizeof(match->key->tun_opts)) {
> + OVS_NLERR("Geneve option length exceeds maximum size (len %d, max %zu).\n",
> + nla_len(a),
> + sizeof(match->key->tun_opts));
> + return -EINVAL;
> + }
> +
> + if (nla_len(a) % 4 != 0) {
> + OVS_NLERR("Geneve option length is not a multiple of 4 (len %d).\n",
> + nla_len(a));
> + return -EINVAL;
> + }
> +
> + /* We need to record the length of the options passed
> + * down, otherwise packets with the same format but
> + * additional options will be silently matched.
> + */
> + if (!is_mask) {
> + SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
> + false);
> + } else {
> + /* This is somewhat unusual because it looks at
> + * both the key and mask while parsing the
> + * attributes (and by extension assumes the key
> + * is parsed first). Normally, we would verify
> + * that each is the correct length and that the
> + * attributes line up in the validate function.
> + * However, that is difficult because this is
> + * variable length and we won't have the
> + * information later.
> + */
> + if (match->key->tun_opts_len != nla_len(a)) {
> + OVS_NLERR("Geneve option key length (%d) is different from mask length (%d).",
> + match->key->tun_opts_len,
> + nla_len(a));
> + return -EINVAL;
> + }
> +
> + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
> + true);
> + }
> +
> + opt_key_offset = (unsigned long)GENEVE_OPTS(
> + (struct sw_flow_key *)0,
> + nla_len(a));
> + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset,
> + nla_data(a), nla_len(a),
> + is_mask);
> + break;
> default:
> + OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n",
> + type);
> return -EINVAL;
> }
> }
> @@ -421,16 +480,11 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
> return 0;
> }
>
> -static int ipv4_tun_to_nlattr(struct sk_buff *skb,
> - const struct ovs_key_ipv4_tunnel *tun_key,
> - const struct ovs_key_ipv4_tunnel *output)
> +static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
> + const struct ovs_key_ipv4_tunnel *output,
> + const struct geneve_opt *tun_opts,
> + int swkey_tun_opts_len)
> {
> - struct nlattr *nla;
> -
> - nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
> - if (!nla)
> - return -EMSGSIZE;
> -
> if (output->tun_flags & TUNNEL_KEY &&
> nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
> return -EMSGSIZE;
> @@ -454,12 +508,35 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
> if ((output->tun_flags & TUNNEL_OAM) &&
> nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
> return -EMSGSIZE;
> + if (tun_opts &&
> + nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
> + swkey_tun_opts_len, tun_opts))
> + return -EMSGSIZE;
>
> - nla_nest_end(skb, nla);
> return 0;
> }
>
>
> +static int ipv4_tun_to_nlattr(struct sk_buff *skb,
> + const struct ovs_key_ipv4_tunnel *output,
> + const struct geneve_opt *tun_opts,
> + int swkey_tun_opts_len)
> +{
> + struct nlattr *nla;
> + int err;
> +
> + nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
> + if (!nla)
> + return -EMSGSIZE;
> +
> + err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
> + if (err)
> + return err;
> +
> + nla_nest_end(skb, nla);
> + return 0;
> +}
> +
> static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
> const struct nlattr **a, bool is_mask)
> {
> @@ -905,9 +982,16 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
> if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
> goto nla_put_failure;
>
> - if ((swkey->tun_key.ipv4_dst || is_mask) &&
> - ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
> - goto nla_put_failure;
> + if ((swkey->tun_key.ipv4_dst || is_mask)) {
> + const struct geneve_opt *opts = NULL;
> +
> + if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
> + opts = GENEVE_OPTS(output, swkey->tun_opts_len);
> +
> + if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
> + swkey->tun_opts_len))
> + goto nla_put_failure;
> + }
>
> if (swkey->phy.in_port == DP_MAX_PORTS) {
> if (is_mask && (output->phy.in_port == 0xffff))
> @@ -1290,17 +1374,55 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
> if (err)
> return err;
>
> + if (key.tun_opts_len) {
> + struct geneve_opt *option = GENEVE_OPTS(&key,
> + key.tun_opts_len);
> + int opts_len = key.tun_opts_len;
> + bool crit_opt = false;
> +
> + while (opts_len > 0) {
> + int len;
> +
> + if (opts_len < sizeof(*option))
> + return -EINVAL;
> +
> + len = sizeof(*option) + option->length * 4;
> + if (len > opts_len)
> + return -EINVAL;
> +
> + crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
> +
> + option = (struct geneve_opt *)((u8 *)option + len);
> + opts_len -= len;
> + };
> +
> + key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
> + };
> +
> start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
> if (start < 0)
> return start;
>
> a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
> - sizeof(*tun_info));
> + sizeof(*tun_info) + key.tun_opts_len);
> if (IS_ERR(a))
> return PTR_ERR(a);
>
> tun_info = nla_data(a);
> tun_info->tunnel = key.tun_key;
> + tun_info->options_len = key.tun_opts_len;
> +
> + if (tun_info->options_len) {
> + /* We need to store the options in the action itself since
> + * everything else will go away after flow setup. We can append
> + * it to tun_info and then point there.
> + */
> + memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len),
> + key.tun_opts_len);
> + tun_info->options = (struct geneve_opt *)(tun_info + 1);
> + } else {
> + tun_info->options = NULL;
> + }
>
> add_nested_action_end(*sfa, start);
>
> @@ -1592,7 +1714,9 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
> return -EMSGSIZE;
>
> err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
> - nla_data(ovs_key));
> + tun_info->options_len ?
> + tun_info->options : NULL,
> + tun_info->options_len);
> if (err)
> return err;
> nla_nest_end(skb, start);
> diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
> new file mode 100644
> index 0000000..5572d48
> --- /dev/null
> +++ b/net/openvswitch/vport-geneve.c
> @@ -0,0 +1,236 @@
> +/*
> + * Copyright (c) 2014 Nicira, Inc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/version.h>
> +
> +#include <linux/in.h>
> +#include <linux/ip.h>
> +#include <linux/net.h>
> +#include <linux/rculist.h>
> +#include <linux/udp.h>
> +#include <linux/if_vlan.h>
> +
> +#include <net/geneve.h>
> +#include <net/icmp.h>
> +#include <net/ip.h>
> +#include <net/route.h>
> +#include <net/udp.h>
> +#include <net/xfrm.h>
> +
> +#include "datapath.h"
> +#include "vport.h"
> +
> +/**
> + * struct geneve_port - Keeps track of open UDP ports
> + * @gs: The Geneve socket created for this port number.
> + * @name: vport name.
> + */
> +struct geneve_port {
> + struct geneve_sock *gs;
> + char name[IFNAMSIZ];
> +};
> +
> +static LIST_HEAD(geneve_ports);
> +
> +static inline struct geneve_port *geneve_vport(const struct vport *vport)
> +{
> + return vport_priv(vport);
> +}
> +
> +static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
> +{
> + return (struct genevehdr *)(udp_hdr(skb) + 1);
> +}
> +
> +/* Convert 64 bit tunnel ID to 24 bit VNI. */
> +static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
> +{
> +#ifdef __BIG_ENDIAN
> + vni[0] = (__force __u8)(tun_id >> 16);
> + vni[1] = (__force __u8)(tun_id >> 8);
> + vni[2] = (__force __u8)tun_id;
> +#else
> + vni[0] = (__force __u8)((__force u64)tun_id >> 40);
> + vni[1] = (__force __u8)((__force u64)tun_id >> 48);
> + vni[2] = (__force __u8)((__force u64)tun_id >> 56);
> +#endif
> +}
> +
> +/* Convert 24 bit VNI to 64 bit tunnel ID. */
> +static __be64 vni_to_tunnel_id(__u8 *vni)
> +{
> +#ifdef __BIG_ENDIAN
> + return (vni[0] << 16) | (vni[1] << 8) | vni[2];
> +#else
> + return (__force __be64)(((__force u64)vni[0] << 40) |
> + ((__force u64)vni[1] << 48) |
> + ((__force u64)vni[2] << 56));
> +#endif
> +}
> +
> +static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
> +{
> + struct vport *vport = gs->rcv_data;
> + struct genevehdr *geneveh = geneve_hdr(skb);
> + int opts_len;
> + struct ovs_tunnel_info tun_info;
> + __be64 key;
> + __be16 flags;
> +
> + opts_len = geneveh->opt_len * 4;
> +
> + flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT |
> + (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
> + (geneveh->oam ? TUNNEL_OAM : 0) |
> + (geneveh->critical ? TUNNEL_CRIT_OPT : 0);
> +
> + key = vni_to_tunnel_id(geneveh->vni);
> +
> + ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags,
> + geneveh->options, opts_len);
> +
> + ovs_vport_receive(vport, skb, &tun_info);
> +}
> +
> +static int geneve_get_options(const struct vport *vport,
> + struct sk_buff *skb)
> +{
> + struct geneve_port *geneve_port = geneve_vport(vport);
> + __be16 sport;
> +
> + sport = ntohs(inet_sk(geneve_port->gs->sock->sk)->inet_sport);
> + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, sport))
> + return -EMSGSIZE;
> + return 0;
> +}
> +
> +static void geneve_tnl_destroy(struct vport *vport)
> +{
> + struct geneve_port *geneve_port = geneve_vport(vport);
> +
> + geneve_sock_release(geneve_port->gs);
> +
> + ovs_vport_deferred_free(vport);
> +}
> +
> +static struct vport *geneve_tnl_create(const struct vport_parms *parms)
> +{
> + struct net *net = ovs_dp_get_net(parms->dp);
> + struct nlattr *options = parms->options;
> + struct geneve_port *geneve_port;
> + struct geneve_sock *gs;
> + struct vport *vport;
> + struct nlattr *a;
> + int err;
> + u16 dst_port;
> +
> + if (!options) {
> + err = -EINVAL;
> + goto error;
> + }
> +
> + a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
> + if (a && nla_len(a) == sizeof(u16)) {
> + dst_port = nla_get_u16(a);
> + } else {
> + /* Require destination port from userspace. */
> + err = -EINVAL;
> + goto error;
> + }
> +
> + vport = ovs_vport_alloc(sizeof(struct geneve_port),
> + &ovs_geneve_vport_ops, parms);
> + if (IS_ERR(vport))
> + return vport;
> +
> + geneve_port = geneve_vport(vport);
> + strncpy(geneve_port->name, parms->name, IFNAMSIZ);
> +
> + gs = geneve_sock_add(net, htons(dst_port), geneve_rcv, vport, true, 0);
> + if (IS_ERR(gs)) {
> + ovs_vport_free(vport);
> + return (void *)gs;
> + }
> + geneve_port->gs = gs;
> +
> + return vport;
> +error:
> + return ERR_PTR(err);
> +}
> +
> +static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
> +{
> + struct ovs_key_ipv4_tunnel *tun_key;
> + struct ovs_tunnel_info *tun_info;
> + struct net *net = ovs_dp_get_net(vport->dp);
> + struct geneve_port *geneve_port = geneve_vport(vport);
> + __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
> + __be16 sport;
> + struct rtable *rt;
> + struct flowi4 fl;
> + u8 vni[3];
> + __be16 df;
> + int err;
> +
> + tun_info = OVS_CB(skb)->egress_tun_info;
> + if (unlikely(!tun_info)) {
> + err = -EINVAL;
> + goto error;
> + }
> +
> + tun_key = &tun_info->tunnel;
> +
> + /* Route lookup */
> + memset(&fl, 0, sizeof(fl));
> + fl.daddr = tun_key->ipv4_dst;
> + fl.saddr = tun_key->ipv4_src;
> + fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
> + fl.flowi4_mark = skb->mark;
> + fl.flowi4_proto = IPPROTO_UDP;
> +
> + rt = ip_route_output_key(net, &fl);
> + if (IS_ERR(rt)) {
> + err = PTR_ERR(rt);
> + goto error;
> + }
> +
> + df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
> + sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
> + tunnel_id_to_vni(tun_key->tun_id, vni);
> + skb->ignore_df = 1;
> +
> + err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
> + tun_key->ipv4_dst, tun_key->ipv4_tos,
> + tun_key->ipv4_ttl, df, sport, dport,
> + tun_key->tun_flags, vni,
> + tun_info->options_len, (u8 *)tun_info->options,
> + false);
> + if (err < 0)
> + ip_rt_put(rt);
> +error:
> + return err;
> +}
> +
> +static const char *geneve_get_name(const struct vport *vport)
> +{
> + struct geneve_port *geneve_port = geneve_vport(vport);
> +
> + return geneve_port->name;
> +}
> +
> +const struct vport_ops ovs_geneve_vport_ops = {
> + .type = OVS_VPORT_TYPE_GENEVE,
> + .create = geneve_tnl_create,
> + .destroy = geneve_tnl_destroy,
> + .get_name = geneve_get_name,
> + .get_options = geneve_get_options,
> + .send = geneve_tnl_send,
> +};
> diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
> index fe768bd..108b82d 100644
> --- a/net/openvswitch/vport-gre.c
> +++ b/net/openvswitch/vport-gre.c
> @@ -106,7 +106,7 @@ static int gre_rcv(struct sk_buff *skb,
>
> key = key_to_tunnel_id(tpi->key, tpi->seq);
> ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key,
> - filter_tnl_flags(tpi->flags));
> + filter_tnl_flags(tpi->flags), NULL, 0);
>
> ovs_vport_receive(vport, skb, &tun_info);
> return PACKET_RCVD;
> diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
> index 5fbff2c..2735e01 100644
> --- a/net/openvswitch/vport-vxlan.c
> +++ b/net/openvswitch/vport-vxlan.c
> @@ -66,7 +66,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
> /* Save outer tunnel values */
> iph = ip_hdr(skb);
> key = cpu_to_be64(ntohl(vx_vni) >> 8);
> - ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY);
> + ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0);
>
> ovs_vport_receive(vport, skb, &tun_info);
> }
> diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
> index 3e50ee8..53001b0 100644
> --- a/net/openvswitch/vport.c
> +++ b/net/openvswitch/vport.c
> @@ -48,6 +48,9 @@ static const struct vport_ops *vport_ops_list[] = {
> #ifdef CONFIG_OPENVSWITCH_VXLAN
> &ovs_vxlan_vport_ops,
> #endif
> +#ifdef CONFIG_OPENVSWITCH_GENEVE
> + &ovs_geneve_vport_ops,
> +#endif
> };
>
> /* Protected by RCU read lock for reading, ovs_mutex for writing. */
> diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
> index e28964a..8942125 100644
> --- a/net/openvswitch/vport.h
> +++ b/net/openvswitch/vport.h
> @@ -215,6 +215,7 @@ extern const struct vport_ops ovs_netdev_vport_ops;
> extern const struct vport_ops ovs_internal_vport_ops;
> extern const struct vport_ops ovs_gre_vport_ops;
> extern const struct vport_ops ovs_vxlan_vport_ops;
> +extern const struct vport_ops ovs_geneve_vport_ops;
>
> static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
> const void *start, unsigned int len)
> --
> 1.7.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists