[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZBxycrxU93mhgkAT@corigine.com>
Date: Thu, 23 Mar 2023 16:38:26 +0100
From: Simon Horman <simon.horman@...igine.com>
To: Vladimir Nikishkin <vladimir@...ishkin.pw>
Cc: netdev@...r.kernel.org, davem@...emloft.net, edumazet@...gle.com,
kuba@...nel.org, pabeni@...hat.com,
eng.alaamohamedsoliman.am@...il.com, gnault@...hat.com,
razor@...ckwall.org
Subject: Re: [PATCH net-next v5] vxlan: try to send a packet normally if
local bypass fails
On Thu, Mar 23, 2023 at 12:26:08PM +0800, Vladimir Nikishkin wrote:
> In vxlan_core, if an fdb entry is pointing to a local
> address with some port, the system tries to get the packet to
> deliver the packet to the vxlan directly, bypassing the network
> stack.
>
> This patch makes it still try canonical delivery, if there is no
> linux kernel vxlan listening on this port. This will be useful
> for the cases when there is some userspace daemon expecting
> vxlan packets for post-processing, or some other implementation
> of vxlan.
>
> Signed-off-by: Vladimir Nikishkin <vladimir@...ishkin.pw>
> ---
> drivers/net/vxlan/vxlan_core.c | 34 ++++++++++++++++++++++++------
> include/net/vxlan.h | 4 +++-
> include/uapi/linux/if_link.h | 1 +
> tools/include/uapi/linux/if_link.h | 2 ++
> 4 files changed, 33 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
> index 561fe1b314f5..cef7a9aec24b 100644
> --- a/drivers/net/vxlan/vxlan_core.c
> +++ b/drivers/net/vxlan/vxlan_core.c
> @@ -2341,7 +2341,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
> union vxlan_addr *daddr,
> __be16 dst_port, int dst_ifindex, __be32 vni,
> struct dst_entry *dst,
> - u32 rt_flags)
> + u32 rt_flags, bool localbypass)
> {
> #if IS_ENABLED(CONFIG_IPV6)
> /* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
> @@ -2355,18 +2355,21 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
> !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
> struct vxlan_dev *dst_vxlan;
>
> - dst_release(dst);
> dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
> daddr->sa.sa_family, dst_port,
> vxlan->cfg.flags);
> - if (!dst_vxlan) {
> + if (!dst_vxlan && localbypass) {
> + dst_release(dst);
> dev->stats.tx_errors++;
> vxlan_vnifilter_count(vxlan, vni, NULL,
> VXLAN_VNI_STATS_TX_ERRORS, 0);
> kfree_skb(skb);
>
> return -ENOENT;
> + } else if (!dst_vxlan && !localbypass) {
> + return 0;
> }
I'm a bit unsure about the logic around dst_release().
But assuming it is correct, perhaps this is a slightly
nicer way to express the same logic:
if (!dst_vxlan) {
if (!localbypass)
return 0;
dst_release(dst);
dev->stats.tx_errors++;
vxlan_vnifilter_count(vxlan, vni, NULL,
VXLAN_VNI_STATS_TX_ERRORS, 0);
kfree_skb(skb);
return -ENOENT;
}
> + dst_release(dst);
> vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
> return 1;
> }
> @@ -2393,6 +2396,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
> int err;
> u32 flags = vxlan->cfg.flags;
> bool udp_sum = false;
> + bool localbypass = true;
Is there a need to initialise this here?
Also, it would be good to move towards, rather than away from,
reverse xmas tree - longest line to shortest - for
local variable declarations.
> bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
> __be32 vni = 0;
> #if IS_ENABLED(CONFIG_IPV6)
> @@ -2494,9 +2498,11 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>
> if (!info) {
> /* Bypass encapsulation if the destination is local */
> + localbypass = !(flags & VXLAN_F_LOCALBYPASS);
Extra space after '='.
Also, the scope of localbypass could be reduced to this block.
> err = encap_bypass_if_local(skb, dev, vxlan, dst,
> dst_port, ifindex, vni,
> - &rt->dst, rt->rt_flags);
> + &rt->dst, rt->rt_flags,
> + localbypass);
> if (err)
> goto out_unlock;
>
> @@ -2568,10 +2574,10 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>
> if (!info) {
> u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
> -
> + localbypass = !(flags & VXLAN_F_LOCALBYPASS);
Ditto.
> err = encap_bypass_if_local(skb, dev, vxlan, dst,
> dst_port, ifindex, vni,
> - ndst, rt6i_flags);
> + ndst, rt6i_flags, localbypass);
> if (err)
> goto out_unlock;
> }
> @@ -3202,6 +3208,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
> [IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
> [IFLA_VXLAN_DF] = { .type = NLA_U8 },
> [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 },
> + [IFLA_VXLAN_LOCALBYPASS] = { .type = NLA_U8 },
> };
>
> static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
> @@ -4011,6 +4018,16 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
> conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
> }
>
> + if (data[IFLA_VXLAN_LOCALBYPASS]) {
> + if (changelink) {
> + NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCALBYPASS],
> + "Cannot change LOCALBYPASS flag");
> + return -EOPNOTSUPP;
> + }
> + if (!nla_get_u8(data[IFLA_VXLAN_LOCALBYPASS]))
> + conf->flags |= VXLAN_F_LOCALBYPASS;
> + }
Can vxlan_nl2flag() be used here?
> +
> if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
> err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
> VXLAN_F_UDP_ZERO_CSUM6_TX, changelink,
> @@ -4232,6 +4249,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
> + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LOCALBYPASS */
> 0;
> }
>
> @@ -4308,7 +4326,9 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
> nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
> !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
> nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
> - !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
> + !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)) ||
> + nla_put_u8(skb, IFLA_VXLAN_LOCALBYPASS,
> + !(vxlan->cfg.flags & VXLAN_F_LOCALBYPASS)))
It seems that the sense of VXLAN_F_LOCALBYPASS is the opposite
of other flags. I think it would be good to reconcile that.
> goto nla_put_failure;
>
> if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
> diff --git a/include/net/vxlan.h b/include/net/vxlan.h
> index 20bd7d893e10..0be91ca78d3a 100644
> --- a/include/net/vxlan.h
> +++ b/include/net/vxlan.h
> @@ -328,6 +328,7 @@ struct vxlan_dev {
> #define VXLAN_F_TTL_INHERIT 0x10000
> #define VXLAN_F_VNIFILTER 0x20000
> #define VXLAN_F_MDB 0x40000
> +#define VXLAN_F_LOCALBYPASS 0x80000
>
> /* Flags that are used in the receive path. These flags must match in
> * order for a socket to be shareable
> @@ -348,7 +349,8 @@ struct vxlan_dev {
> VXLAN_F_UDP_ZERO_CSUM6_TX | \
> VXLAN_F_UDP_ZERO_CSUM6_RX | \
> VXLAN_F_COLLECT_METADATA | \
> - VXLAN_F_VNIFILTER)
> + VXLAN_F_VNIFILTER | \
> + VXLAN_F_LOCALBYPASS)
>
> struct net_device *vxlan_dev_create(struct net *net, const char *name,
> u8 name_assign_type, struct vxlan_config *conf);
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index 57ceb788250f..4e3a3d295056 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -826,6 +826,7 @@ enum {
> IFLA_VXLAN_TTL_INHERIT,
> IFLA_VXLAN_DF,
> IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
> + IFLA_VXLAN_LOCALBYPASS,
> __IFLA_VXLAN_MAX
> };
> #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
> diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
> index 901d98b865a1..3d9a1fd6f7e7 100644
> --- a/tools/include/uapi/linux/if_link.h
> +++ b/tools/include/uapi/linux/if_link.h
> @@ -747,6 +747,8 @@ enum {
> IFLA_VXLAN_GPE,
> IFLA_VXLAN_TTL_INHERIT,
> IFLA_VXLAN_DF,
> + IFLA_VXLAN_VNIFILTER,
> + IFLA_VXLAN_LOCALBYPASS,
> __IFLA_VXLAN_MAX
> };
> #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
> --
> 2.35.7
>
> --
> Fastmail.
>
Powered by blists - more mailing lists