[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250209101853.15828-1-gal@nvidia.com>
Date: Sun, 9 Feb 2025 12:18:53 +0200
From: Gal Pressman <gal@...dia.com>
To: <netdev@...r.kernel.org>, "David S. Miller" <davem@...emloft.net>, "Eric
Dumazet" <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>, Paolo Abeni
<pabeni@...hat.com>, Andrew Lunn <andrew+netdev@...n.ch>
CC: Tariq Toukan <tariqt@...dia.com>, Louis Peens <louis.peens@...igine.com>,
Simon Horman <horms@...nel.org>, David Ahern <dsahern@...nel.org>, "Pravin B
Shelar" <pshelar@....org>, Yotam Gigi <yotam.gi@...il.com>, Jamal Hadi Salim
<jhs@...atatu.com>, Cong Wang <xiyou.wangcong@...il.com>, Jiri Pirko
<jiri@...nulli.us>, Kees Cook <kees@...nel.org>, "Gustavo A. R. Silva"
<gustavoars@...nel.org>, <dev@...nvswitch.org>,
<linux-hardening@...r.kernel.org>, Gal Pressman <gal@...dia.com>, "Cosmin
Ratiu" <cratiu@...dia.com>
Subject: [PATCH net-next] net: Add options as a flexible array to struct ip_tunnel_info
Remove the hidden assumption that options are allocated at the end of
the struct, and teach the compiler about them using a flexible array.
With this, we can revert the unsafe_memcpy() call we have in
tun_dst_unclone() [1], and resolve the false field-spanning write
warning caused by the memcpy() in ip_tunnel_info_opts_set().
Note that this patch changes the layout of struct ip_tunnel_info since
there is padding at the end of the struct.
Before this, options would be written at 'info + 1' which is after the
padding.
After this patch, options are written right after 'mode' field (into the
padding).
[1] Commit 13cfd6a6d7ac ("net: Silence false field-spanning write warning in metadata_dst memcpy")
Link: https://lore.kernel.org/all/53D1D353-B8F6-4ADC-8F29-8C48A7C9C6F1@kernel.org/
Suggested-by: Kees Cook <kees@...nel.org>
Reviewed-by: Cosmin Ratiu <cratiu@...dia.com>
Reviewed-by: Tariq Toukan <tariqt@...dia.com>
Signed-off-by: Gal Pressman <gal@...dia.com>
---
.../mellanox/mlx5/core/en/tc_tun_encap.c | 4 +---
.../mellanox/mlx5/core/en/tc_tun_vxlan.c | 2 +-
.../ethernet/netronome/nfp/flower/action.c | 4 ++--
drivers/net/pfcp.c | 2 +-
drivers/net/vxlan/vxlan_core.c | 4 ++--
include/net/dst_metadata.h | 7 ++----
include/net/ip_tunnels.h | 11 +++------
net/core/dst.c | 3 ++-
net/ipv4/ip_gre.c | 4 ++--
net/ipv4/ip_tunnel_core.c | 24 +++++++++----------
net/ipv6/ip6_gre.c | 4 ++--
net/openvswitch/flow_netlink.c | 4 ++--
net/psample/psample.c | 2 +-
net/sched/act_tunnel_key.c | 12 +++++-----
14 files changed, 39 insertions(+), 48 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index e7e01f3298ef..d9f40cf8198d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -620,9 +620,7 @@ bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
return a_info->options_len == b_info->options_len &&
- !memcmp(ip_tunnel_info_opts(a_info),
- ip_tunnel_info_opts(b_info),
- a_info->options_len);
+ !memcmp(a_info->options, b_info->options, a_info->options_len);
}
static int cmp_decap_info(struct mlx5e_decap_key *a,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
index e4e487c8431b..561c874b0825 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
@@ -100,7 +100,7 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
vxh->vx_flags = VXLAN_HF_VNI;
vxh->vx_vni = vxlan_vni_field(tun_id);
if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_key->tun_flags)) {
- md = ip_tunnel_info_opts(e->tun_info);
+ md = (struct vxlan_metadata *)e->tun_info->options;
vxlan_build_gbp_hdr(vxh, md);
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index aca2a7417af3..6dd8817771b5 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -333,7 +333,7 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len,
{
struct ip_tunnel_info *ip_tun = (struct ip_tunnel_info *)act->tunnel;
int opt_len, opt_cnt, act_start, tot_push_len;
- u8 *src = ip_tunnel_info_opts(ip_tun);
+ u8 *src = ip_tun->options;
/* We need to populate the options in reverse order for HW.
* Therefore we go through the options, calculating the
@@ -370,7 +370,7 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len,
act_start = *list_len;
*list_len += tot_push_len;
- src = ip_tunnel_info_opts(ip_tun);
+ src = ip_tun->options;
while (opt_cnt) {
struct geneve_opt *opt = (struct geneve_opt *)src;
struct nfp_fl_push_geneve *push;
diff --git a/drivers/net/pfcp.c b/drivers/net/pfcp.c
index 68d0d9e92a22..4963f85ad807 100644
--- a/drivers/net/pfcp.c
+++ b/drivers/net/pfcp.c
@@ -71,7 +71,7 @@ static int pfcp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (unlikely(!tun_dst))
goto drop;
- md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+ md = (struct pfcp_metadata *)tun_dst->u.tun_info.options;
if (unlikely(!md))
goto drop;
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 05c10acb2a57..9fd1832af6b0 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -1756,7 +1756,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
goto drop;
}
- md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+ md = (struct vxlan_metadata *)tun_dst->u.tun_info.options;
skb_dst_set(skb, (struct dst_entry *)tun_dst);
} else {
@@ -2459,7 +2459,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) {
if (info->options_len < sizeof(*md))
goto drop;
- md = ip_tunnel_info_opts(info);
+ md = (struct vxlan_metadata *)info->options;
}
ttl = info->key.ttl;
tos = info->key.tos;
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 84c15402931c..4160731dcb6e 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -163,11 +163,8 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
if (!new_md)
return ERR_PTR(-ENOMEM);
- unsafe_memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
- sizeof(struct ip_tunnel_info) + md_size,
- /* metadata_dst_alloc() reserves room (md_size bytes) for
- * options right after the ip_tunnel_info struct.
- */);
+ memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
+ sizeof(struct ip_tunnel_info) + md_size);
#ifdef CONFIG_DST_CACHE
/* Unclone the dst cache if there is one */
if (new_md->u.tun_info.dst_cache.cache) {
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 1aa31bdb2b31..2a6dca52e61d 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -93,12 +93,6 @@ struct ip_tunnel_encap {
GENMASK((sizeof_field(struct ip_tunnel_info, \
options_len) * BITS_PER_BYTE) - 1, 0)
-#define ip_tunnel_info_opts(info) \
- _Generic(info, \
- const struct ip_tunnel_info * : ((const void *)((info) + 1)),\
- struct ip_tunnel_info * : ((void *)((info) + 1))\
- )
-
struct ip_tunnel_info {
struct ip_tunnel_key key;
struct ip_tunnel_encap encap;
@@ -107,6 +101,7 @@ struct ip_tunnel_info {
#endif
u8 options_len;
u8 mode;
+ u8 options[] __counted_by(options_len);
};
/* 6rd prefix/relay information */
@@ -650,7 +645,7 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
static inline void ip_tunnel_info_opts_get(void *to,
const struct ip_tunnel_info *info)
{
- memcpy(to, info + 1, info->options_len);
+ memcpy(to, info->options, info->options_len);
}
static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
@@ -659,7 +654,7 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
{
info->options_len = len;
if (len > 0) {
- memcpy(ip_tunnel_info_opts(info), from, len);
+ memcpy(info->options, from, len);
ip_tunnel_flags_or(info->key.tun_flags, info->key.tun_flags,
flags);
}
diff --git a/net/core/dst.c b/net/core/dst.c
index 9552a90d4772..d981c295a48e 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -286,7 +286,8 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type,
{
struct metadata_dst *md_dst;
- md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
+ md_dst = kmalloc(struct_size(md_dst, u.tun_info.options, optslen),
+ flags);
if (!md_dst)
return NULL;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ed1b6b44faf8..e061aec6e7bf 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -334,7 +334,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
skb_network_header_len(skb);
pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
sizeof(*ershdr));
- md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+ md = (struct erspan_metadata *)tun_dst->u.tun_info.options;
md->version = ver;
md2 = &md->u.md2;
memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
@@ -556,7 +556,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
if (tun_info->options_len < sizeof(*md))
goto err_free_skb;
- md = ip_tunnel_info_opts(tun_info);
+ md = (struct erspan_metadata *)tun_info->options;
/* ERSPAN has fixed 8 byte GRE header */
version = md->version;
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index a3676155be78..e0b0169175e5 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -147,8 +147,7 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
dst->key.u.ipv4.dst = src->key.u.ipv4.src;
ip_tunnel_flags_copy(dst->key.tun_flags, src->key.tun_flags);
dst->mode = src->mode | IP_TUNNEL_INFO_TX;
- ip_tunnel_info_opts_set(dst, ip_tunnel_info_opts(src),
- src->options_len, tun_flags);
+ ip_tunnel_info_opts_set(dst, src->options, src->options_len, tun_flags);
return res;
}
@@ -490,7 +489,8 @@ static int ip_tun_parse_opts_geneve(struct nlattr *attr,
return -EINVAL;
if (info) {
- struct geneve_opt *opt = ip_tunnel_info_opts(info) + opts_len;
+ struct geneve_opt *opt =
+ (struct geneve_opt *)(info->options + opts_len);
memcpy(opt->opt_data, nla_data(attr), data_len);
opt->length = data_len / 4;
@@ -521,7 +521,7 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
if (info) {
struct vxlan_metadata *md =
- ip_tunnel_info_opts(info) + opts_len;
+ (struct vxlan_metadata *)(info->options + opts_len);
attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP];
md->gbp = nla_get_u32(attr);
@@ -562,7 +562,7 @@ static int ip_tun_parse_opts_erspan(struct nlattr *attr,
if (info) {
struct erspan_metadata *md =
- ip_tunnel_info_opts(info) + opts_len;
+ (struct erspan_metadata *)(info->options + opts_len);
md->version = ver;
if (ver == 1) {
@@ -746,7 +746,7 @@ static int ip_tun_fill_encap_opts_geneve(struct sk_buff *skb,
return -ENOMEM;
while (tun_info->options_len > offset) {
- opt = ip_tunnel_info_opts(tun_info) + offset;
+ opt = (struct geneve_opt *)(tun_info->options + offset);
if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS,
opt->opt_class) ||
nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) ||
@@ -772,7 +772,7 @@ static int ip_tun_fill_encap_opts_vxlan(struct sk_buff *skb,
if (!nest)
return -ENOMEM;
- md = ip_tunnel_info_opts(tun_info);
+ md = (struct vxlan_metadata *)tun_info->options;
if (nla_put_u32(skb, LWTUNNEL_IP_OPT_VXLAN_GBP, md->gbp)) {
nla_nest_cancel(skb, nest);
return -ENOMEM;
@@ -792,7 +792,7 @@ static int ip_tun_fill_encap_opts_erspan(struct sk_buff *skb,
if (!nest)
return -ENOMEM;
- md = ip_tunnel_info_opts(tun_info);
+ md = (struct erspan_metadata *)tun_info->options;
if (nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_VER, md->version))
goto err;
@@ -875,7 +875,7 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
opt_len += nla_total_size(0); /* LWTUNNEL_IP_OPTS_GENEVE */
while (info->options_len > offset) {
- opt = ip_tunnel_info_opts(info) + offset;
+ opt = (struct geneve_opt *)(info->options + offset);
opt_len += nla_total_size(2) /* OPT_GENEVE_CLASS */
+ nla_total_size(1) /* OPT_GENEVE_TYPE */
+ nla_total_size(opt->length * 4);
@@ -886,7 +886,8 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_VXLAN */
+ nla_total_size(4); /* OPT_VXLAN_GBP */
} else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) {
- struct erspan_metadata *md = ip_tunnel_info_opts(info);
+ struct erspan_metadata *md =
+ (struct erspan_metadata *)info->options;
opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_ERSPAN */
+ nla_total_size(1) /* OPT_ERSPAN_VER */
@@ -920,8 +921,7 @@ static int ip_tun_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
return memcmp(info_a, info_b, sizeof(info_a->key)) ||
info_a->mode != info_b->mode ||
info_a->options_len != info_b->options_len ||
- memcmp(ip_tunnel_info_opts(info_a),
- ip_tunnel_info_opts(info_b), info_a->options_len);
+ memcmp(info_a->options, info_b->options, info_a->options_len);
}
static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 235808cfec70..35b0fb2162d7 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -575,7 +575,7 @@ static int ip6erspan_rcv(struct sk_buff *skb,
pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
sizeof(*ershdr));
info = &tun_dst->u.tun_info;
- md = ip_tunnel_info_opts(info);
+ md = (struct erspan_metadata *)info->options;
md->version = ver;
md2 = &md->u.md2;
memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
@@ -1022,7 +1022,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
goto tx_err;
if (tun_info->options_len < sizeof(*md))
goto tx_err;
- md = ip_tunnel_info_opts(tun_info);
+ md = (struct erspan_metadata *)tun_info->options;
tun_id = tunnel_id_to_key32(key->tun_id);
if (md->version == 1) {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 881ddd3696d5..2c0ebc9890e4 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -980,7 +980,7 @@ int ovs_nla_put_tunnel_info(struct sk_buff *skb,
struct ip_tunnel_info *tun_info)
{
return __ip_tun_to_nlattr(skb, &tun_info->key,
- ip_tunnel_info_opts(tun_info),
+ tun_info->options,
tun_info->options_len,
ip_tunnel_info_af(tun_info), tun_info->mode);
}
@@ -3753,7 +3753,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
return -EMSGSIZE;
err = ip_tun_to_nlattr(skb, &tun_info->key,
- ip_tunnel_info_opts(tun_info),
+ tun_info->options,
tun_info->options_len,
ip_tunnel_info_af(tun_info), tun_info->mode);
if (err)
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 25f92ba0840c..8ed75e83826e 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -217,7 +217,7 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
struct ip_tunnel_info *tun_info)
{
unsigned short tun_proto = ip_tunnel_info_af(tun_info);
- const void *tun_opts = ip_tunnel_info_opts(tun_info);
+ const void *tun_opts = tun_info->options;
const struct ip_tunnel_key *tun_key = &tun_info->key;
int tun_opts_len = tun_info->options_len;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index af7c99845948..5bb7d32967da 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -303,7 +303,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
#if IS_ENABLED(CONFIG_INET)
__set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags);
- return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
+ return tunnel_key_copy_opts(nla, info->options,
opts_len, extack);
#else
return -EAFNOSUPPORT;
@@ -311,7 +311,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN:
#if IS_ENABLED(CONFIG_INET)
__set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags);
- return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
+ return tunnel_key_copy_opts(nla, info->options,
opts_len, extack);
#else
return -EAFNOSUPPORT;
@@ -319,7 +319,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN:
#if IS_ENABLED(CONFIG_INET)
__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags);
- return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
+ return tunnel_key_copy_opts(nla, info->options,
opts_len, extack);
#else
return -EAFNOSUPPORT;
@@ -572,7 +572,7 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
int len = info->options_len;
- u8 *src = (u8 *)(info + 1);
+ u8 *src = (u8 *)info->options;
struct nlattr *start;
start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
@@ -603,7 +603,7 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
- struct vxlan_metadata *md = (struct vxlan_metadata *)(info + 1);
+ struct vxlan_metadata *md = (struct vxlan_metadata *)info->options;
struct nlattr *start;
start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_VXLAN);
@@ -622,7 +622,7 @@ static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb,
static int tunnel_key_erspan_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
- struct erspan_metadata *md = (struct erspan_metadata *)(info + 1);
+ struct erspan_metadata *md = (struct erspan_metadata *)info->options;
struct nlattr *start;
start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN);
--
2.40.1
Powered by blists - more mailing lists