[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250424211455.242482-5-pablo@netfilter.org>
Date: Thu, 24 Apr 2025 23:14:52 +0200
From: Pablo Neira Ayuso <pablo@...filter.org>
To: netfilter-devel@...r.kernel.org
Cc: davem@...emloft.net,
netdev@...r.kernel.org,
kuba@...nel.org,
pabeni@...hat.com,
edumazet@...gle.com,
fw@...len.de,
horms@...nel.org
Subject: [PATCH net-next 4/7] netfilter: Exclude LEGACY TABLES on PREEMPT_RT.
The seqcount xt_recseq is used to synchronize the replacement of
xt_table::private in xt_replace_table() against all readers such as
ipt_do_table()
To ensure that there is only one writer, the writing side disables
bottom halves. The sequence counter can be acquired recursively. Only the
first invocation modifies the sequence counter (signaling that a writer
is in progress) while the following (recursive) writer does not modify
the counter.
The lack of a proper locking mechanism for the sequence counter can lead
to live lock on PREEMPT_RT if the high prior reader preempts the
writer. Additionally if the per-CPU lock on PREEMPT_RT is removed from
local_bh_disable() then there is no synchronisation for the per-CPU
sequence counter.
The affected code is "just" the legacy netfilter code which is replaced
by "netfilter tables". That code can be disabled without sacrificing
functionality because everything is provided by the newer
implementation. This will only requires the usage of the "-nft" tools
instead of the "-legacy" ones.
The long term plan is to remove the legacy code so lets accelerate the
progress.
Relax dependencies on iptables legacy, replace select with depends on,
this should cause no harm to existing kernel configs and users can still
toggle IP{6}_NF_IPTABLES_LEGACY in any case.
Make EBTABLES_LEGACY, IPTABLES_LEGACY and ARPTABLES depend on
NETFILTER_LEGACY. Hide xt_recseq and its users, xt_register_table() and
xt_percpu_counter_alloc() behind NETFILTER_LEGACY. Let NETFILTER_LEGACY
depend on !PREEMPT_RT.
Replace CONFIG_IP6_NF_MANGLE->CONFIG_IP6_NF_IPTABLES for TCPOPTSTRIP and
add CONFIG_NFT_COMPAT_ARP to the MARK target for the IPv6 and ARP target
to keep it enabled without the LEGACY code for NFT.
Regarding selftests, prepare for non-default IP_TABLES_LEGACY.
Enable relevant iptables config options explicitly, this is needed
to avoid breakage when symbols related to iptables-legacy
will depend on NETFILTER_LEGACY resp. IP_TABLES_LEGACY.
This also means that the classic tables (Kernel modules) will
not be enabled by default, so enable them too.
Co-developed-by: Florian Westphal <fw@...len.de>
Co-developed-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Signed-off-by: Pablo Neira Ayuso <pablo@...filter.org>
---
net/Kconfig | 10 ++++++++++
net/bridge/netfilter/Kconfig | 8 ++++----
net/ipv4/netfilter/Kconfig | 15 ++++++++-------
net/ipv6/netfilter/Kconfig | 13 +++++++------
net/netfilter/x_tables.c | 16 +++++++++++-----
net/netfilter/xt_TCPOPTSTRIP.c | 4 ++--
net/netfilter/xt_mark.c | 2 +-
tools/testing/selftests/net/config | 11 +++++++++++
tools/testing/selftests/net/netfilter/config | 5 +++++
9 files changed, 59 insertions(+), 25 deletions(-)
diff --git a/net/Kconfig b/net/Kconfig
index 202cc595e5e6..1a4d18b8501d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -211,6 +211,16 @@ menuconfig NETFILTER
if NETFILTER
+config NETFILTER_LEGACY
+ bool "Netfilter legacy tables support"
+ depends on NETFILTER && !PREEMPT_RT
+ help
+ Say Y here if you still require support for legacy tables. This is
+ required by the legacy tools (iptables-legacy) and is not needed if
+ you use iptables over nftables (iptales-nft).
+ Legacy support is not limited to IP, it also includes EBTABLES and
+ ARPTABLES.
+
config NETFILTER_ADVANCED
bool "Advanced netfilter configuration"
depends on NETFILTER
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index f16bbbbb9481..008012742188 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -42,7 +42,7 @@ config NF_CONNTRACK_BRIDGE
# old sockopt interface and eval loop
config BRIDGE_NF_EBTABLES_LEGACY
tristate "Legacy EBTABLES support"
- depends on BRIDGE && NETFILTER_XTABLES
+ depends on BRIDGE && NETFILTER_XTABLES && NETFILTER_LEGACY
default n
help
Legacy ebtables packet/frame classifier.
@@ -65,7 +65,7 @@ if BRIDGE_NF_EBTABLES
#
config BRIDGE_EBT_BROUTE
tristate "ebt: broute table support"
- select BRIDGE_NF_EBTABLES_LEGACY
+ depends on BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables broute table is used to define rules that decide between
bridging and routing frames, giving Linux the functionality of a
@@ -76,7 +76,7 @@ config BRIDGE_EBT_BROUTE
config BRIDGE_EBT_T_FILTER
tristate "ebt: filter table support"
- select BRIDGE_NF_EBTABLES_LEGACY
+ depends on BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables filter table is used to define frame filtering rules at
local input, forwarding and local output. See the man page for
@@ -86,7 +86,7 @@ config BRIDGE_EBT_T_FILTER
config BRIDGE_EBT_T_NAT
tristate "ebt: nat table support"
- select BRIDGE_NF_EBTABLES_LEGACY
+ depends on BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables nat table is used to define rules that alter the MAC
source address (MAC SNAT) or the MAC destination address (MAC DNAT).
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index ef8009281da5..dcf015e0d426 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -14,6 +14,7 @@ config NF_DEFRAG_IPV4
config IP_NF_IPTABLES_LEGACY
tristate "Legacy IP tables support"
default n
+ depends on NETFILTER_LEGACY
select NETFILTER_XTABLES
help
iptables is a legacy packet classifier.
@@ -183,7 +184,7 @@ config IP_NF_MATCH_TTL
config IP_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
- select IP_NF_IPTABLES_LEGACY
+ depends on IP_NF_IPTABLES_LEGACY
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@@ -220,10 +221,10 @@ config IP_NF_TARGET_SYNPROXY
config IP_NF_NAT
tristate "iptables NAT support"
depends on NF_CONNTRACK
+ depends on IP_NF_IPTABLES_LEGACY
default m if NETFILTER_ADVANCED=n
select NF_NAT
select NETFILTER_XT_NAT
- select IP_NF_IPTABLES_LEGACY
help
This enables the `nat' table in iptables. This allows masquerading,
port forwarding and other forms of full Network Address Port
@@ -264,7 +265,7 @@ endif # IP_NF_NAT
config IP_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
- select IP_NF_IPTABLES_LEGACY
+ depends on IP_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -299,7 +300,7 @@ config IP_NF_TARGET_TTL
# raw + specific targets
config IP_NF_RAW
tristate 'raw table support (required for NOTRACK/TRACE)'
- select IP_NF_IPTABLES_LEGACY
+ depends on IP_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -313,7 +314,7 @@ config IP_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
- select IP_NF_IPTABLES_LEGACY
+ depends on IP_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -325,7 +326,7 @@ endif # IP_NF_IPTABLES
# ARP tables
config IP_NF_ARPTABLES
tristate "Legacy ARPTABLES support"
- depends on NETFILTER_XTABLES
+ depends on NETFILTER_XTABLES && NETFILTER_LEGACY
default n
help
arptables is a legacy packet classifier.
@@ -342,7 +343,7 @@ config IP_NF_ARPFILTER
tristate "arptables-legacy packet filtering support"
select IP_NF_ARPTABLES
select NETFILTER_FAMILY_ARP
- depends on NETFILTER_XTABLES
+ depends on NETFILTER_XTABLES && NETFILTER_LEGACY
help
ARP packet filtering defines a table `filter', which has a series of
rules for simple ARP packet filtering at local input and
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index e087a8e97ba7..303942174b5d 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -9,8 +9,9 @@ menu "IPv6: Netfilter Configuration"
# old sockopt interface and eval loop
config IP6_NF_IPTABLES_LEGACY
tristate "Legacy IP6 tables support"
- depends on INET && IPV6
+ depends on INET && IPV6 && NETFILTER_LEGACY
select NETFILTER_XTABLES
+ select IP6_NF_IPTABLES
default n
help
ip6tables is a legacy packet classifier.
@@ -197,7 +198,7 @@ config IP6_NF_TARGET_HL
config IP6_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
- select IP6_NF_IPTABLES_LEGACY
+ depends on IP6_NF_IPTABLES_LEGACY
tristate
help
Packet filtering defines a table `filter', which has a series of
@@ -234,7 +235,7 @@ config IP6_NF_TARGET_SYNPROXY
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
- select IP6_NF_IPTABLES_LEGACY
+ depends on IP6_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -244,7 +245,7 @@ config IP6_NF_MANGLE
config IP6_NF_RAW
tristate 'raw table support (required for TRACE)'
- select IP6_NF_IPTABLES_LEGACY
+ depends on IP6_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to ip6tables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -258,7 +259,7 @@ config IP6_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
- select IP6_NF_IPTABLES_LEGACY
+ depends on IP6_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -269,8 +270,8 @@ config IP6_NF_NAT
tristate "ip6tables NAT support"
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
+ depends on IP6_NF_IPTABLES_LEGACY
select NF_NAT
- select IP6_NF_IPTABLES_LEGACY
select NETFILTER_XT_NAT
help
This enables the `nat' table in ip6tables. This allows masquerading,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 709840612f0d..24788bd3cbcb 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1317,12 +1317,13 @@ void xt_compat_unlock(u_int8_t af)
EXPORT_SYMBOL_GPL(xt_compat_unlock);
#endif
-DEFINE_PER_CPU(seqcount_t, xt_recseq);
-EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
-
struct static_key xt_tee_enabled __read_mostly;
EXPORT_SYMBOL_GPL(xt_tee_enabled);
+#ifdef CONFIG_NETFILTER_LEGACY
+DEFINE_PER_CPU(seqcount_t, xt_recseq);
+EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
+
static int xt_jumpstack_alloc(struct xt_table_info *i)
{
unsigned int size;
@@ -1514,6 +1515,7 @@ void *xt_unregister_table(struct xt_table *table)
return private;
}
EXPORT_SYMBOL_GPL(xt_unregister_table);
+#endif
#ifdef CONFIG_PROC_FS
static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
@@ -1897,6 +1899,7 @@ void xt_proto_fini(struct net *net, u_int8_t af)
}
EXPORT_SYMBOL_GPL(xt_proto_fini);
+#ifdef CONFIG_NETFILTER_LEGACY
/**
* xt_percpu_counter_alloc - allocate x_tables rule counter
*
@@ -1951,6 +1954,7 @@ void xt_percpu_counter_free(struct xt_counters *counters)
free_percpu((void __percpu *)pcnt);
}
EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
+#endif
static int __net_init xt_net_init(struct net *net)
{
@@ -1983,8 +1987,10 @@ static int __init xt_init(void)
unsigned int i;
int rv;
- for_each_possible_cpu(i) {
- seqcount_init(&per_cpu(xt_recseq, i));
+ if (IS_ENABLED(CONFIG_NETFILTER_LEGACY)) {
+ for_each_possible_cpu(i) {
+ seqcount_init(&per_cpu(xt_recseq, i));
+ }
}
xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 30e99464171b..93f064306901 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -91,7 +91,7 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb));
}
-#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static unsigned int
tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -119,7 +119,7 @@ static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
.targetsize = sizeof(struct xt_tcpoptstrip_target_info),
.me = THIS_MODULE,
},
-#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "TCPOPTSTRIP",
.family = NFPROTO_IPV6,
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 65b965ca40ea..59b9d04400ca 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -48,7 +48,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
.targetsize = sizeof(struct xt_mark_tginfo2),
.me = THIS_MODULE,
},
-#if IS_ENABLED(CONFIG_IP_NF_ARPTABLES)
+#if IS_ENABLED(CONFIG_IP_NF_ARPTABLES) || IS_ENABLED(CONFIG_NFT_COMPAT_ARP)
{
.name = "MARK",
.revision = 2,
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 3cfef5153823..20deec955d39 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -29,17 +29,26 @@ CONFIG_INET_ESP_OFFLOAD=y
CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NETFILTER=y
+CONFIG_NETFILTER_LEGACY=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_SIT=y
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_RAW=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IPV6_GRE=m
CONFIG_IPV6_SEG6_LWTUNNEL=y
@@ -57,6 +66,8 @@ CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_NAT=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
+CONFIG_NETFILTER_XT_NAT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GACT=m
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 43d8b500d391..55ffb6f77ad4 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -1,6 +1,8 @@
CONFIG_AUDIT=y
CONFIG_BPF_SYSCALL=y
CONFIG_BRIDGE=m
+CONFIG_NETFILTER_LEGACY=y
+CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m
CONFIG_BRIDGE_EBT_BROUTE=m
CONFIG_BRIDGE_EBT_IP=m
CONFIG_BRIDGE_EBT_REDIRECT=m
@@ -14,7 +16,10 @@ CONFIG_INET_ESP=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP_NF_RAW=m
--
2.30.2
Powered by blists - more mailing lists