[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250828214918.3385-1-fw@strlen.de>
Date: Thu, 28 Aug 2025 23:49:18 +0200
From: Florian Westphal <fw@...len.de>
To: <netdev@...r.kernel.org>
Cc: Paolo Abeni <pabeni@...hat.com>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
<netfilter-devel@...r.kernel.org>,
pablo@...filter.org
Subject: [PATCH net] netfilter: nft_flowtable.sh: re-run with random mtu sizes
Jakub says:
nft_flowtable.sh is one of the most flake-atious test for netdev CI currently :(
The root cause is two-fold:
1. the failing part of the test is supposed to make sure that ip
fragments are forwarded for offloaded flows.
(flowtable has to pass them to classic forward path).
path mtu discovery for these subtests is disabled.
2. nft_flowtable.sh has two passes. One with fixed mtus/file size and
one where link mtus and file sizes are random.
The CI failures all have same pattern:
re-run with random mtus and file size: -o 27663 -l 4117 -r 10089 -s 54384840
[..]
PASS: dscp_egress: dscp packet counters match
FAIL: file mismatch for ns1 -> ns2
In some cases this error triggers a bit ealier, sometimes in a later
subtest:
re-run with random mtus and file size: -o 20201 -l 4555 -r 12657 -s 9405856
[..]
PASS: dscp_egress: dscp packet counters match
PASS: dscp_fwd: dscp packet counters match
2025/08/17 20:37:52 socat[18954] E write(7, 0x560716b96000, 8192): Broken pipe
FAIL: file mismatch for ns1 -> ns2
-rw------- 1 root root 9405856 Aug 17 20:36 /tmp/tmp.2n63vlTrQe
But all logs I saw show same scenario:
1. Failing tests have pmtu discovery off (i.e., ip fragmentation)
2. The test file is much larger than first-pass default (2M Byte)
3. peers have much larger MTUs compared to the 'network'.
These errors are very reproducible when re-running the test with
the same commandline arguments.
The timeout became much more prominent with
1d2fbaad7cd8 ("tcp: stronger sk_rcvbuf checks"): reassembled packets
typically have a skb->truesize more than double the skb length.
As that commit is intentional and pmtud-off with
large-tcp-packets-as-fragments is not normal adjust the test to use a
smaller file for the pmtu-off subtests.
While at it, add more information to pass/fail messages and
also run the dscp alteration subtest with pmtu discovery enabled.
Link: https://netdev.bots.linux.dev/contest.html?test=nft-flowtable-sh
Fixes: f84ab634904c ("selftests: netfilter: nft_flowtable.sh: re-run with random mtu sizes")
Reported-by: Jakub Kicinski <kuba@...nel.org>
Closes: https://lore.kernel.org/netdev/20250822071330.4168f0db@kernel.org/
Signed-off-by: Florian Westphal <fw@...len.de>
---
.../selftests/net/netfilter/nft_flowtable.sh | 113 ++++++++++++------
1 file changed, 76 insertions(+), 37 deletions(-)
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a4ee5496f2a1..45832df98295 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -20,6 +20,7 @@ ret=0
SOCAT_TIMEOUT=60
nsin=""
+nsin_small=""
ns1out=""
ns2out=""
@@ -36,7 +37,7 @@ cleanup() {
cleanup_all_ns
- rm -f "$nsin" "$ns1out" "$ns2out"
+ rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out"
[ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
}
@@ -72,6 +73,7 @@ lmtu=1500
rmtu=2000
filesize=$((2 * 1024 * 1024))
+filesize_small=$((filesize / 16))
usage(){
echo "nft_flowtable.sh [OPTIONS]"
@@ -89,7 +91,10 @@ do
o) omtu=$OPTARG;;
l) lmtu=$OPTARG;;
r) rmtu=$OPTARG;;
- s) filesize=$OPTARG;;
+ s)
+ filesize=$OPTARG
+ filesize_small=$((OPTARG / 16))
+ ;;
*) usage;;
esac
done
@@ -215,6 +220,7 @@ if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
fi
nsin=$(mktemp)
+nsin_small=$(mktemp)
ns1out=$(mktemp)
ns2out=$(mktemp)
@@ -265,6 +271,7 @@ check_counters()
check_dscp()
{
local what=$1
+ local pmtud="$2"
local ok=1
local counter
@@ -277,37 +284,39 @@ check_dscp()
local pc4z=${counter%*bytes*}
local pc4z=${pc4z#*packets}
+ local failmsg="FAIL: pmtu $pmtu: $what counters do not match, expected"
+
case "$what" in
"dscp_none")
if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_fwd")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_ingress")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_egress")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
*)
- echo "FAIL: Unknown DSCP check" 1>&2
+ echo "$failmsg: Unknown DSCP check" 1>&2
ret=1
ok=0
esac
@@ -319,9 +328,9 @@ check_dscp()
check_transfer()
{
- in=$1
- out=$2
- what=$3
+ local in=$1
+ local out=$2
+ local what=$3
if ! cmp "$in" "$out" > /dev/null 2>&1; then
echo "FAIL: file mismatch for $what" 1>&2
@@ -342,25 +351,39 @@ test_tcp_forwarding_ip()
{
local nsa=$1
local nsb=$2
- local dstip=$3
- local dstport=$4
+ local pmtu=$3
+ local dstip=$4
+ local dstport=$5
local lret=0
+ local socatc
+ local socatl
+ local infile="$nsin"
+
+ if [ $pmtu -eq 0 ]; then
+ infile="$nsin_small"
+ fi
- timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" &
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
lpid=$!
busywait 1000 listener_ready
- timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out"
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
+ socatc=$?
wait $lpid
+ socatl=$?
- if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
+ if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then
+ rc=1
+ fi
+
+ if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then
lret=1
ret=1
fi
- if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
+ if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then
lret=1
ret=1
fi
@@ -370,14 +393,16 @@ test_tcp_forwarding_ip()
test_tcp_forwarding()
{
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ local pmtu="$3"
+
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
return $?
}
test_tcp_forwarding_set_dscp()
{
- check_dscp "dscp_none"
+ local pmtu="$3"
ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
@@ -388,8 +413,8 @@ table netdev dscpmangle {
}
EOF
if [ $? -eq 0 ]; then
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_ingress"
+ test_tcp_forwarding_ip "$1" "$2" "$3" 10.0.2.99 12345
+ check_dscp "dscp_ingress" "$pmtu"
ip netns exec "$nsr1" nft delete table netdev dscpmangle
else
@@ -405,10 +430,10 @@ table netdev dscpmangle {
}
EOF
if [ $? -eq 0 ]; then
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_egress"
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
+ check_dscp "dscp_egress" "$pmtu"
- ip netns exec "$nsr1" nft flush table netdev dscpmangle
+ ip netns exec "$nsr1" nft delete table netdev dscpmangle
else
echo "SKIP: Could not load netdev:egress for veth1"
fi
@@ -416,48 +441,53 @@ fi
# partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
# counters should have seen packets (before and after ft offload kicks in).
ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_fwd"
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
+ check_dscp "dscp_fwd" "$pmtu"
}
test_tcp_forwarding_nat()
{
+ local nsa="$1"
+ local nsb="$2"
+ local pmtu="$3"
+ local what="$4"
local lret
- local pmtu
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- lret=$?
+ [ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"
- pmtu=$3
- what=$4
+ test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 10.0.2.99 12345
+ lret=$?
if [ "$lret" -eq 0 ] ; then
if [ "$pmtu" -eq 1 ] ;then
- check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+ check_counters "flow offload for ns1/ns2 with masquerade $what"
else
echo "PASS: flow offload for ns1/ns2 with masquerade $what"
fi
- test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.6.6.6 1666
lret=$?
if [ "$pmtu" -eq 1 ] ;then
- check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+ check_counters "flow offload for ns1/ns2 with dnat $what"
elif [ "$lret" -eq 0 ] ; then
echo "PASS: flow offload for ns1/ns2 with dnat $what"
fi
+ else
+ echo "FAIL: flow offload for ns1/ns2 with dnat $what"
fi
return $lret
}
make_file "$nsin" "$filesize"
+make_file "$nsin_small" "$filesize_small"
# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
# Due to MTU mismatch in both directions, all packets (except small packets like pure
# acks) have to be handled by normal forwarding path. Therefore, packet counters
# are not checked.
-if test_tcp_forwarding "$ns1" "$ns2"; then
+if test_tcp_forwarding "$ns1" "$ns2" 0; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -489,8 +519,9 @@ table ip nat {
}
EOF
+check_dscp "dscp_none" "0"
if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
- echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
+ echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
exit 0
fi
@@ -512,6 +543,14 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
# are lower than file size and packets were forwarded via flowtable layer.
# For earlier tests (large mtus), packets cannot be handled via flowtable
# (except pure acks and other small packets).
+ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
+ip netns exec "$ns2" nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
+ exit 0
+fi
+
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
@@ -644,7 +683,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1
-if test_tcp_forwarding "$ns1" "$ns2"; then
+if test_tcp_forwarding "$ns1" "$ns2" 1; then
check_counters "ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
@@ -668,7 +707,7 @@ if [ "$1" = "" ]; then
fi
echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize"
- $0 -o "$o" -l "$l" -r "$r" -s "$filesize"
+ $0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1
fi
exit $ret
--
2.49.1
Powered by blists - more mailing lists