[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.LRH.2.20.1905011655100.1124@dhcp-10-175-212-223.vpn.oracle.com>
Date: Wed, 1 May 2019 17:03:08 +0100 (BST)
From: Alan Maguire <alan.maguire@...cle.com>
To: netdev@...r.kernel.org
cc: daniel@...earbox.net, dsahern@...il.com
Subject: MPLS encapsulation and arp table overflow
I'm seeing the following repeated error
[ 130.821362] neighbour: arp_cache: neighbor table overflow!
when using MPLSoverGRE or MPLSoverUDP tunnels on bits synced
with bpf-next as of this morning. The test script below reliably
reproduces the problem, while working fine on a 4.14 kernel (I haven't
bisected yet). It can be run with no arguments, or specifying
gre or udp for the specific encap type.
It seems that every MPLS-encapsulated outbound packet is attempting
to add a neighbor entry, and as a result we hit the
net.ipv4.neigh.default.gc_thresh3 limit quickly.
When this failure occurs, the arp table doesn't show any of
these additional entries. Existing arp table entries are
disappearing too, so perhaps they are being recycled when the
table becomes full?
It's entirely possible that the way I'm setting up the MPLS
tunnels is incorrect, though the fact that the test works fine on
4.14 might suggest otherwise.
An extract from the ftrace function graph when the error is
hit is below:
ping-17611 [039] 6197.507706: funcgraph_entry:
| lwtunnel_xmit() {
ping-17611 [039] 6197.507706: funcgraph_entry:
| mpls_xmit() {
ping-17611 [039] 6197.507706: funcgraph_entry:
| sock_wfree() {
ping-17611 [039] 6197.507706: funcgraph_entry: 0.137
us | sock_def_write_space();
ping-17611 [039] 6197.507707: funcgraph_exit: 0.429
us | }
ping-17611 [039] 6197.507707: funcgraph_entry: 0.126
us | mpls_output_possible();
ping-17611 [039] 6197.507707: funcgraph_entry: 0.109
us | mpls_dev_mtu();
ping-17611 [039] 6197.507708: funcgraph_entry: 0.197
us | mpls_pkt_too_big();
ping-17611 [039] 6197.507708: funcgraph_entry: 0.118
us | skb_push();
ping-17611 [039] 6197.507708: funcgraph_entry: 0.165
us | mpls_stats_inc_outucastpkts();
ping-17611 [039] 6197.507708: funcgraph_entry:
| neigh_xmit() {
ping-17611 [039] 6197.507709: funcgraph_entry: 0.115
us | arp_hash();
ping-17611 [039] 6197.507709: funcgraph_entry:
| ___neigh_create() {
ping-17611 [039] 6197.507709: funcgraph_entry: 0.123
us | _raw_write_lock_bh();
ping-17611 [039] 6197.507709: funcgraph_entry:
| _raw_write_unlock_bh() {
ping-17611 [039] 6197.507709: funcgraph_entry: 0.128
us | __local_bh_enable_ip();
ping-17611 [039] 6197.507710: funcgraph_exit: 0.335
us | }
ping-17611 [039] 6197.507710: funcgraph_entry:
| net_ratelimit() {
If there's any additional data I can collect to help narrow
this down, do let me know (I'll try bisecting in the meantime).
Thanks!
Alan
#!/usr/bin/env bash
# Test MPLSoverUDP and MPLSoverGRE tunnels.
#
# Creates two network namespaces joined by a veth pair, builds an
# MPLS-over-GRE or MPLS-over-FOU(UDP) tunnel between them, and pings
# across the tunnel enough times to cross the neighbour-table GC
# threshold.

# Per-run namespace names; $$ keeps concurrent runs from colliding.
readonly ns_prefix="ns-$$"
readonly ns1="${ns_prefix}1"
readonly ns2="${ns_prefix}2"

# Underlay IPv4 addresses on the veth pair.
readonly ns1_v4="192.168.1.1"
readonly ns2_v4="192.168.1.2"

# Overlay (tunnel) addresses and the prefix routed through the tunnel.
readonly tun_prefix="192.168.2"
readonly ns1_tunv4="${tun_prefix}.1"
readonly ns2_tunv4="${tun_prefix}.2"
readonly tun_route="${tun_prefix}.0/24"

# MPLS label used for the encap route in both directions.
readonly label="1000"
# Tear down both test namespaces (called directly and via "trap cleanup EXIT").
cleanup()
{
	local ns
	for ns in "$ns1" "$ns2"; do
		ip netns delete "$ns"
	done
}
# Configure one end of the MPLS tunnel inside a namespace.
#
# Arguments:
#   $1 - namespace name
#   $2 - encap type: "udp" (MPLS over FOU) or "gre"
#   $3 - veth device in the namespace (rp_filter is relaxed on it)
#   $4 - local underlay IPv4 address
#   $5 - local tunnel (overlay) IPv4 address
#   $6 - remote underlay IPv4 address
#   $7 - remote tunnel (overlay) IPv4 address
tun_setup()
{
	# 'local' so repeated calls from setup() don't clobber globals
	# (notably setup()'s own $encap).
	local ns=$1
	local encap=$2
	local veth=$3
	local local_ip=$4
	local local_tun=$5
	local remote_ip=$6
	local remote_tun=$7

	case "$encap" in
	"udp")
		# FOU receive port 6635; IP protocol 137 is MPLS-in-IP.
		ip netns exec "$ns" modprobe fou
		ip netns exec "$ns" ip fou add port 6635 ipproto 137
		ip netns exec "$ns" ip link add name tun0 type ipip mode any \
			remote "$remote_ip" local "$local_ip" \
			encap fou encap-sport auto encap-dport 6635
		;;
	"gre")
		ip netns exec "$ns" ip link add tun0 type gre \
			remote "$remote_ip" local "$local_ip"
		;;
	esac

	ip netns exec "$ns" ip link set lo up
	ip netns exec "$ns" modprobe mpls_iptunnel
	ip netns exec "$ns" modprobe mpls_gso
	ip netns exec "$ns" ip link set tun0 up arp on

	# Accept our label and deliver decapsulated packets locally.
	ip netns exec "$ns" sysctl -qw net.mpls.platform_labels=65536
	ip netns exec "$ns" ip -f mpls route replace "$label" dev lo

	# Relax reverse-path filtering so tunnel traffic is not dropped,
	# and enable MPLS input on the tunnel device.
	ip netns exec "$ns" sysctl -qw net.ipv4.conf.lo.rp_filter=0
	ip netns exec "$ns" sysctl -qw net.ipv4.conf.all.rp_filter=0
	ip netns exec "$ns" sysctl -qw net.mpls.conf.tun0.input=1
	ip netns exec "$ns" sysctl -qw net.ipv4.conf.tun0.rp_filter=0
	ip netns exec "$ns" sysctl -qw "net.ipv4.conf.${veth}.rp_filter=0"

	ip netns exec "$ns" ip addr add "${local_tun}/24" dev tun0
	# Steer the overlay prefix through the tunnel with MPLS encap.
	ip netns exec "$ns" ip route change \
		"$tun_route" encap mpls "$label" via inet "$remote_tun" dev tun0
}
# Create both namespaces, wire them together with a veth pair, and bring
# up the tunnel on each side.
#
# Arguments:
#   $1 - encap type ("udp" or "gre"), passed through to tun_setup
setup()
{
	local encap=$1

	ip netns add "$ns1"
	ip netns add "$ns2"

	ip link add dev veth1 netns "$ns1" type veth peer name veth2 netns "$ns2"
	ip -netns "$ns1" link set veth1 up
	ip -netns "$ns2" link set veth2 up
	ip -netns "$ns1" -4 addr add "${ns1_v4}/24" dev veth1
	ip -netns "$ns2" -4 addr add "${ns2_v4}/24" dev veth2

	tun_setup "$ns1" "$encap" veth1 "$ns1_v4" "$ns1_tunv4" "$ns2_v4" "$ns2_tunv4"
	tun_setup "$ns2" "$encap" veth2 "$ns2_v4" "$ns2_tunv4" "$ns1_v4" "$ns1_tunv4"
}
set -o errexit

# Pick which encap types to exercise: both by default, or the single
# type named on the command line.
if (( $# == 0 )); then
	encap_types="gre udp"
elif (( $# == 1 )); then
	case "$1" in
	gre|udp)
		encap_types=$1
		;;
	*)
		echo "unsupported encap type $1"
		exit 1
		;;
	esac
else
	echo "Usage: $0"
	echo " or: $0 <gre|udp>"
	exit 1
fi
# Ping a little past the neighbour-table garbage-collection threshold,
# which is where the "neighbor table overflow!" warnings appear.
thresh=$(sysctl -qn net.ipv4.neigh.default.gc_thresh3)
thresh=$((thresh + 100))

for encap in $encap_types ; do
	echo "Testing MPLS-over-${encap} tunnel..."

	# Guarantee namespace teardown even if a setup/ping step fails.
	trap cleanup EXIT
	setup "$encap"

	echo "Verifying basic connectivity between ${ns1}, ${ns2}"
	ip netns exec "$ns1" ping -q -c 1 "$ns2_v4"
	ip netns exec "$ns2" ping -q -c 1 "$ns1_v4"

	echo "Testing tunnel encap/decap"
	# One ping per ~10ms; cap the run at 60s in case of packet loss.
	timeout 60 ip netns exec "$ns1" ping -q -c "$thresh" -i 0.01 "$ns2_tunv4"

	trap - EXIT
	cleanup
	echo "MPLS-over-${encap} tunnel OK"
done
Powered by blists - more mailing lists