Message-ID: <alpine.LRH.2.20.1905011655100.1124@dhcp-10-175-212-223.vpn.oracle.com>
Date:   Wed, 1 May 2019 17:03:08 +0100 (BST)
From:   Alan Maguire <alan.maguire@...cle.com>
To:     netdev@...r.kernel.org
cc:     daniel@...earbox.net, dsahern@...il.com
Subject: MPLS encapsulation and arp table overflow

I'm seeing the following repeated error

[  130.821362] neighbour: arp_cache: neighbor table overflow!

when using MPLS-over-GRE or MPLS-over-UDP tunnels on a tree synced
with bpf-next as of this morning. The test script below reliably
reproduces the problem on that tree, while working fine on a 4.14
kernel (I haven't bisected yet). It can be run with no arguments,
or with gre or udp specified to select the encap type.

It seems that every MPLS-encapsulated outbound packet is attempting
to add a neighbor entry, and as a result we hit the
net.ipv4.neigh.default.gc_thresh3 limit quickly.
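
For what it's worth, the growth is easy to see from outside the
test; something along these lines (with <ns1> replaced by the
ns-<pid>1 namespace the script below creates) shows the entry count
climbing towards gc_thresh3 while the pings run:

    sysctl net.ipv4.neigh.default.gc_thresh3
    while true; do
            ip netns exec <ns1> ip -4 neigh show | wc -l
            sleep 1
    done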

When this failure occurs, the ARP table doesn't show any of
these additional entries. Existing ARP table entries are
disappearing too, so perhaps they are being recycled when the
table becomes full?
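
(Worth noting that plain "ip neigh show" hides some NUD states by
default; checking with everything included, e.g.

    ip netns exec <ns1> ip neigh show nud all
    ip netns exec <ns1> ip -s ntable show name arp_cache

may give a fuller picture of where the entries are going.)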

It's entirely possible that the way I'm setting up the MPLS
tunnels is incorrect, though the fact that the test works fine on
4.14 might suggest otherwise.

Below is an extract from the ftrace function graph at the point
where the error is hit:

 ping-17611 [039] 6197.507706: funcgraph_entry:              |  lwtunnel_xmit() {
 ping-17611 [039] 6197.507706: funcgraph_entry:              |    mpls_xmit() {
 ping-17611 [039] 6197.507706: funcgraph_entry:              |      sock_wfree() {
 ping-17611 [039] 6197.507706: funcgraph_entry:   0.137 us   |        sock_def_write_space();
 ping-17611 [039] 6197.507707: funcgraph_exit:    0.429 us   |      }
 ping-17611 [039] 6197.507707: funcgraph_entry:   0.126 us   |      mpls_output_possible();
 ping-17611 [039] 6197.507707: funcgraph_entry:   0.109 us   |      mpls_dev_mtu();
 ping-17611 [039] 6197.507708: funcgraph_entry:   0.197 us   |      mpls_pkt_too_big();
 ping-17611 [039] 6197.507708: funcgraph_entry:   0.118 us   |      skb_push();
 ping-17611 [039] 6197.507708: funcgraph_entry:   0.165 us   |      mpls_stats_inc_outucastpkts();
 ping-17611 [039] 6197.507708: funcgraph_entry:              |      neigh_xmit() {
 ping-17611 [039] 6197.507709: funcgraph_entry:   0.115 us   |        arp_hash();
 ping-17611 [039] 6197.507709: funcgraph_entry:              |        ___neigh_create() {
 ping-17611 [039] 6197.507709: funcgraph_entry:   0.123 us   |          _raw_write_lock_bh();
 ping-17611 [039] 6197.507709: funcgraph_entry:              |          _raw_write_unlock_bh() {
 ping-17611 [039] 6197.507709: funcgraph_entry:   0.128 us   |            __local_bh_enable_ip();
 ping-17611 [039] 6197.507710: funcgraph_exit:    0.335 us   |          }
 ping-17611 [039] 6197.507710: funcgraph_entry:              |          net_ratelimit() {
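
(For reference, the trace above is from the function_graph tracer;
something like the following should reproduce it, modulo the exact
graph function and filters:

    trace-cmd record -p function_graph -g lwtunnel_xmit \
        ip netns exec <ns1> ping -c 1 <tunnel address>
    trace-cmd report

where <ns1> and <tunnel address> come from the test setup below.)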


If there's any additional data I can collect to help narrow
this down, do let me know (I'll try bisecting in the meantime).
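
One thing I can gather easily is the ___neigh_create() call rate
during the test; assuming kprobes/bpftrace are usable on the box, a
one-liner along these lines should do it:

    # count ___neigh_create() calls, printed once per second
    bpftrace -e 'kprobe:___neigh_create { @creates = count(); }
        interval:s:1 { print(@creates); clear(@creates); }'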

Thanks!

Alan

#!/usr/bin/bash

# Test MPLSoverUDP and MPLSoverGRE tunnels.

readonly ns_prefix="ns-$$"
readonly ns1="${ns_prefix}1"
readonly ns2="${ns_prefix}2"

readonly ns1_v4="192.168.1.1"
readonly ns2_v4="192.168.1.2"
readonly tun_prefix="192.168.2"
readonly ns1_tunv4="${tun_prefix}.1"
readonly ns2_tunv4="${tun_prefix}.2"
readonly tun_route="${tun_prefix}.0/24"
readonly label="1000"

cleanup()
{
	ip netns delete $ns1
	ip netns delete $ns2
}

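# Set up an MPLS tunnel (GRE or FOU/UDP encap) in the given namespace
# and route $tun_route over it, pushing MPLS label $label on transmit.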
tun_setup()
{
	ns=$1
	encap=$2
	veth=$3
	local_ip=$4
	local_tun=$5
	remote_ip=$6
	remote_tun=$7

	case $encap in
	"udp")
		ip netns exec $ns modprobe fou
		ip netns exec $ns ip fou add port 6635 ipproto 137
		ip netns exec $ns ip link add name tun0 type ipip mode any \
			remote $remote_ip local $local_ip \
			encap fou encap-sport auto encap-dport 6635
		;;
	"gre")
		ip netns exec $ns ip link add tun0 type gre \
			remote $remote_ip local $local_ip
		;;
	esac

	ip netns exec $ns ip link set lo up
	ip netns exec $ns modprobe mpls_iptunnel
	ip netns exec $ns modprobe mpls_gso
	ip netns exec $ns ip link set tun0 up arp on
	ip netns exec $ns sysctl -qw net.mpls.platform_labels=65536
	ip netns exec $ns ip -f mpls route replace $label dev lo
	ip netns exec $ns sysctl -qw net.ipv4.conf.lo.rp_filter=0
	ip netns exec $ns sysctl -qw net.ipv4.conf.all.rp_filter=0
	ip netns exec $ns sysctl -qw net.mpls.conf.tun0.input=1
	ip netns exec $ns sysctl -qw net.ipv4.conf.tun0.rp_filter=0
	ip netns exec $ns sysctl -qw net.ipv4.conf.${veth}.rp_filter=0
	ip netns exec $ns ip addr add ${local_tun}/24 dev tun0
	ip netns exec $ns ip route change \
		$tun_route encap mpls $label via inet $remote_tun dev tun0
}

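# Create two namespaces connected by a veth pair, then configure a
# tunnel endpoint in each.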
setup()
{
	encap=$1
	ip netns add $ns1
	ip netns add $ns2

	ip link add dev veth1 netns $ns1 type veth peer name veth2 netns $ns2

	ip -netns $ns1 link set veth1 up
	ip -netns $ns2 link set veth2 up

	ip -netns $ns1 -4 addr add ${ns1_v4}/24 dev veth1
	ip -netns $ns2 -4 addr add ${ns2_v4}/24 dev veth2

	tun_setup $ns1 $encap veth1 $ns1_v4 $ns1_tunv4 $ns2_v4 $ns2_tunv4
	tun_setup $ns2 $encap veth2 $ns2_v4 $ns2_tunv4 $ns1_v4 $ns1_tunv4
}

set -o errexit

case "$#" in
0)
	encap_types="gre udp"
	;;
1)
	case $1 in
	"gre"|"udp")
		encap_types=$1
		;;
	*)
		echo "unsupported encap type $1"
		exit 1
		;;
	esac
	;;
*)
	echo "Usage: $0"
	echo "   or: $0 <gre|udp>"
	exit 1
	;;
esac


# Ping count: exceed the neighbour table garbage collection threshold
# (gc_thresh3, 1024 by default), which is where the overflow warnings
# start.
thresh=$(sysctl -qn net.ipv4.neigh.default.gc_thresh3)
thresh=$((thresh + 100))

for encap in $encap_types ; do
	echo "Testing MPLS-over-${encap} tunnel..."
	trap cleanup EXIT
	setup $encap

	echo "Verifying basic connectivity between ${ns1}, ${ns2}"
	ip netns exec $ns1 ping -q -c 1 $ns2_v4
	ip netns exec $ns2 ping -q -c 1 $ns1_v4
	echo "Testing tunnel encap/decap"
	timeout 60 ip netns exec $ns1 ping -q -c $thresh -i 0.01 $ns2_tunv4
	trap - EXIT
	cleanup
	echo "MPLS-over-${encap} tunnel OK"
done
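
(The script needs to run as root; saved as, say, mpls_encap_test.sh
(name arbitrary), running

    ./mpls_encap_test.sh udp

exercises just the FOU path.)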
