lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 13 Jun 2022 12:14:46 +0900
From:   Benjamin Poirier <bpoirier@...dia.com>
To:     Mike Manning <mvrmanning@...il.com>
Cc:     Netdev <netdev@...r.kernel.org>, David Ahern <dsahern@...il.com>,
        Saikrishna Arcot <sarcot@...rosoft.com>,
        Craig Gallek <kraig@...gle.com>
Subject: Re: [PATCH] net: prefer socket bound to interface when not in VRF

On 2021-10-05 14:03 +0100, Mike Manning wrote:
[...]
> 
> Fixes: 6da5b0f027a8 ("net: ensure unbound datagram socket to be chosen when not in a VRF")
> Fixes: e78190581aff ("net: ensure unbound stream socket to be chosen when not in a VRF")
> Signed-off-by: Mike Manning <mmanning@...tta.att-mail.com>
> ---
> 
> diff nettest-baseline-9e9fb7655ed5.txt nettest-fix.txt
> 955,956c955,956
> < TEST: IPv4 TCP connection over VRF with SNAT                                  [FAIL]
> < TEST: IPv6 TCP connection over VRF with SNAT                                  [FAIL]
> ---
> > TEST: IPv4 TCP connection over VRF with SNAT                                  [ OK ]
> > TEST: IPv6 TCP connection over VRF with SNAT                                  [ OK ]
> 958,959c958,959
> < Tests passed: 713
> < Tests failed:   5
> ---
> > Tests passed: 715
> > Tests failed:   3
> 
> ---
>  net/ipv4/inet_hashtables.c  | 4 +++-
>  net/ipv4/udp.c              | 3 ++-
>  net/ipv6/inet6_hashtables.c | 2 +-
>  net/ipv6/udp.c              | 3 ++-
>  4 files changed, 8 insertions(+), 4 deletions(-)
> 

Hi Mike,

I was looking at this commit, 8d6c414cd2fb ("net: prefer socket bound to
interface when not in VRF"), and I get the feeling that it is only
partially effective. It works with UDP connected sockets but it doesn't
work for TCP and UDP unconnected sockets.

The compute_score() functions are a bit misleading. Because of the
reuseport shortcut in their callers (inet_lhash2_lookup() and the like),
the first socket with score > 0 may be chosen, not necessarily the
socket with highest score. In order to prefer certain sockets, I think
an approach like commit d894ba18d4e4 ("soreuseport: fix ordering for
mixed v4/v6 sockets") would be needed. What do you think?

Extra info:
1) fcnal-test.sh results

I tried to reproduce the fcnal-test.sh test results quoted above but in
my case the test cases already pass at 8d6c414cd2fb^ and 9e9fb7655ed5.
Moreover I believe those test cases don't have multiple listening
sockets. So that just added to my confusion.

Running 9e9fb7655ed5,
root@...d:/src/linux/tools/testing/selftests/net# ./fcnal-test.sh -t use_cases
[...]

#################################################################
SNAT on VRF

TEST: IPv4 TCP connection over VRF with SNAT                                  [ OK ]
TEST: IPv6 TCP connection over VRF with SNAT                                  [ OK ]

Tests passed:  16
Tests failed:   0


2) reuseport_bindtodevice test

I wrote a selftest based on
tools/testing/selftests/net/reuseport_addr_any.c It tests that listening
sockets that have SO_BINDTODEVICE set are preferred over ones that do
not. All of the sockets have SO_REUSEPORT set. I ran it over a few
relevant revisions:

               IPv4                       IPv6
HEAD           TCP  UDP unconn  UDP conn  TCP  UDP unconn  UDP conn
6a5ef90c58da^  ✔    ✔           ✔         ✔    ✔           ✔   
6a5ef90c58da   ✔    ✘           ✔         ✔    ✘           ✔   
fd1914b2901b   ✘    ✘           ✔         ✘    ✘           ✔   
7e225619e8af   ✘    ✘           ✘         ✘    ✘           ✘   
8d6c414cd2fb   ✘    ✘           ✔         ✘    ✘           ✔   

✔ pass
✘ fail

reuseport_bindtodevice.c:

// SPDX-License-Identifier: GPL-2.0

/* Test that listening sockets that have SO_BINDTODEVICE set are preferred
 * over ones that do not. All of the sockets have SO_REUSEPORT set.
 */

#define _GNU_SOURCE

#include <arpa/inet.h>
#include <errno.h>
#include <error.h>
#include <linux/in.h>
#include <linux/unistd.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>


static const int SEND_PORT = 8888;
static const int RECV_PORT = 8889;

static const char *get_family_name(int domain)
{
	if (domain == AF_INET)
		return "IPv4";
	else if (domain == AF_INET6)
		return "IPv6";
	else
		error(1, 0, "Unknown address family \"%d\"", domain);

	return "";
}

static void build_rcv_fd(int domain, int type, int *rcv_fds, int count,
			 const char *ifname, bool do_connect)
{
	struct sockaddr_storage saddr, daddr;
	int opt, i;

	if (domain == AF_INET) {
		struct sockaddr_in *saddr4 = (struct sockaddr_in *)&saddr,
				   *daddr4 = (struct sockaddr_in *)&daddr;

		saddr4->sin_family = AF_INET;
		saddr4->sin_addr.s_addr = htonl(INADDR_ANY);
		saddr4->sin_port = htons(RECV_PORT);

		daddr4->sin_family = AF_INET;
		daddr4->sin_addr.s_addr = htonl(INADDR_ANY);
		daddr4->sin_port = htons(SEND_PORT);
	} else if (domain == AF_INET6) {
		struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)&saddr,
				    *daddr6 = (struct sockaddr_in6 *)&daddr;

		saddr6->sin6_family = AF_INET6;
		saddr6->sin6_addr = in6addr_any;
		saddr6->sin6_port = htons(RECV_PORT);

		daddr6->sin6_family = AF_INET6;
		daddr6->sin6_addr = in6addr_any;
		daddr6->sin6_port = htons(SEND_PORT);
	} else {
		error(1, 0, "Unsupported family %d", domain);
	}

	for (i = 0; i < count; ++i) {
		rcv_fds[i] = socket(domain, type, 0);
		if (rcv_fds[i] < 0)
			error(1, errno, "failed to create receive socket");

		opt = 1;
		if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
			       sizeof(opt)))
			error(1, errno, "failed to set SO_REUSEPORT");

		if (ifname && setsockopt(rcv_fds[i], SOL_SOCKET,
					 SO_BINDTODEVICE, ifname,
					 strlen(ifname)))
			error(1, errno, "failed to set SO_BINDTODEVICE");

		if (bind(rcv_fds[i], (struct sockaddr *)&saddr, sizeof(saddr)))
			error(1, errno, "failed to bind receive socket");

		if (do_connect &&
		    connect(rcv_fds[i], (struct sockaddr *)&daddr,
			    sizeof(daddr)))
			error(1, errno, "failed to connect receive socket");

		if (type == SOCK_STREAM && listen(rcv_fds[i], 10))
			error(1, errno, "failed to listen on receive socket");
	}
}

static int connect_and_send(int domain, int type)
{
	struct sockaddr_storage saddr, daddr;
	int fd;

	if (domain == AF_INET) {
		struct sockaddr_in *saddr4 = (struct sockaddr_in *)&saddr,
				   *daddr4 = (struct sockaddr_in *)&daddr;

		saddr4->sin_family = AF_INET;
		saddr4->sin_addr.s_addr = htonl(INADDR_ANY);
		saddr4->sin_port = htons(SEND_PORT);

		daddr4->sin_family = AF_INET;
		daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		daddr4->sin_port = htons(RECV_PORT);
	} else if (domain == AF_INET6) {
		struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)&saddr,
				    *daddr6 = (struct sockaddr_in6 *)&daddr;

		saddr6->sin6_family = AF_INET6;
		saddr6->sin6_addr = in6addr_any;
		saddr6->sin6_port = htons(SEND_PORT);

		daddr6->sin6_family = AF_INET6;
		daddr6->sin6_addr = in6addr_loopback;
		daddr6->sin6_port = htons(RECV_PORT);
	} else {
		error(1, 0, "Unsupported family %d", domain);
	}

	fd = socket(domain, type, 0);
	if (fd < 0)
		error(1, errno, "failed to create send socket");

	if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
		error(1, errno, "failed to bind send socket");

	if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
		error(1, errno, "failed to connect send socket");

	if (send(fd, "a", 1, 0) < 0)
		error(1, errno, "failed to send message");

	return fd;
}

static int receive_once(int epfd, int type)
{
	struct epoll_event ev;
	int i, fd;
	char buf[8];

	i = epoll_wait(epfd, &ev, 1, 3);
	if (i < 0)
		error(1, errno, "epoll_wait failed");
	else if (i == 0)
		error(1, errno, "no socket is ready");

	if (type == SOCK_STREAM) {
		fd = accept(ev.data.fd, NULL, NULL);
		if (fd < 0)
			error(1, errno, "failed to accept");
		i = recv(fd, buf, sizeof(buf), 0);
		close(fd);
	} else {
		i = recv(ev.data.fd, buf, sizeof(buf), 0);
	}

	if (i < 0)
		error(1, errno, "failed to recv");

	return ev.data.fd;
}

static int test(int *rcv_fds, int count, int domain, int type, int fd)
{
	int epfd, i, send_fd, recv_fd;
	struct epoll_event ev;

	epfd = epoll_create(1);
	if (epfd < 0)
		error(1, errno, "failed to create epoll");

	ev.events = EPOLLIN;
	for (i = 0; i < count; ++i) {
		ev.data.fd = rcv_fds[i];
		if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
			error(1, errno, "failed to register sock epoll");
	}

	send_fd = connect_and_send(domain, type);

	recv_fd = receive_once(epfd, type);

	close(send_fd);
	close(epfd);

	return recv_fd == fd;
}

static int run_one_test(int domain, int type, bool do_connect)
{
	/* Below we test that a socket listening with SO_BINDTODEVICE set is
	 * always selected in preference over a socket listening without. Bugs
	 * where this is not the case often result in sockets created first or
	 * last to get picked. So below we make sure that there are always
	 * sockets with SO_BINDTODEVICE created before and after a specific
	 * socket is created.
	 */
	int rcv_fds[10], i, result;

	build_rcv_fd(AF_INET, type, rcv_fds, 2, NULL, do_connect);
	build_rcv_fd(AF_INET6, type, rcv_fds + 2, 2, NULL, do_connect);
	build_rcv_fd(domain, type, rcv_fds + 4, 1, "lo", do_connect);
	build_rcv_fd(AF_INET, type, rcv_fds + 5, 2, NULL, do_connect);
	build_rcv_fd(AF_INET6, type, rcv_fds + 7, 2, NULL, do_connect);
	result = test(rcv_fds, 9, domain, type, rcv_fds[4]);
	for (i = 0; i < 9; ++i)
		close(rcv_fds[i]);
	if (result)
		fprintf(stderr, "pass\n");
	else
		fprintf(stderr, "fail\n");
	return result;
}

static int test_family(int domain)
{
	int result = 1;

	fprintf(stderr, "%s TCP ... ", get_family_name(domain));
	result &= run_one_test(domain, SOCK_STREAM, false);

	fprintf(stderr, "%s UDP unconnected ... ", get_family_name(domain));
	result &= run_one_test(domain, SOCK_DGRAM, false);

	fprintf(stderr, "%s UDP connected ... ", get_family_name(domain));
	result &= run_one_test(domain, SOCK_DGRAM, true);

	return result;
}

int main(void)
{
	int result = 1;

	result &= test_family(AF_INET);
	result &= test_family(AF_INET6);

	if (result) {
		fprintf(stderr, "SUCCESS\n");
		return 0;
	}
	fprintf(stderr, "FAIL\n");
	return 1;
}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ