lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 27 Sep 2022 02:15:33 +0000
From:   "liujian (CE)" <liujian56@...wei.com>
To:     John Fastabend <john.fastabend@...il.com>,
        Cong Wang <xiyou.wangcong@...il.com>
CC:     Jakub Sitnicki <jakub@...udflare.com>,
        Eric Dumazet <edumazet@...gle.com>,
        davem <davem@...emloft.net>,
        "yoshfuji@...ux-ipv6.org" <yoshfuji@...ux-ipv6.org>,
        "dsahern@...nel.org" <dsahern@...nel.org>,
        "Jakub Kicinski" <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>,
        netdev <netdev@...r.kernel.org>,
        "bpf@...r.kernel.org" <bpf@...r.kernel.org>
Subject: RE: [bug report] one possible out-of-order issue in sockmap



> -----Original Message-----
> From: John Fastabend [mailto:john.fastabend@...il.com]
> Sent: Tuesday, September 27, 2022 5:16 AM
> To: liujian (CE) <liujian56@...wei.com>; Cong Wang
> <xiyou.wangcong@...il.com>
> Cc: John Fastabend <john.fastabend@...il.com>; Jakub Sitnicki
> <jakub@...udflare.com>; Eric Dumazet <edumazet@...gle.com>; davem
> <davem@...emloft.net>; yoshfuji@...ux-ipv6.org; dsahern@...nel.org;
> Jakub Kicinski <kuba@...nel.org>; Paolo Abeni <pabeni@...hat.com>;
> netdev <netdev@...r.kernel.org>; bpf@...r.kernel.org
> Subject: RE: [bug report] one possible out-of-order issue in sockmap
> 
> liujian (CE) wrote:
> >
> >
> > > -----Original Message-----
> > > From: Cong Wang [mailto:xiyou.wangcong@...il.com]
> > > Sent: Monday, September 26, 2022 2:26 AM
> > > To: liujian (CE) <liujian56@...wei.com>
> > > Cc: John Fastabend <john.fastabend@...il.com>; Jakub Sitnicki
> > > <jakub@...udflare.com>; Eric Dumazet <edumazet@...gle.com>;
> davem
> > > <davem@...emloft.net>; yoshfuji@...ux-ipv6.org; dsahern@...nel.org;
> > > Jakub Kicinski <kuba@...nel.org>; Paolo Abeni <pabeni@...hat.com>;
> > > netdev <netdev@...r.kernel.org>; bpf@...r.kernel.org
> > > Subject: Re: [bug report] one possible out-of-order issue in sockmap
> > >
> > > On Sat, Sep 24, 2022 at 07:59:15AM +0000, liujian (CE) wrote:
> > > > Hello,
> > > >
> > > > I had a scp failure problem here. I analyze the code, and the
> > > > reasons may
> > > be as follows:
> > > >
> > > > From commit e7a5f1f1cd00 ("bpf/sockmap: Read psock ingress_msg
> > > before
> > > > sk_receive_queue", if we use sockops
> > > > (BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB
> > > > and BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) to enable socket's
> > > sockmap
> > > > function, and don't enable strparse and verdict function, the
> > > > out-of-order problem may occur in the following process.
> > > >
> > > > client SK                                   server SK
> > > > ------------------------------------------------------------------
> > > > ----
> > > > ----
> > > > tcp_rcv_synsent_state_process
> > > >   tcp_finish_connect
> > > >     tcp_init_transfer
> > > >       tcp_set_state(sk, TCP_ESTABLISHED);
> > > >       // insert SK to sockmap
> > > >     wake up waitter
> > > >     tcp_send_ack
> > > >
> > > > tcp_bpf_sendmsg(msgA)
> > > > // msgA will go tcp stack
> > > >                                             tcp_rcv_state_process
> > > >                                               tcp_init_transfer
> > > >                                                 //insert SK to sockmap
> > > >                                               tcp_set_state(sk,
> > > >                                                      TCP_ESTABLISHED)
> > > >                                               wake up waitter
> > >
> > > Here after the socket is inserted to a sockmap, its
> > > ->sk_data_ready() is already replaced with
> > > sk_psock_verdict_data_ready(), so msgA should go to sockmap, not TCP
> stack?
> > >
> > It is TCP stack.  Here I only enable BPF_SK_MSG_VERDICT type.
> > bpftool prog load bpf_redir.o /sys/fs/bpf/bpf_redir map name
> > sock_ops_map pinned /sys/fs/bpf/sock_ops_map bpftool prog attach
> > pinned /sys/fs/bpf/bpf_redir msg_verdict pinned
> > /sys/fs/bpf/sock_ops_map
> 
> Is the sender using FAST_OPEN by any chance? We know this bug exists in
> this case. Fix tbd.

FAST_OPEN is not used.
The following test programs can be used to reproduce the out-of-order (OOO) problem.
However, I have not been able to construct a case that reliably reproduces the worst-case scenario described in the problem (msgA arriving later than msgB).

tcp_server.c

int server_port = 5006;

/*
 * tcp_server: reproducer (server side) for the sockmap out-of-order report.
 *
 * Usage: server <port>      (port must be in 1025..65535)
 *
 * Listens on <port>, accepts one connection at a time, sleeps 5 seconds so
 * the client has time to send both of its messages, then prints whatever
 * each recv() call returns, one recv() every 2 seconds.
 *
 * Returns -1 on bad arguments and 1 on socket setup failure; otherwise it
 * serves connections until the process is killed.
 */
int main(int argc, char *argv[])
{
	int serverSocket;
	struct sockaddr_in server_addr;
	struct sockaddr_in clientAddr;
	socklen_t addr_len;
	int client;
	char buffer[200];
	ssize_t iDataNum;
	int optbuf, ret;

	if (argc != 2) {
		printf("server [port]\n");
		return -1;
	}

	server_port = atoi(argv[1]);
	if (server_port < 1025 || server_port > 65535) {
		return -1;
	}

	if ((serverSocket = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
		perror("socket");
		return 1;
	}

	/* Best effort: lets the test be re-run without waiting for TIME_WAIT. */
	optbuf = 1;
	ret = setsockopt(serverSocket, SOL_SOCKET, SO_REUSEADDR, &optbuf, sizeof(optbuf));
	if (ret != 0)
		perror("reuseaddr failed");

	bzero(&server_addr, sizeof(server_addr));
	server_addr.sin_family = AF_INET;
	server_addr.sin_port = htons(server_port);
	server_addr.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(serverSocket, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0) {
		perror("bind");
		close(serverSocket);
		return 1;
	}
	if (listen(serverSocket, 5) < 0) {
		perror("listen");
		close(serverSocket);
		return 1;
	}

	while (1) {
		/* accept() overwrites addr_len, so it must be reset every time. */
		addr_len = sizeof(clientAddr);
		client = accept(serverSocket, (struct sockaddr *)&clientAddr, &addr_len);
		if (client < 0) {
			perror("accept");
			continue;
		}
		printf("wait until the two msgs of client are sent...\n");
		sleep(5);

		while (1) {
			printf("recvmsg:");
			/* Leave one byte for the terminating NUL added below. */
			iDataNum = recv(client, buffer, sizeof(buffer) - 1, 0);
			if (iDataNum < 0) {
				perror("recv null");
				break;
			}
			if (iDataNum == 0) {
				/* Orderly shutdown by the peer: nothing more to read. */
				printf("<connection closed>\n");
				break;
			}
			buffer[iDataNum] = '\0';
			printf("%s\n", buffer);
			sleep(2);
		}
		close(client);
	}

	/* Not reached: the accept loop above only ends when the process dies. */
	close(serverSocket);
	return 0;
}



tcp_client.c

int server_port = 5006;

/*
 * Add (action 'A') or delete (action 'D') the iptables INPUT rule that drops
 * TCP segments to `port` whose only flag among SYN/RST/ACK/FIN is ACK.
 * Returns the system() status, or -1 if the command could not be built.
 */
static int set_ack_drop_rule(char action, int port)
{
	char cmd[160];
	int n;

	n = snprintf(cmd, sizeof(cmd),
		     "iptables -%c INPUT -p tcp -m tcp --dport %d "
		     "--tcp-flags SYN,RST,ACK,FIN ACK -j DROP", action, port);
	if (n < 0 || (size_t)n >= sizeof(cmd))
		return -1;
	return system(cmd);
}

/*
 * tcp_client: reproducer (client side) for the sockmap out-of-order report.
 *
 * Usage: client <sport> <dport>   (both ports must be in 1025..65535)
 *
 * Sequence:
 *   1. bind to <sport> and install the ACK-drop rule for <dport>;
 *   2. connect to 127.0.0.1:<dport> and send message A (10 x 'A');
 *   3. remove the rule, wait 2 seconds, send message B (10 x 'b');
 *   4. wait 10 seconds and close.
 *
 * NOTE(review): the rule presumably keeps the server from seeing the final
 * handshake ACK and message A until it is removed - confirm against the
 * scenario in the bug report. Needs privileges to run iptables.
 *
 * Returns 0 on success, -1 on bad arguments or send failure, 1 on socket
 * setup/connect failure. The iptables rule is removed on every exit path
 * taken after it has been installed.
 */
int main(int argc, char *argv[])
{
	static const char msg_a[] = "AAAAAAAAAA";
	static const char msg_b[] = "bbbbbbbbbb";
	int clientSocket;
	struct sockaddr_in serverAddr;
	struct sockaddr_in clientAddr;
	int ret;
	int client_port;

	if (argc != 3) {
		printf("client [sport] [dport]\n");
		return -1;
	}

	client_port = atoi(argv[1]);
	if (client_port < 1025 || client_port > 65535) {
		return -1;
	}

	server_port = atoi(argv[2]);
	if (server_port < 1025 || server_port > 65535) {
		return -1;
	}

	if ((clientSocket = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
		perror("socket");
		return 1;
	}
	bzero(&clientAddr, sizeof(clientAddr));
	clientAddr.sin_family = AF_INET;
	clientAddr.sin_port = htons(client_port);
	clientAddr.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(clientSocket, (struct sockaddr *)&clientAddr, sizeof(clientAddr)) < 0) {
		perror("bind");
		close(clientSocket);
		return 1;
	}
	bzero(&serverAddr, sizeof(serverAddr));
	serverAddr.sin_family = AF_INET;
	serverAddr.sin_port = htons(server_port);
	serverAddr.sin_addr.s_addr = inet_addr("127.0.0.1");

	/* Without the rule the test cannot show the bug, but it is still safe to run. */
	if (set_ack_drop_rule('A', server_port) != 0)
		fprintf(stderr, "warning: could not add iptables rule\n");

	if (connect(clientSocket, (struct sockaddr *)&serverAddr, sizeof(serverAddr)) < 0) {
		perror("connect");
		set_ack_drop_rule('D', server_port);
		close(clientSocket);
		return 1;
	}

	ret = send(clientSocket, msg_a, strlen(msg_a), 0);
	if (ret <= 0) {
		perror("send");
		set_ack_drop_rule('D', server_port);
		close(clientSocket);
		return -1;
	}
	printf("finish send A\n");

	if (set_ack_drop_rule('D', server_port) != 0)
		fprintf(stderr, "warning: could not delete iptables rule\n");
	sleep(2); // wait serversk insert to sockmap

	printf("start send b\n");
	ret = send(clientSocket, msg_b, strlen(msg_b), 0);
	if (ret <= 0) {
		perror("send");
		close(clientSocket);
		return -1;
	}

	sleep(10);
	close(clientSocket);
	return 0;
}

[root@...alhost sockmap_test]# ./server 5006
wait until the two msgs of client are sent...
recvmsg:bbbbbbbbbb
recvmsg:AAAAAAAAAA
^C

Powered by blists - more mailing lists