[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0dc1f0f9a8064ec3abd12bdcb069aaaf@huawei.com>
Date: Tue, 27 Sep 2022 02:15:33 +0000
From: "liujian (CE)" <liujian56@...wei.com>
To: John Fastabend <john.fastabend@...il.com>,
Cong Wang <xiyou.wangcong@...il.com>
CC: Jakub Sitnicki <jakub@...udflare.com>,
Eric Dumazet <edumazet@...gle.com>,
davem <davem@...emloft.net>,
"yoshfuji@...ux-ipv6.org" <yoshfuji@...ux-ipv6.org>,
"dsahern@...nel.org" <dsahern@...nel.org>,
"Jakub Kicinski" <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
netdev <netdev@...r.kernel.org>,
"bpf@...r.kernel.org" <bpf@...r.kernel.org>
Subject: RE: [bug report] one possible out-of-order issue in sockmap
> -----Original Message-----
> From: John Fastabend [mailto:john.fastabend@...il.com]
> Sent: Tuesday, September 27, 2022 5:16 AM
> To: liujian (CE) <liujian56@...wei.com>; Cong Wang
> <xiyou.wangcong@...il.com>
> Cc: John Fastabend <john.fastabend@...il.com>; Jakub Sitnicki
> <jakub@...udflare.com>; Eric Dumazet <edumazet@...gle.com>; davem
> <davem@...emloft.net>; yoshfuji@...ux-ipv6.org; dsahern@...nel.org;
> Jakub Kicinski <kuba@...nel.org>; Paolo Abeni <pabeni@...hat.com>;
> netdev <netdev@...r.kernel.org>; bpf@...r.kernel.org
> Subject: RE: [bug report] one possible out-of-order issue in sockmap
>
> liujian (CE) wrote:
> >
> >
> > > -----Original Message-----
> > > From: Cong Wang [mailto:xiyou.wangcong@...il.com]
> > > Sent: Monday, September 26, 2022 2:26 AM
> > > To: liujian (CE) <liujian56@...wei.com>
> > > Cc: John Fastabend <john.fastabend@...il.com>; Jakub Sitnicki
> > > <jakub@...udflare.com>; Eric Dumazet <edumazet@...gle.com>;
> davem
> > > <davem@...emloft.net>; yoshfuji@...ux-ipv6.org; dsahern@...nel.org;
> > > Jakub Kicinski <kuba@...nel.org>; Paolo Abeni <pabeni@...hat.com>;
> > > netdev <netdev@...r.kernel.org>; bpf@...r.kernel.org
> > > Subject: Re: [bug report] one possible out-of-order issue in sockmap
> > >
> > > On Sat, Sep 24, 2022 at 07:59:15AM +0000, liujian (CE) wrote:
> > > > Hello,
> > > >
> > > > I had a scp failure problem here. I analyze the code, and the
> > > > reasons may
> > > be as follows:
> > > >
> > > > From commit e7a5f1f1cd00 ("bpf/sockmap: Read psock ingress_msg
> > > before
> > > > sk_receive_queue", if we use sockops
> > > > (BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB
> > > > and BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) to enable socket's
> > > sockmap
> > > > function, and don't enable strparse and verdict function, the
> > > > out-of-order problem may occur in the following process.
> > > >
> > > > client SK server SK
> > > > ------------------------------------------------------------------
> > > > ----
> > > > ----
> > > > tcp_rcv_synsent_state_process
> > > > tcp_finish_connect
> > > > tcp_init_transfer
> > > > tcp_set_state(sk, TCP_ESTABLISHED);
> > > > // insert SK to sockmap
> > > > wake up waiter
> > > > tcp_send_ack
> > > >
> > > > tcp_bpf_sendmsg(msgA)
> > > > // msgA will go tcp stack
> > > > tcp_rcv_state_process
> > > > tcp_init_transfer
> > > > //insert SK to sockmap
> > > > tcp_set_state(sk,
> > > > TCP_ESTABLISHED)
> > > > wake up waiter
> > >
> > > Here after the socket is inserted to a sockmap, its
> > > ->sk_data_ready() is already replaced with
> > > sk_psock_verdict_data_ready(), so msgA should go to sockmap, not TCP
> stack?
> > >
> > It is TCP stack. Here I only enable BPF_SK_MSG_VERDICT type.
> > bpftool prog load bpf_redir.o /sys/fs/bpf/bpf_redir map name
> > sock_ops_map pinned /sys/fs/bpf/sock_ops_map bpftool prog attach
> > pinned /sys/fs/bpf/bpf_redir msg_verdict pinned
> > /sys/fs/bpf/sock_ops_map
>
> Is the sender using FAST_OPEN by any chance? We know this bug exists in
> this case. Fix tbd.
FAST_OPEN is not used.
The following test cases can be used to reproduce the OOO problem.
However, I have not been able to construct a case that deterministically reproduces the worst-case scenario described in the report (msgA arriving later than msgB).
tcp_server.c
int server_port = 5006;

/*
 * tcp_server: receiving side of the sockmap out-of-order reproducer.
 *
 * Usage: server <port>      (port must be in 1025..65535)
 *
 * Listens on <port>, accepts one connection at a time, waits 5 seconds so
 * the client can queue both of its messages, then prints every chunk
 * returned by recv() (one recv() every 2 seconds).  When the peer closes
 * the connection or recv() fails, the connection is closed and the server
 * goes back to accept().
 *
 * Returns -1 on bad arguments and 1 on socket setup failure; otherwise it
 * serves connections until killed.
 */
int main(int argc, char *argv[])
{
	int serverSocket;
	struct sockaddr_in server_addr;
	struct sockaddr_in clientAddr;
	socklen_t addr_len;
	int client;
	char buffer[200];
	ssize_t iDataNum;
	int optbuf, ret;

	if (argc != 2) {
		return -1;
	}
	server_port = atoi(argv[1]);
	if (server_port < 1025 || server_port > 65535) {
		return -1;
	}

	serverSocket = socket(AF_INET, SOCK_STREAM, 0);
	if (serverSocket < 0) {
		perror("socket");
		return 1;
	}

	/* Best effort: lets the test be restarted without waiting for TIME_WAIT. */
	optbuf = 1;
	ret = setsockopt(serverSocket, SOL_SOCKET, SO_REUSEADDR, &optbuf, sizeof(optbuf));
	if (ret != 0) {
		perror("reuseaddr failed");
	}

	memset(&server_addr, 0, sizeof(server_addr));
	server_addr.sin_family = AF_INET;
	server_addr.sin_port = htons(server_port);
	server_addr.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(serverSocket, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0) {
		perror("bind");
		close(serverSocket);
		return 1;
	}
	if (listen(serverSocket, 5) < 0) {
		perror("listen");
		close(serverSocket);
		return 1;
	}

	while (1) {
		/* addr_len is value-result: it must be reset before every accept(). */
		addr_len = sizeof(clientAddr);
		client = accept(serverSocket, (struct sockaddr *)&clientAddr, &addr_len);
		if (client < 0) {
			perror("accept");
			continue;
		}

		printf("wait until the two msgs of client are sent...\n");
		sleep(5);

		while (1) {
			printf("recvmsg:");
			/* Leave one byte for the terminator so the buffer cannot overflow. */
			iDataNum = recv(client, buffer, sizeof(buffer) - 1, 0);
			if (iDataNum < 0) {
				perror("recv null");
				break;
			}
			if (iDataNum == 0) {
				/* Orderly shutdown by the peer: stop reading this connection. */
				printf("\n");
				break;
			}
			buffer[iDataNum] = '\0';
			printf("%s\n", buffer);
			sleep(2);
		}
		close(client);
	}

	/* Not reached: the accept loop above only ends when the process is killed. */
	close(serverSocket);
	return 0;
}
tcp_client.c
int server_port = 5006;

/*
 * Add (action 'A') or delete (action 'D') the iptables INPUT rule that
 * drops TCP segments to `port` whose SYN/RST/ACK/FIN flags are exactly ACK.
 * `port` must already be validated by the caller; it is the only variable
 * part of the command line.
 *
 * Returns 0 when the iptables command ran and exited successfully, -1
 * otherwise.
 */
static int ack_drop_rule(char action, int port)
{
	char cmd[160];
	int n;

	n = snprintf(cmd, sizeof(cmd),
		     "iptables -%c INPUT -p tcp -m tcp --dport %d "
		     "--tcp-flags SYN,RST,ACK,FIN ACK -j DROP",
		     action, port);
	if (n < 0 || (size_t)n >= sizeof(cmd)) {
		return -1;
	}
	return system(cmd) == 0 ? 0 : -1;
}

/*
 * tcp_client: sending side of the sockmap out-of-order reproducer.
 *
 * Usage: client <sport> <dport>      (both ports must be in 1025..65535)
 *
 * Binds to <sport>, installs the ACK-drop rule for <dport>, connects to
 * 127.0.0.1:<dport> and sends 10 bytes of 'A'.  It then removes the rule,
 * waits 2 seconds, sends 10 bytes of 'b', waits 10 seconds and closes.
 *
 * The rule is removed and the socket is closed on every exit path taken
 * after they were set up.  Must be run with enough privilege to invoke
 * iptables.
 *
 * Returns 0 on success, -1 on bad arguments or send failure, 1 on socket
 * setup failure.
 */
int main(int argc, char *argv[])
{
	int clientSocket;
	struct sockaddr_in serverAddr;
	struct sockaddr_in clientAddr;
	char sendbuf[4096];
	int ret;
	int client_port;
	int rule_added = 0;
	int rc = 1;

	if (argc != 3) {
		printf("client [sport] [dport]\n");
		return -1;
	}
	client_port = atoi(argv[1]);
	if (client_port < 1025 || client_port > 65535) {
		return -1;
	}
	server_port = atoi(argv[2]);
	if (server_port < 1025 || server_port > 65535) {
		return -1;
	}

	clientSocket = socket(AF_INET, SOCK_STREAM, 0);
	if (clientSocket < 0) {
		perror("socket");
		return 1;
	}

	memset(&clientAddr, 0, sizeof(clientAddr));
	clientAddr.sin_family = AF_INET;
	clientAddr.sin_port = htons(client_port);
	clientAddr.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(clientSocket, (struct sockaddr *)&clientAddr, sizeof(clientAddr)) < 0) {
		perror("bind");
		goto out;
	}

	memset(&serverAddr, 0, sizeof(serverAddr));
	serverAddr.sin_family = AF_INET;
	serverAddr.sin_port = htons(server_port);
	serverAddr.sin_addr.s_addr = inet_addr("127.0.0.1");

	/* The rule must target the port we actually connect to, not a fixed 5006. */
	if (ack_drop_rule('A', server_port) == 0) {
		rule_added = 1;
	} else {
		fprintf(stderr, "warning: could not add iptables rule; the reordering will not be reproduced\n");
	}

	if (connect(clientSocket, (struct sockaddr *)&serverAddr, sizeof(serverAddr)) < 0) {
		perror("connect");
		goto out;
	}

	/* sendbuf is zero-filled first, so strlen() below yields the 10 copied bytes. */
	memset(sendbuf, 0, sizeof(sendbuf));
	memcpy(sendbuf, "AAAAAAAAAAA", 10);
	ret = send(clientSocket, sendbuf, strlen(sendbuf), 0);
	if (ret <= 0) {
		perror("send fail");
		rc = -1;
		goto out;
	}
	printf("finish send A\n");

	if (rule_added) {
		if (ack_drop_rule('D', server_port) != 0) {
			fprintf(stderr, "warning: could not delete iptables rule\n");
		}
		rule_added = 0;
	}

	sleep(2); // wait for the server socket to be inserted into the sockmap
	printf("start send b\n");
	memcpy(sendbuf, "bbbbbbbbbbbbb", 10);
	ret = send(clientSocket, sendbuf, strlen(sendbuf), 0);
	if (ret <= 0) {
		perror("send fail");
		rc = -1;
		goto out;
	}

	sleep(10);
	rc = 0;

out:
	/* Never leave the DROP rule behind, whichever path got us here. */
	if (rule_added && ack_drop_rule('D', server_port) != 0) {
		fprintf(stderr, "warning: could not delete iptables rule\n");
	}
	close(clientSocket);
	return rc;
}
[root@...alhost sockmap_test]# ./server 5006
wait until the two msgs of client are sent...
recvmsg:bbbbbbbbbb
recvmsg:AAAAAAAAAA
^C
Powered by blists - more mailing lists