[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <3B969F90-F51F-4B9D-AB1A-994D9A54D460@gmail.com>
Date: Sat, 15 Nov 2025 05:03:44 +0800
From: Miao Wang <shankerwangmiao@...il.com>
To: netdev@...r.kernel.org
Subject: [Question] Unexpected SO_PEEK_OFF behavior
Hi, all
I learned from the kernel documentation that SO_PEEK_OFF maintains a peek offset
for a socket. When recv(MSG_PEEK) is used, the returned data should start from
that offset. As stated in the manual, suppose the incoming data for a socket is
aaaabbbb, and the initial SO_PEEK_OFF is 0. Two calls of recv(fd, buf, 4,
MSG_PEEK) will return aaaa and bbbb respectively. However, I noticed that when
the incoming data is supplied in two batches, the second recv() returns all
8 bytes instead of only the 4 bytes after the offset, as shown below:
Receiver                            Sender
--------                            ------
                                    send(fd, "aaaabbbb", 8)
recv(fd, buf, 4, MSG_PEEK)
Get "aaaa" in buf
recv(fd, buf, 100, MSG_PEEK)
Get "bbbb" in buf
------------------------------------------------
recv(fd, buf, 4, MSG_PEEK)
                                    send(fd, "aaaa", 4)
Get "aaaa" in buf
recv(fd, buf, 100, MSG_PEEK)
                                    send(fd, "bbbb", 4)
Get "aaaabbbb" in buf
I also noticed that this only happens with Unix sockets. Is this the expected
behavior? If so, how can one tell whether the data returned from
recv(MSG_PEEK) contains data located before SO_PEEK_OFF?
The code used to carry out the test is modified from sk_so_peek_off.c from the
Kernel test suite.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/wait.h>
/*
 * Set the SO_PEEK_OFF option of socket s to offset.
 * A failing setsockopt() is only reported on stderr; nothing is returned.
 */
static void sk_peek_offset_set(int s, int offset)
{
	const int rc = setsockopt(s, SOL_SOCKET, SO_PEEK_OFF,
				  &offset, sizeof offset);

	if (rc == 0)
		return;

	perror("Failed to set SO_PEEK_OFF value");
}
/*
 * Read the current SO_PEEK_OFF option of socket s.
 *
 * The result variable is preset to the marker -0xbeef. If getsockopt()
 * fails, the error is reported on stderr and whatever the variable holds
 * at that point is returned (the marker, provided the failed call left it
 * untouched).
 */
static int sk_peek_offset_get(int s)
{
	int value = -0xbeef;
	socklen_t value_len = sizeof value;
	const int rc = getsockopt(s, SOL_SOCKET, SO_PEEK_OFF,
				  &value, &value_len);

	if (rc != 0)
		perror("Failed to get SO_PEEK_OFF value");

	return value;
}
/*
 * Build a connected socket pair: s[0] is the sending end, s[1] the
 * receiving end.
 *
 * AF_UNIX uses socketpair(). Any other family creates two sockets, binds
 * s[0] to an address that is all zero apart from the family, reads the
 * assigned address back with getsockname() and connects s[1] to it. For
 * IPPROTO_TCP, s[0] is put into listening state before the connect and is
 * afterwards replaced by the accepted connection.
 *
 * Returns 0 on success. On failure the error is reported with perror(),
 * every descriptor opened here is closed again, and -1 is returned.
 */
static int open_connected_pair(int af, int type, int proto, int s[2])
{
	union {
		struct sockaddr sa;
		struct sockaddr_in a4;
		struct sockaddr_in6 a6;
	} addr;
	socklen_t addrlen = sizeof(addr);
	int r;

	if (af == AF_UNIX) {
		if (socketpair(af, type, proto, s) < 0) {
			perror("Temporary socket creation failed");
			return -1;
		}
		return 0;
	}

	r = socket(af, type, proto);
	if (r < 0) {
		perror("Temporary socket creation failed");
		return -1;
	}
	s[0] = r;

	r = socket(af, type, proto);
	if (r < 0) {
		perror("Temporary socket creation failed");
		close(s[0]);
		return -1;
	}
	s[1] = r;

	memset(&addr, 0, sizeof(addr));
	addr.sa.sa_family = af;
	if (bind(s[0], &addr.sa, sizeof(addr)) < 0) {
		perror("Socket bind failed");
		goto fail;
	}
	/* Fetch the address (including the port) that bind() picked. */
	if (getsockname(s[0], &addr.sa, &addrlen) < 0) {
		perror("getsockname() failed");
		goto fail;
	}
	if (proto == IPPROTO_TCP && listen(s[0], 1) < 0) {
		perror("Socket listen failed");
		goto fail;
	}
	if (connect(s[1], &addr.sa, sizeof(addr)) < 0) {
		perror("Socket connect failed");
		goto fail;
	}
	if (proto == IPPROTO_TCP) {
		r = accept(s[0], NULL, NULL);
		if (r < 0) {
			perror("Socket accept failed");
			goto fail;
		}
		/* The listening socket is no longer needed. */
		close(s[0]);
		s[0] = r;
	}
	return 0;

fail:
	close(s[0]);
	close(s[1]);
	return -1;
}

/*
 * Body of the forked sender process: write the eight test bytes to fd,
 * either with a single send() or as "aaaa", a two second pause, then
 * "bbbb". A failing send() is reported and the process aborts.
 * This function never returns.
 */
static void run_sender(int fd, int do_sleep)
{
	if (do_sleep) {
		if (send(fd, "aaaa", 4, 0) < 0) {
			perror("Temporary probe socket send() failed");
			abort();
		}
		sleep(2);
		if (send(fd, "bbbb", 4, 0) < 0) {
			perror("Temporary probe socket send() failed");
			abort();
		}
	} else {
		if (send(fd, "aaaabbbb", 8, 0) < 0) {
			perror("Temporary probe socket send() failed");
			abort();
		}
	}
	/*
	 * _exit() instead of exit(): the child must not run the stdio
	 * cleanup it inherited from the parent.
	 */
	_exit(0);
}

/*
 * Exercise SO_PEEK_OFF on a freshly connected socket pair and print what
 * each step observes.
 *
 * af, type, proto: arguments used to create the sockets.
 * do_sleep:        non-zero makes the sender deliver the data as two
 *                  4-byte sends separated by a pause; zero sends all
 *                  8 bytes at once.
 *
 * The receiving socket gets SO_PEEK_OFF set to 0, then a 4-byte
 * MSG_PEEK recv, a 100-byte MSG_PEEK recv and a 100-byte plain recv are
 * issued, printing the data and the SO_PEEK_OFF value after each one.
 * Errors are reported on stderr and end the test early.
 */
void test(int af, int type, int proto, int do_sleep)
{
	int s[2] = {0, 0};
	pid_t sender = -1;
	char buf[100];
	ssize_t len;
	int offset;

	if (open_connected_pair(af, type, proto, s) < 0)
		return;

	offset = sk_peek_offset_get(s[1]);
	if (offset == -0xbeef) {
		printf("SO_PEEK_OFF not supported\n");
		goto out;
	}
	printf("Initial offset: %d\n", offset);
	sk_peek_offset_set(s[1], 0);
	offset = sk_peek_offset_get(s[1]);
	printf("Offset after set to 0: %d\n", offset);

	/*
	 * Empty the stdout buffer before forking; otherwise, when stdout is
	 * a pipe or a file, the child would carry a copy of the pending
	 * output.
	 */
	fflush(stdout);
	sender = fork();
	if (sender < 0) {
		/* Without a sender the recv() calls below would never return. */
		perror("fork() failed");
		goto out;
	}
	if (sender == 0)
		run_sender(s[0], do_sleep);

	len = recv(s[1], buf, 4, MSG_PEEK);
	if (len < 0) {
		perror("recv() failed");
		goto out;
	}
	printf("Read %d bytes: %.*s\n", (int)len, (int)len, buf);
	offset = sk_peek_offset_get(s[1]);
	printf("Offset after reading first 4 bytes: %d\n", offset);

	len = recv(s[1], buf, 100, MSG_PEEK);
	if (len < 0) {
		perror("recv() failed");
		goto out;
	}
	printf("Read %d bytes: %.*s\n", (int)len, (int)len, buf);
	offset = sk_peek_offset_get(s[1]);
	printf("Offset after reading all bytes: %d\n", offset);

	len = recv(s[1], buf, 100, 0);
	if (len < 0) {
		perror("recv() failed");
		goto out;
	}
	printf("Flushed %d bytes: %.*s\n", (int)len, (int)len, buf);
	offset = sk_peek_offset_get(s[1]);
	printf("Offset after flushing all bytes: %d\n", offset);

out:
	close(s[0]);
	close(s[1]);
	if (sender > 0) {
		int st;

		/* Reap the sender so that no zombie is left behind. */
		waitpid(sender, &st, 0);
	}
}
/*
 * Run the SO_PEEK_OFF scenario on stream sockets of the AF_UNIX, AF_INET
 * (TCP) and AF_INET6 (TCP) families, each once without and once with the
 * sender-side pause, printing a banner before every run.
 */
int main(void) {
	static const struct {
		const char *banner;
		int af;
		int proto;
		int do_sleep;
	} runs[] = {
		{ "=== Test SO_PEEK_OFF with AF_UNIX, SOCK_STREAM, No sleep ===\n",
		  AF_UNIX, 0, 0 },
		{ "=== Test SO_PEEK_OFF with AF_UNIX, SOCK_STREAM, Sleep ===\n",
		  AF_UNIX, 0, 1 },
		{ "=== Test SO_PEEK_OFF with AF_INET, SOCK_STREAM, IPPROTO_TCP, No sleep ===\n",
		  AF_INET, IPPROTO_TCP, 0 },
		{ "=== Test SO_PEEK_OFF with AF_INET, SOCK_STREAM, IPPROTO_TCP, Sleep ===\n",
		  AF_INET, IPPROTO_TCP, 1 },
		{ "=== Test SO_PEEK_OFF with AF_INET6, SOCK_STREAM, IPPROTO_TCP, No sleep ===\n",
		  AF_INET6, IPPROTO_TCP, 0 },
		{ "=== Test SO_PEEK_OFF with AF_INET6, SOCK_STREAM, IPPROTO_TCP, Sleep ===\n",
		  AF_INET6, IPPROTO_TCP, 1 },
	};

	for (size_t i = 0; i < sizeof runs / sizeof runs[0]; i++) {
		fputs(runs[i].banner, stdout);
		test(runs[i].af, SOCK_STREAM, runs[i].proto, runs[i].do_sleep);
	}
	return 0;
}
My execution result on 6.12.48 kernel (Debian 6.12.48+deb13-amd64) is:
=== Test SO_PEEK_OFF with AF_UNIX, SOCK_STREAM, No sleep ===
Initial offset: -1
Offset after set to 0: 0
Read 4 bytes: aaaa
Offset after reading first 4 bytes: 4
Read 4 bytes: bbbb
Offset after reading all bytes: 8
Flushed 8 bytes: aaaabbbb
Offset after flushing all bytes: 0
=== Test SO_PEEK_OFF with AF_UNIX, SOCK_STREAM, Sleep ===
Initial offset: -1
Offset after set to 0: 0
Read 4 bytes: aaaa
Offset after reading first 4 bytes: 4
Read 8 bytes: aaaabbbb
Offset after reading all bytes: 12
Flushed 8 bytes: aaaabbbb
Offset after flushing all bytes: 4
=== Test SO_PEEK_OFF with AF_INET, SOCK_STREAM, IPPROTO_TCP, No sleep ===
Initial offset: -1
Offset after set to 0: 0
Read 4 bytes: aaaa
Offset after reading first 4 bytes: 4
Read 4 bytes: bbbb
Offset after reading all bytes: 8
Flushed 8 bytes: aaaabbbb
Offset after flushing all bytes: 0
=== Test SO_PEEK_OFF with AF_INET, SOCK_STREAM, IPPROTO_TCP, Sleep ===
Initial offset: -1
Offset after set to 0: 0
Read 4 bytes: aaaa
Offset after reading first 4 bytes: 4
Read 4 bytes: bbbb
Offset after reading all bytes: 8
Flushed 8 bytes: aaaabbbb
Offset after flushing all bytes: 0
=== Test SO_PEEK_OFF with AF_INET6, SOCK_STREAM, IPPROTO_TCP, No sleep ===
Initial offset: -1
Offset after set to 0: 0
Read 4 bytes: aaaa
Offset after reading first 4 bytes: 4
Read 4 bytes: bbbb
Offset after reading all bytes: 8
Flushed 8 bytes: aaaabbbb
Offset after flushing all bytes: 0
=== Test SO_PEEK_OFF with AF_INET6, SOCK_STREAM, IPPROTO_TCP, Sleep ===
Initial offset: -1
Offset after set to 0: 0
Read 4 bytes: aaaa
Offset after reading first 4 bytes: 4
Read 4 bytes: bbbb
Offset after reading all bytes: 8
Flushed 8 bytes: aaaabbbb
Offset after flushing all bytes: 0
Cheers,
Miao Wang
Powered by blists - more mailing lists