lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <3B969F90-F51F-4B9D-AB1A-994D9A54D460@gmail.com>
Date: Sat, 15 Nov 2025 05:03:44 +0800
From: Miao Wang <shankerwangmiao@...il.com>
To: netdev@...r.kernel.org
Subject: [Question] Unexpected SO_PEEK_OFF behavior

Hi, all

I learned from the kernel documentation that SO_PEEK_OFF maintains a peek
offset for a socket: recv(MSG_PEEK) returns data starting at that offset. As
stated in the manual, suppose the incoming data for a socket is aaaabbbb and
the initial SO_PEEK_OFF is 0. Two calls of recv(fd, buf, 4, MSG_PEEK) will
return aaaa and bbbb respectively. However, I noticed that when the incoming
data arrives in two batches, the second recv() returns all 8 bytes (aaaabbbb)
instead of only the 4 bytes after the offset (bbbb), as shown below:

Receiver                     Sender
--------                     ------
                             send(fd, "aaaabbbb", 8)
recv(fd, buf, 4, MSG_PEEK)
Get "aaaa" in buf
recv(fd, buf, 100, MSG_PEEK)
Get "bbbb" in buf
------------------------------------------------
recv(fd, buf, 4, MSG_PEEK)
                             send(fd, "aaaa", 4)
Get "aaaa" in buf
recv(fd, buf, 100, MSG_PEEK)
                             send(fd, "bbbb", 4)
Get "aaaabbbb" in buf


I also noticed that this only happens with unix sockets. Is this the expected
behavior? If so, how can one tell whether the data returned by
recv(MSG_PEEK) includes data from before the SO_PEEK_OFF offset?

The code used to carry out the test is modified from sk_so_peek_off.c from the
Kernel test suite.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/wait.h>

/*
 * Store `offset` as the SO_PEEK_OFF option of socket `s`.
 * A failing setsockopt() is only reported through perror(); the caller
 * gets no indication of the failure.
 */
static void sk_peek_offset_set(int s, int offset)
{
        int rc = setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, sizeof(offset));

        if (rc != 0)
                perror("Failed to set SO_PEEK_OFF value");
}

/*
 * Read the SO_PEEK_OFF option of socket `s`.
 * Returns the value filled in by getsockopt(). If the call fails, the
 * failure is reported through perror() and the untouched sentinel
 * -0xbeef is returned instead.
 */
static int sk_peek_offset_get(int s)
{
        int value = -0xbeef;
        socklen_t value_len = sizeof(value);
        int rc = getsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &value, &value_len);

        if (rc != 0)
                perror("Failed to get SO_PEEK_OFF value");

        return value;
}

/*
 * Run one SO_PEEK_OFF experiment and print what the receiving side observes.
 *
 * af, type, proto: passed to socketpair() when af is AF_UNIX, otherwise to
 *                  socket() for each of the two endpoints.
 * do_sleep:        non-zero makes the sender deliver "aaaa" and "bbbb" as two
 *                  send() calls separated by sleep(2); zero makes it deliver
 *                  "aaaabbbb" in a single send().
 *
 * s[0] is the sending end (written to by a forked child) and s[1] is the
 * receiving end, whose SO_PEEK_OFF is set to 0. The parent peeks 4 bytes,
 * peeks up to 100 bytes, then does a plain recv() of up to 100 bytes, printing
 * the data and the SO_PEEK_OFF value after every step. Failures are reported
 * with perror() and end the experiment early; nothing is returned.
 */
void test(int af, int type, int proto, int do_sleep){
        int s[2] = {0, 0};
        int r = 0;
        int offset;
        pid_t sender = -1;
        char buf[100];
        if (af == AF_UNIX){
                r = socketpair(af, type, proto, s);
                if (r < 0) {
                        perror("Temporary socket creation failed");
                        return;
                }
        } else {
                r = socket(af, type, proto);
                if (r < 0) {
                        perror("Temporary socket creation failed");
                        return;
                }
                s[0] = r;
                r = socket(af, type, proto);
                if (r < 0) {
                        perror("Temporary socket creation failed");
                        close(s[0]);
                        return;
                }
                s[1] = r;
                union {
                        struct sockaddr sa;
                        struct sockaddr_in a4;
                        struct sockaddr_in6 a6;
                } addr;
                /* Bind to an all-zero address of the requested family ... */
                memset(&addr, 0, sizeof(addr));
                addr.sa.sa_family = af;
                r = bind(s[0], &addr.sa, sizeof(addr));
                if (r < 0) {
                        perror("Socket bind failed");
                        goto out;
                }
                /* ... and read back what was actually bound, to connect to it. */
                r = getsockname(s[0], &addr.sa, &(socklen_t){sizeof(addr)});
                if (r < 0) {
                        perror("getsockname() failed");
                        goto out;
                }
                if (proto == IPPROTO_TCP) {
                        r = listen(s[0], 1);
                        if (r < 0) {
                                perror("Socket listen failed");
                                goto out;
                        }
                }
                r = connect(s[1], &addr.sa, sizeof(addr));
                if (r < 0) {
                        perror("Socket connect failed");
                        goto out;
                }
                if (proto == IPPROTO_TCP) {
                        r = accept(s[0], NULL, NULL);
                        if (r < 0) {
                                perror("Socket accept failed");
                                goto out;
                        }
                        /* The accepted connection replaces the listener as s[0]. */
                        close(s[0]);
                        s[0] = r;
                }
        }
        offset = sk_peek_offset_get(s[1]);
        if (offset == -0xbeef) {
                /* -0xbeef is the sentinel the getter returns when getsockopt() fails. */
                printf("SO_PEEK_OFF not supported\n");
                goto out;
        }
        printf("Initial offset: %d\n", offset);
        sk_peek_offset_set(s[1], 0);
        offset = sk_peek_offset_get(s[1]);
        printf("Offset after set to 0: %d\n", offset);
        /*
         * Flush before forking: otherwise the child inherits any still-buffered
         * output and the same lines can end up being written twice when stdout
         * is a pipe or a file.
         */
        fflush(stdout);
        sender = fork();
        if (sender < 0) {
                /* Without a sender the recv() below would block forever. */
                perror("fork() failed");
                goto out;
        }
        if (sender == 0) {
                /* Transfer a message */
                if (do_sleep){
                        if (send(s[0], (char *)("aaaa"), 4, 0) < 0) {
                                perror("Temporary probe socket send() failed");
                                abort();
                        }
                        sleep(2);
                        if (send(s[0], (char *)("bbbb"), 4, 0) < 0) {
                                perror("Temporary probe socket send() failed");
                                abort();
                        }
                } else {
                        if (send(s[0], (char *)("aaaabbbb"), 8, 0) < 0) {
                                perror("Temporary probe socket send() failed");
                                abort();
                        }
                }
                /* Leave without touching the stdio state copied from the parent. */
                _exit(0);
        }
        /* Step 1: peek at the first 4 bytes. */
        ssize_t len = recv(s[1], buf, 4, MSG_PEEK);
        if (len < 0) {
                perror("recv() failed");
                goto out;
        }
        printf("Read %d bytes: %.*s\n", (int)len, (int)len, buf);
        offset = sk_peek_offset_get(s[1]);
        printf("Offset after reading first 4 bytes: %d\n", offset);
        /* Step 2: peek again with room for everything that may be queued. */
        len = recv(s[1], buf, 100, MSG_PEEK);
        if (len < 0) {
                perror("recv() failed");
                goto out;
        }
        printf("Read %d bytes: %.*s\n", (int)len, (int)len, buf);
        offset = sk_peek_offset_get(s[1]);
        printf("Offset after reading all bytes: %d\n", offset);
        /* Step 3: consume the data with a plain (non-peeking) recv(). */
        len = recv(s[1], buf, 100, 0);
        if (len < 0) {
                perror("recv() failed");
                goto out;
        }
        printf("Flushed %d bytes: %.*s\n", (int)len, (int)len, buf);
        offset = sk_peek_offset_get(s[1]);
        printf("Offset after flushing all bytes: %d\n", offset);
out:
        close(s[0]);
        close(s[1]);
        /* Reap the sender, if one was started, so no zombie is left behind. */
        if(sender > 0) {
                int st;
                waitpid(sender, &st, 0);
        }
}

/*
 * Run every SO_PEEK_OFF experiment in order: AF_UNIX, AF_INET/TCP and
 * AF_INET6/TCP stream sockets, each once with a single send ("No sleep")
 * and once with two delayed sends ("Sleep"). Always returns 0.
 */
int main(void) {
        static const struct {
                const char *banner;
                int af;
                int proto;
                int do_sleep;
        } cases[] = {
                { "=== Test SO_PEEK_OFF with AF_UNIX, SOCK_STREAM, No sleep ===\n",
                  AF_UNIX, 0, 0 },
                { "=== Test SO_PEEK_OFF with AF_UNIX, SOCK_STREAM, Sleep ===\n",
                  AF_UNIX, 0, 1 },
                { "=== Test SO_PEEK_OFF with AF_INET, SOCK_STREAM, IPPROTO_TCP, No sleep ===\n",
                  AF_INET, IPPROTO_TCP, 0 },
                { "=== Test SO_PEEK_OFF with AF_INET, SOCK_STREAM, IPPROTO_TCP, Sleep ===\n",
                  AF_INET, IPPROTO_TCP, 1 },
                { "=== Test SO_PEEK_OFF with AF_INET6, SOCK_STREAM, IPPROTO_TCP, No sleep ===\n",
                  AF_INET6, IPPROTO_TCP, 0 },
                { "=== Test SO_PEEK_OFF with AF_INET6, SOCK_STREAM, IPPROTO_TCP, Sleep ===\n",
                  AF_INET6, IPPROTO_TCP, 1 },
        };
        size_t count = sizeof(cases) / sizeof(cases[0]);

        for (size_t i = 0; i < count; i++) {
                /* Banner first, then the experiment it announces. */
                printf("%s", cases[i].banner);
                test(cases[i].af, SOCK_STREAM, cases[i].proto, cases[i].do_sleep);
        }
        return 0;
}

My execution result on 6.12.48 kernel (Debian 6.12.48+deb13-amd64) is:

=== Test SO_PEEK_OFF with AF_UNIX, SOCK_STREAM, No sleep ===
Initial offset: -1
Offset after set to 0: 0
Read 4 bytes: aaaa
Offset after reading first 4 bytes: 4
Read 4 bytes: bbbb
Offset after reading all bytes: 8
Flushed 8 bytes: aaaabbbb
Offset after flushing all bytes: 0
=== Test SO_PEEK_OFF with AF_UNIX, SOCK_STREAM, Sleep ===
Initial offset: -1
Offset after set to 0: 0
Read 4 bytes: aaaa
Offset after reading first 4 bytes: 4
Read 8 bytes: aaaabbbb
Offset after reading all bytes: 12
Flushed 8 bytes: aaaabbbb
Offset after flushing all bytes: 4
=== Test SO_PEEK_OFF with AF_INET, SOCK_STREAM, IPPROTO_TCP, No sleep ===
Initial offset: -1
Offset after set to 0: 0
Read 4 bytes: aaaa
Offset after reading first 4 bytes: 4
Read 4 bytes: bbbb
Offset after reading all bytes: 8
Flushed 8 bytes: aaaabbbb
Offset after flushing all bytes: 0
=== Test SO_PEEK_OFF with AF_INET, SOCK_STREAM, IPPROTO_TCP, Sleep ===
Initial offset: -1
Offset after set to 0: 0
Read 4 bytes: aaaa
Offset after reading first 4 bytes: 4
Read 4 bytes: bbbb
Offset after reading all bytes: 8
Flushed 8 bytes: aaaabbbb
Offset after flushing all bytes: 0
=== Test SO_PEEK_OFF with AF_INET6, SOCK_STREAM, IPPROTO_TCP, No sleep ===
Initial offset: -1
Offset after set to 0: 0
Read 4 bytes: aaaa
Offset after reading first 4 bytes: 4
Read 4 bytes: bbbb
Offset after reading all bytes: 8
Flushed 8 bytes: aaaabbbb
Offset after flushing all bytes: 0
=== Test SO_PEEK_OFF with AF_INET6, SOCK_STREAM, IPPROTO_TCP, Sleep ===
Initial offset: -1
Offset after set to 0: 0
Read 4 bytes: aaaa
Offset after reading first 4 bytes: 4
Read 4 bytes: bbbb
Offset after reading all bytes: 8
Flushed 8 bytes: aaaabbbb
Offset after flushing all bytes: 0


Cheers,
Miao Wang

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ