lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250227211229.GD25639@redhat.com>
Date: Thu, 27 Feb 2025 22:12:29 +0100
From: Oleg Nesterov <oleg@...hat.com>
To: "Sapkal, Swapnil" <swapnil.sapkal@....com>
Cc: Mateusz Guzik <mjguzik@...il.com>,
	Manfred Spraul <manfred@...orfullife.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Christian Brauner <brauner@...nel.org>,
	David Howells <dhowells@...hat.com>,
	WangYuli <wangyuli@...ontech.com>, linux-fsdevel@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	K Prateek Nayak <kprateek.nayak@....com>,
	"Shenoy, Gautham Ranjal" <gautham.shenoy@....com>,
	Neeraj.Upadhyay@....com
Subject: Re: [PATCH] pipe_read: don't wake up the writer if the pipe is still
 full

Sapkal, first of all, thanks again!

On 02/27, Sapkal, Swapnil wrote:
>
> >1. with 1 fd instead of 20:
> >
> >/usr/bin/hackbench -g 16 -f 1 --threads --pipe -l 100000 -s 100
>
> With this I was not able to reproduce the issue. I tried almost 5000
> iterations.

OK,

> >2. with a size which divides 4096 evenly (e.g., 128):
...
> When I retain the number of
> groups to 16 and change the message size to 128, it took me around 150
> iterations to reproduce this issue (with 100 bytes it was 20 iterations).
> The exact command was
>
> /usr/bin/hackbench -g 16 -f 20 --threads --pipe -l 100000 -s 128

Ah, good. This is good ;)

> I will try to sprinkle some trace_printk's in the code where the state of
> the pipe changes. I will report here if I find something.

Great! but...

Sapkal, I was going to finish (and test! ;) the patch below tomorrow, after
you test the previous debugging patch I sent in this thread. But since you
are going to change the kernel...

For the moment, please forget about that patch (which, as Mateusz pointed out, is buggy).
Could you apply the patch below and reproduce the problem?

If yes, please do prctl(666) after the hang and send us the output from
dmesg, between "DUMP START" and "DUMP END". You can just do

	$ perl -e 'syscall 157,666'

to call prctl(666) and trigger the dump.

Oleg.
---

diff --git a/fs/pipe.c b/fs/pipe.c
index b0641f75b1ba..566c75a0ff81 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -376,6 +376,8 @@ anon_pipe_read(struct kiocb *iocb, struct iov_iter *to)
 	}
 	if (pipe_empty(pipe->head, pipe->tail))
 		wake_next_reader = false;
+	if (ret > 0)
+		pipe->r_cnt++;
 	mutex_unlock(&pipe->mutex);
 
 	if (wake_writer)
@@ -565,6 +567,8 @@ anon_pipe_write(struct kiocb *iocb, struct iov_iter *from)
 out:
 	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
 		wake_next_writer = false;
+	if (ret > 0)
+		pipe->w_cnt++;
 	mutex_unlock(&pipe->mutex);
 
 	/*
@@ -695,6 +699,42 @@ pipe_poll(struct file *filp, poll_table *wait)
 	return mask;
 }
 
+static DEFINE_MUTEX(PI_MUTEX);
+static LIST_HEAD(PI_LIST);
+
+void pi_dump(void);
+void pi_dump(void)
+{
+	struct pipe_inode_info *pipe;
+
+	pr_crit("---------- DUMP START ----------\n");
+	mutex_lock(&PI_MUTEX);
+	list_for_each_entry(pipe, &PI_LIST, pi_list) {
+		unsigned head, tail;
+
+		mutex_lock(&pipe->mutex);
+		head = pipe->head;
+		tail = pipe->tail;
+		pr_crit("E=%d F=%d; W=%d R=%d\n",
+			pipe_empty(head, tail), pipe_full(head, tail, pipe->max_usage),
+			pipe->w_cnt, pipe->r_cnt);
+
+// INCOMPLETE
+pr_crit("RD=%d WR=%d\n", waitqueue_active(&pipe->rd_wait), waitqueue_active(&pipe->wr_wait));
+
+		for (; tail < head; tail++) {
+			struct pipe_buffer *buf = pipe_buf(pipe, tail);
+			WARN_ON(buf->ops != &anon_pipe_buf_ops);
+			pr_crit("buf: o=%d l=%d\n", buf->offset, buf->len);
+		}
+		pr_crit("\n");
+
+		mutex_unlock(&pipe->mutex);
+	}
+	mutex_unlock(&PI_MUTEX);
+	pr_crit("---------- DUMP END ------------\n");
+}
+
 static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
 {
 	int kill = 0;
@@ -706,8 +746,14 @@ static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
 	}
 	spin_unlock(&inode->i_lock);
 
-	if (kill)
+	if (kill) {
+		if (!list_empty(&pipe->pi_list)) {
+			mutex_lock(&PI_MUTEX);
+			list_del_init(&pipe->pi_list);
+			mutex_unlock(&PI_MUTEX);
+		}
 		free_pipe_info(pipe);
+	}
 }
 
 static int
@@ -790,6 +836,13 @@ struct pipe_inode_info *alloc_pipe_info(void)
 	if (pipe == NULL)
 		goto out_free_uid;
 
+	INIT_LIST_HEAD(&pipe->pi_list);
+	if (!strcmp(current->comm, "hackbench")) {
+		mutex_lock(&PI_MUTEX);
+		list_add_tail(&pipe->pi_list, &PI_LIST);
+		mutex_unlock(&PI_MUTEX);
+	}
+
 	if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
 		pipe_bufs = max_size >> PAGE_SHIFT;
 
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 8ff23bf5a819..48d9bf5171dc 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -80,6 +80,9 @@ struct pipe_inode_info {
 #ifdef CONFIG_WATCH_QUEUE
 	struct watch_queue *watch_queue;
 #endif
+
+	struct list_head pi_list;
+	unsigned w_cnt, r_cnt;
 };
 
 /*
diff --git a/kernel/sys.c b/kernel/sys.c
index 4efca8a97d62..a85e34861b2e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2483,6 +2483,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 
 	error = 0;
 	switch (option) {
+	case 666: {
+		extern void pi_dump(void);
+		pi_dump();
+		break;
+	}
 	case PR_SET_PDEATHSIG:
 		if (!valid_signal(arg2)) {
 			error = -EINVAL;


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ