[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <878qlep1u2.ffs@tglx>
Date: Fri, 27 Jun 2025 00:36:37 +0200
From: Thomas Gleixner <tglx@...utronix.de>
To: André Almeida <andrealmeid@...lia.com>, Ingo Molnar
<mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>, Darren Hart <dvhart@...radead.org>,
Davidlohr Bueso <dave@...olabs.net>, Shuah Khan <shuah@...nel.org>, Arnd
Bergmann <arnd@...db.de>, Sebastian Andrzej Siewior
<bigeasy@...utronix.de>, Waiman Long <longman@...hat.com>
Cc: linux-kernel@...r.kernel.org, linux-kselftest@...r.kernel.org,
linux-api@...r.kernel.org, kernel-dev@...lia.com, André
Almeida
<andrealmeid@...lia.com>
Subject: Re: [PATCH v5 2/7] selftests/futex: Create test for robust list
On Thu, Jun 26 2025 at 14:11, André Almeida wrote:
> +
> +int set_robust_list(struct robust_list_head *head, size_t len)
This function and the get() counterpart are global because they can?
> +{
> + return syscall(SYS_set_robust_list, head, len);
> +}
> +/*
> + * Basic lock struct, contains just the futex word and the robust list element
> + * Real implementations have also a *prev to easily walk in the list
> + */
> +struct lock_struct {
> + _Atomic(unsigned int) futex;
> + struct robust_list list;
tabular arrangement please.
> + pthread_barrier_wait(&barrier);
> +
> + /*
> + * There's a race here: the parent thread needs to be inside
> + * futex_wait() before the child thread dies, otherwise it will miss the
> + * wakeup from handle_futex_death() that this child will emit. We wait a
> + * little bit just to make sure that this happens.
> + */
> + sleep(1);
One second is quite a little bit. :)
> + /*
> + * futex_wait() should return 0 and the futex word should be marked with
> + * FUTEX_OWNER_DIED
> + */
> + ASSERT_EQ(ret, 0);
> + if (ret != 0)
> + printf("futex wait returned %d", errno);
What's the purpose of the extra printf() after the assert here? This
code is not even reached when ret != 0, no?
> + ASSERT_TRUE(*futex | FUTEX_OWNER_DIED);
That's always true no matter what the content of the futex variable is, no?
> +/*
> + * The only valid value for len is sizeof(*head)
> + */
> +static void test_set_robust_list_invalid_size(void)
> +{
> + struct robust_list_head head;
> + size_t head_size = sizeof(struct robust_list_head);
Groan. You already define the robust_list_head variable ahead of
head_size and violate the reverse fir tree ordering, so why don't you
use the obvious and actually robust 'sizeof(head)'?
> +/*
> + * Test get_robust_list with pid = 0, getting the list of the running thread
> + */
> +static void test_get_robust_list_self(void)
> +{
> + struct robust_list_head head, head2, *get_head;
> + size_t head_size = sizeof(struct robust_list_head), len_ptr;
Ditto.
> +static int child_list(void *arg)
> +{
> + struct robust_list_head *head = (struct robust_list_head *) arg;
void pointers really don't require type casts
> + int ret;
> +
> + ret = set_robust_list(head, sizeof(struct robust_list_head));
sizeof(*head)
> + if (ret)
> + ksft_test_result_fail("set_robust_list error\n");
> +
> + pthread_barrier_wait(&barrier);
> + pthread_barrier_wait(&barrier2);
Lacks a comment what this waits for
> + return 0;
> +}
> +
> +/*
> + * Test get_robust_list from another thread. We use two barriers here to ensure
> + * that:
> + * 1) the child thread set the list before we try to get it from the
> + * parent
> + * 2) the child thread still alive when we try to get the list from it
> + */
> +static void test_get_robust_list_child(void)
> +{
> + pid_t tid;
> + int ret;
> + struct robust_list_head head, *get_head;
> + size_t len_ptr;
Reverse fir tree ordering please.
> + ret = pthread_barrier_init(&barrier, NULL, 2);
> + ret = pthread_barrier_init(&barrier2, NULL, 2);
> + ASSERT_EQ(ret, 0);
> +
> + tid = create_child(&child_list, &head);
> + ASSERT_NE(tid, -1);
> +
> + pthread_barrier_wait(&barrier);
> +
> + ret = get_robust_list(tid, &get_head, &len_ptr);
> + ASSERT_EQ(ret, 0);
> + ASSERT_EQ(&head, get_head);
> +
> + pthread_barrier_wait(&barrier2);
> +
> + wait(NULL);
> + pthread_barrier_destroy(&barrier);
> + pthread_barrier_destroy(&barrier2);
> +
> + ksft_test_result_pass("%s\n", __func__);
> +}
> +
> +static int child_fn_lock_with_error(void *arg)
> +{
> + struct lock_struct *lock = (struct lock_struct *) arg;
See above
> + struct robust_list_head head;
> + int ret;
> +
> + ret = set_list(&head);
> + if (ret)
> + ksft_test_result_fail("set_robust_list error\n");
So you fail the test and continue to produce more fails or what? Why
does this not use one of these ASSERT thingies or return?
> + ret = mutex_lock(lock, &head, true);
> + if (ret)
> + ksft_test_result_fail("mutex_lock error\n");
> +
> + pthread_barrier_wait(&barrier);
> +
> + sleep(1);
> +
> + return 0;
> +}
> +
> +/*
> + * Same as robustness test, but inject an error where the mutex_lock() exits
> + * earlier, just after setting list_op_pending and taking the lock, to test the
> + * list_op_pending mechanism
> + */
> +static void test_set_list_op_pending(void)
> +{
> + struct lock_struct lock = { .futex = 0 };
> + struct robust_list_head head;
> + _Atomic(unsigned int) *futex = &lock.futex;
> + int ret;
See above
> + ASSERT_EQ(ret, 0);
> + if (ret != 0)
> + printf("futex wait returned %d", errno);
The random insertion of completely pointless printf()'s is stunning.
> + ASSERT_TRUE(*futex | FUTEX_OWNER_DIED);
Yet another always true assert which is happily optimized out by the
compiler.
> + wait(NULL);
> + pthread_barrier_destroy(&barrier);
> +
> + ksft_test_result_pass("%s\n", __func__);
> +}
> +static int child_wait_lock(void *arg)
> +{
> + struct lock_struct *lock = (struct lock_struct *) arg;
> + struct robust_list_head head;
> + int ret;
> +
> + pthread_barrier_wait(&barrier2);
> + ret = mutex_lock(lock, &head, false);
> +
> + if (ret)
> + ksft_test_result_fail("mutex_lock error\n");
> +
> + if (!(lock->futex | FUTEX_OWNER_DIED))
> + ksft_test_result_fail("futex not marked with FUTEX_OWNER_DIED\n");
Now I kinda understand this insanity. The child emits a fail and
exits. Then the parent ...
> + for (i = 0; i < CHILD_NR; i++)
> + create_child(&child_wait_lock, &locks[i]);
> +
> + /* Wait for all children to return */
> + while (wait(NULL) > 0);
> +
> + pthread_barrier_destroy(&barrier);
> + pthread_barrier_destroy(&barrier2);
> +
> + ksft_test_result_pass("%s\n", __func__);
... happily claims that the test passed.
Seriously?
Thread functions have a return value for a reason and wait(2) has a
wstatus argument for the very same reason.
> +static int child_circular_list(void *arg)
> +{
> + static struct robust_list_head head;
> + struct lock_struct a, b, c;
> + int ret;
> +
> + ret = set_list(&head);
> + if (ret)
> + ksft_test_result_fail("set_list error\n");
Yet another instance of the same ....
Thanks,
tglx
Powered by blists - more mailing lists