[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87pm48m19m.ffs@tglx>
Date: Mon, 31 Jul 2023 19:36:21 +0200
From: Thomas Gleixner <tglx@...utronix.de>
To: Peter Zijlstra <peterz@...radead.org>, axboe@...nel.dk
Cc: linux-kernel@...r.kernel.org, peterz@...radead.org,
mingo@...hat.com, dvhart@...radead.org, dave@...olabs.net,
andrealmeid@...lia.com, Andrew Morton <akpm@...ux-foundation.org>,
urezki@...il.com, hch@...radead.org, lstoakes@...il.com,
Arnd Bergmann <arnd@...db.de>, linux-api@...r.kernel.org,
linux-mm@...ck.org, linux-arch@...r.kernel.org,
malteskarupke@....de
Subject: Re: [PATCH v1 11/14] futex: Implement FUTEX2_NUMA
On Fri, Jul 21 2023 at 12:22, Peter Zijlstra wrote:
> struct futex_hash_bucket *futex_hash(union futex_key *key)
> {
> - u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
> + u32 hash = jhash2((u32 *)key,
> + offsetof(typeof(*key), both.offset) / sizeof(u32),
> key->both.offset);
> + int node = key->both.node;
>
> - return &futex_queues[hash & (futex_hashsize - 1)];
> + if (node == -1) {
> + /*
> + * In case of !FLAGS_NUMA, use some unused hash bits to pick a
> + * node -- this ensures regular futexes are interleaved across
> + * the nodes and avoids having to allocate multiple
> + * hash-tables.
> + *
> + * NOTE: this isn't perfectly uniform, but it is fast and
> + * handles sparse node masks.
> + */
> + node = (hash >> futex_hashshift) % nr_node_ids;
Is nr_node_ids guaranteed to be stable after init? It's marked
__read_mostly, but not __ro_after_init.
> + if (!node_possible(node)) {
> + node = find_next_bit_wrap(node_possible_map.bits,
> + nr_node_ids, node);
> + }
> + }
> +
> + return &futex_queues[node][hash & (futex_hashsize - 1)];
> }
> fshared = flags & FLAGS_SHARED;
> + size = futex_size(flags);
>
> /*
> * The futex address must be "naturally" aligned.
> */
> key->both.offset = address % PAGE_SIZE;
> - if (unlikely((address % sizeof(u32)) != 0))
> + if (unlikely((address % size) != 0))
> return -EINVAL;
Hmm. Shouldn't that check already have been changed by the patch which
allowed the 1 and 2 byte futexes?
> address -= key->both.offset;
>
> - if (unlikely(!access_ok(uaddr, sizeof(u32))))
> + if (flags & FLAGS_NUMA)
> + size *= 2;
> +
> + if (unlikely(!access_ok(uaddr, size)))
> return -EFAULT;
>
> if (unlikely(should_fail_futex(fshared)))
> return -EFAULT;
>
> + key->both.node = -1;
Please put this into an else path.
> + if (flags & FLAGS_NUMA) {
> + void __user *naddr = uaddr + size/2;
Nit, spaces around the operator: size / 2;
> +
> + if (futex_get_value(&node, naddr, flags))
> + return -EFAULT;
> +
> + if (node == -1) {
> + node = numa_node_id();
> + if (futex_put_value(node, naddr, flags))
> + return -EFAULT;
> + }
> +
> + if (node >= MAX_NUMNODES || !node_possible(node))
> + return -EINVAL;
That's clearly an else path too. No point in checking whether
numa_node_id() is valid.
> + key->both.node = node;
> + }
>
> +static inline unsigned int futex_size(unsigned int flags)
> +{
> + unsigned int size = flags & FLAGS_SIZE_MASK;
> + return 1 << size; /* {0,1,2,3} -> {1,2,4,8} */
> +}
> +
> static inline bool futex_flags_valid(unsigned int flags)
> {
> /* Only 64bit futexes for 64bit code */
> @@ -77,13 +83,19 @@ static inline bool futex_flags_valid(uns
> if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32)
> return false;
>
> - return true;
> -}
> + /*
> + * Must be able to represent both NUMA_NO_NODE and every valid nodeid
> + * in a futex word.
> + */
> + if (flags & FLAGS_NUMA) {
> + int bits = 8 * futex_size(flags);
> + u64 max = ~0ULL;
> + max >>= 64 - bits;
Your newline key is broken, right? Please separate the declarations from the code with a blank line.
> + if (nr_node_ids >= max)
> + return false;
> + }
Thanks,
tglx
Powered by blists - more mailing lists