Message-ID: <aWWQ-ooAmTIEhdHO@chamomile>
Date: Tue, 13 Jan 2026 01:25:30 +0100
From: Pablo Neira Ayuso <pablo@...filter.org>
To: Scott Mitchell <scott.k.mitch1@...il.com>
Cc: kadlec@...filter.org, fw@...len.de, phil@....cc, davem@...emloft.net,
edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com,
horms@...nel.org, netfilter-devel@...r.kernel.org,
coreteam@...filter.org, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, syzbot@...kaller.appspotmail.com
Subject: Re: [PATCH v5] netfilter: nfnetlink_queue: optimize verdict lookup
with hash table

Hi Scott,

> diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
> index 4aeffddb7586..3d0def310523 100644
> --- a/include/net/netfilter/nf_queue.h
> +++ b/include/net/netfilter/nf_queue.h
> @@ -11,6 +11,7 @@
> /* Each queued (to userspace) skbuff has one of these. */
> struct nf_queue_entry {
> struct list_head list;
> + struct hlist_node hash_node;
> struct sk_buff *skb;
> unsigned int id;
> unsigned int hook_index; /* index in hook_entries->hook[] */
> diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h
> index efcb7c044a74..bc296a17e5aa 100644
> --- a/include/uapi/linux/netfilter/nfnetlink_queue.h
> +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h
> @@ -107,6 +107,7 @@ enum nfqnl_attr_config {
> NFQA_CFG_QUEUE_MAXLEN, /* __u32 */
> NFQA_CFG_MASK, /* identify which flags to change */
> NFQA_CFG_FLAGS, /* value of these flags (__u32) */
> + NFQA_CFG_HASH_SIZE, /* __u32 hash table size (rounded to power of 2) */
This should use the rhashtable implementation; I don't see a good
reason why it was not used in the first place for this enhancement.
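Something along these lines would do, I think (untested sketch; the
params object and the .automatic_shrinking choice are illustrative,
not taken from your patch):

	/* needs #include <linux/rhashtable.h> */
	struct nf_queue_entry {
		struct list_head	list;
		struct rhash_head	hash_node;	/* was: struct hlist_node */
		struct sk_buff		*skb;
		unsigned int		id;
		unsigned int		hook_index;	/* index in hook_entries->hook[] */
		/* remaining fields unchanged */
	};

	/* entries keyed directly on nf_queue_entry->id */
	static const struct rhashtable_params nfqnl_rhash_params = {
		.key_len	= sizeof_field(struct nf_queue_entry, id),
		.key_offset	= offsetof(struct nf_queue_entry, id),
		.head_offset	= offsetof(struct nf_queue_entry, hash_node),
		.automatic_shrinking = true,
	};
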
> __NFQA_CFG_MAX
> };
> #define NFQA_CFG_MAX (__NFQA_CFG_MAX-1)
> diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
> index 8b7b39d8a109..b142fac70ed9 100644
> --- a/net/netfilter/nfnetlink_queue.c
> +++ b/net/netfilter/nfnetlink_queue.c
> @@ -46,7 +46,10 @@
> #include <net/netfilter/nf_conntrack.h>
> #endif
>
> -#define NFQNL_QMAX_DEFAULT 1024
> +#define NFQNL_QMAX_DEFAULT 1024
> +#define NFQNL_MIN_HASH_SIZE 16
> +#define NFQNL_DEFAULT_HASH_SIZE 1024
> +#define NFQNL_MAX_HASH_SIZE 131072
>
> /* We're using struct nlattr which has 16bit nla_len. Note that nla_len
> * includes the header length. Thus, the maximum packet length that we
> @@ -65,6 +68,7 @@ struct nfqnl_instance {
> unsigned int copy_range;
> unsigned int queue_dropped;
> unsigned int queue_user_dropped;
> + unsigned int queue_hash_size;
>
>
> u_int16_t queue_num; /* number of this queue */
> @@ -77,6 +81,8 @@ struct nfqnl_instance {
> spinlock_t lock ____cacheline_aligned_in_smp;
> unsigned int queue_total;
> unsigned int id_sequence; /* 'sequence' of pkt ids */
> + unsigned int queue_hash_mask;
> + struct hlist_head *queue_hash;
> struct list_head queue_list; /* packets in queue */
> };
>
> @@ -95,6 +101,39 @@ static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
> return net_generic(net, nfnl_queue_net_id);
> }
>
> +static inline unsigned int
> +nfqnl_packet_hash(u32 id, unsigned int mask)
> +{
> + return id & mask;
> +}
> +
> +static inline u32
> +nfqnl_normalize_hash_size(u32 hash_size)
> +{
> + /* Must be power of two for queue_hash_mask to work correctly.
> + * Avoid overflow of is_power_of_2 by bounding NFQNL_MAX_HASH_SIZE.
> + */
> + BUILD_BUG_ON(!is_power_of_2(NFQNL_MIN_HASH_SIZE) ||
> + !is_power_of_2(NFQNL_DEFAULT_HASH_SIZE) ||
> + !is_power_of_2(NFQNL_MAX_HASH_SIZE) ||
> + NFQNL_MAX_HASH_SIZE > 1U << 31);
> +
> + if (!hash_size)
> + return NFQNL_DEFAULT_HASH_SIZE;
> +
> + /* Clamp to valid range before power of two to avoid overflow */
> + if (hash_size <= NFQNL_MIN_HASH_SIZE)
> + return NFQNL_MIN_HASH_SIZE;
> +
> + if (hash_size >= NFQNL_MAX_HASH_SIZE)
> + return NFQNL_MAX_HASH_SIZE;
> +
> + if (!is_power_of_2(hash_size))
> + hash_size = roundup_pow_of_two(hash_size);
> +
> + return hash_size;
> +}
> +
> static inline u_int8_t instance_hashfn(u_int16_t queue_num)
> {
> return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
> @@ -114,13 +153,70 @@ instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
> return NULL;
> }
>
> +static int
> +nfqnl_hash_resize(struct nfqnl_instance *inst, u32 hash_size)
rhashtable can handle resizing for you, so users do not need to tune
this hash_size parameter.
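To illustrate (untested; entry_hash as a field name on nfqnl_instance
is made up here), the whole resize path above, and the
NFQA_CFG_HASH_SIZE attribute with it, can simply go away:

	/* in __enqueue_entry(): the table grows (and shrinks, with
	 * .automatic_shrinking) transparently as entries come and go.
	 */
	err = rhashtable_insert_fast(&inst->entry_hash, &entry->hash_node,
				     nfqnl_rhash_params);

	/* in find_dequeue_entry(): O(1) lookup by packet id, with no
	 * manual table sizing involved.
	 */
	entry = rhashtable_lookup_fast(&inst->entry_hash, &id,
				       nfqnl_rhash_params);
	if (entry)
		rhashtable_remove_fast(&inst->entry_hash, &entry->hash_node,
				       nfqnl_rhash_params);
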
> +{
> + struct hlist_head *new_hash, *old_hash;
> + struct nf_queue_entry *entry;
> + unsigned int h, hash_mask;
> +
> + hash_size = nfqnl_normalize_hash_size(hash_size);
> + if (hash_size == inst->queue_hash_size)
> + return 0;
> +
> + /* GFP_ATOMIC required: called under rcu_read_lock in nfqnl_recv_config.
> + * Using GFP_KERNEL_ACCOUNT would require refactoring lock placement.
> + */
> + new_hash = kvmalloc_array(hash_size, sizeof(*new_hash), GFP_ATOMIC);
> + if (!new_hash)
> + return -ENOMEM;
> +
> + hash_mask = hash_size - 1;
> +
> + for (h = 0; h < hash_size; h++)
> + INIT_HLIST_HEAD(&new_hash[h]);
> +
> + spin_lock_bh(&inst->lock);
> +
> + list_for_each_entry(entry, &inst->queue_list, list) {
> + /* No hlist_del() since old_hash will be freed and we hold lock */
> + h = nfqnl_packet_hash(entry->id, hash_mask);
> + hlist_add_head(&entry->hash_node, &new_hash[h]);
> + }
> +
> + old_hash = inst->queue_hash;
> + inst->queue_hash_size = hash_size;
> + inst->queue_hash_mask = hash_mask;
> + inst->queue_hash = new_hash;
> +
> + spin_unlock_bh(&inst->lock);
> +
> + kvfree(old_hash);
> +
> + return 0;
> +}
> +
> static struct nfqnl_instance *
> -instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
> +instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid,
> + u32 hash_size)
> {
> struct nfqnl_instance *inst;
> + struct hlist_head *queue_hash;
> unsigned int h;
> int err;
>
> + hash_size = nfqnl_normalize_hash_size(hash_size);
> +
> + /* GFP_ATOMIC required: called under rcu_read_lock in nfqnl_recv_config.
> + * Using GFP_KERNEL_ACCOUNT would require refactoring lock placement.
> + */
> + queue_hash = kvmalloc_array(hash_size, sizeof(*queue_hash), GFP_ATOMIC);
If rhashtable is used, this can be allocated per netns, and then you
avoid this GFP_ATOMIC allocation for each instance.
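I mean something like this (untested; a verdict_hash table living in
struct nfnl_queue_net is an assumption here, and the lookup key would
then have to fold in queue_num so instances do not collide):

	static int __net_init nfnl_queue_net_init(struct net *net)
	{
		struct nfnl_queue_net *q = nfnl_queue_pernet(net);

		/* process context: rhashtable_init() can allocate with
		 * GFP_KERNEL, and instance_create() no longer needs any
		 * GFP_ATOMIC allocation at all.
		 */
		return rhashtable_init(&q->verdict_hash, &nfqnl_rhash_params);
	}

	static void __net_exit nfnl_queue_net_exit(struct net *net)
	{
		struct nfnl_queue_net *q = nfnl_queue_pernet(net);

		rhashtable_destroy(&q->verdict_hash);
	}
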
> + if (!queue_hash)
> + return ERR_PTR(-ENOMEM);
> +
> + for (h = 0; h < hash_size; h++)
> + INIT_HLIST_HEAD(&queue_hash[h]);
> +
> spin_lock(&q->instances_lock);
> if (instance_lookup(q, queue_num)) {
> err = -EEXIST;