Message-ID: <aWWQ-ooAmTIEhdHO@chamomile>
Date: Tue, 13 Jan 2026 01:25:30 +0100
From: Pablo Neira Ayuso <pablo@...filter.org>
To: Scott Mitchell <scott.k.mitch1@...il.com>
Cc: kadlec@...filter.org, fw@...len.de, phil@....cc, davem@...emloft.net,
edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com,
horms@...nel.org, netfilter-devel@...r.kernel.org,
coreteam@...filter.org, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, syzbot@...kaller.appspotmail.com
Subject: Re: [PATCH v5] netfilter: nfnetlink_queue: optimize verdict lookup
with hash table

Hi Scott,

> diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
> index 4aeffddb7586..3d0def310523 100644
> --- a/include/net/netfilter/nf_queue.h
> +++ b/include/net/netfilter/nf_queue.h
> @@ -11,6 +11,7 @@
> /* Each queued (to userspace) skbuff has one of these. */
> struct nf_queue_entry {
> struct list_head list;
> + struct hlist_node hash_node;
> struct sk_buff *skb;
> unsigned int id;
> unsigned int hook_index; /* index in hook_entries->hook[] */
> diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h
> index efcb7c044a74..bc296a17e5aa 100644
> --- a/include/uapi/linux/netfilter/nfnetlink_queue.h
> +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h
> @@ -107,6 +107,7 @@ enum nfqnl_attr_config {
> NFQA_CFG_QUEUE_MAXLEN, /* __u32 */
> NFQA_CFG_MASK, /* identify which flags to change */
> NFQA_CFG_FLAGS, /* value of these flags (__u32) */
> + NFQA_CFG_HASH_SIZE, /* __u32 hash table size (rounded to power of 2) */
This should use the rhashtable implementation; I don't see a good
reason why it was not used in the first place for this enhancement.
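Something along these lines would do, I think (untested sketch; the
params object and the .automatic_shrinking choice are illustrative,
not taken from your patch):

	/* needs #include <linux/rhashtable.h> */
	struct nf_queue_entry {
		struct list_head	list;
		struct rhash_head	hash_node;	/* was: struct hlist_node */
		struct sk_buff		*skb;
		unsigned int		id;
		unsigned int		hook_index;	/* index in hook_entries->hook[] */
		/* remaining fields unchanged */
	};

	/* entries keyed directly on nf_queue_entry->id */
	static const struct rhashtable_params nfqnl_rhash_params = {
		.key_len	= sizeof_field(struct nf_queue_entry, id),
		.key_offset	= offsetof(struct nf_queue_entry, id),
		.head_offset	= offsetof(struct nf_queue_entry, hash_node),
		.automatic_shrinking = true,
	};
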
> __NFQA_CFG_MAX
> };
> #define NFQA_CFG_MAX (__NFQA_CFG_MAX-1)
> diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
> index 8b7b39d8a109..b142fac70ed9 100644
> --- a/net/netfilter/nfnetlink_queue.c
> +++ b/net/netfilter/nfnetlink_queue.c
> @@ -46,7 +46,10 @@
> #include <net/netfilter/nf_conntrack.h>
> #endif
>
> -#define NFQNL_QMAX_DEFAULT 1024
> +#define NFQNL_QMAX_DEFAULT 1024
> +#define NFQNL_MIN_HASH_SIZE 16
> +#define NFQNL_DEFAULT_HASH_SIZE 1024
> +#define NFQNL_MAX_HASH_SIZE 131072
>
> /* We're using struct nlattr which has 16bit nla_len. Note that nla_len
> * includes the header length. Thus, the maximum packet length that we
> @@ -65,6 +68,7 @@ struct nfqnl_instance {
> unsigned int copy_range;
> unsigned int queue_dropped;
> unsigned int queue_user_dropped;
> + unsigned int queue_hash_size;
>
>
> u_int16_t queue_num; /* number of this queue */
> @@ -77,6 +81,8 @@ struct nfqnl_instance {
> spinlock_t lock ____cacheline_aligned_in_smp;
> unsigned int queue_total;
> unsigned int id_sequence; /* 'sequence' of pkt ids */
> + unsigned int queue_hash_mask;
> + struct hlist_head *queue_hash;
> struct list_head queue_list; /* packets in queue */
> };
>
> @@ -95,6 +101,39 @@ static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
> return net_generic(net, nfnl_queue_net_id);
> }
>
> +static inline unsigned int
> +nfqnl_packet_hash(u32 id, unsigned int mask)
> +{
> + return id & mask;
> +}
> +
> +static inline u32
> +nfqnl_normalize_hash_size(u32 hash_size)
> +{
> + /* Must be power of two for queue_hash_mask to work correctly.
> + * Avoid overflow of is_power_of_2 by bounding NFQNL_MAX_HASH_SIZE.
> + */
> + BUILD_BUG_ON(!is_power_of_2(NFQNL_MIN_HASH_SIZE) ||
> + !is_power_of_2(NFQNL_DEFAULT_HASH_SIZE) ||
> + !is_power_of_2(NFQNL_MAX_HASH_SIZE) ||
> + NFQNL_MAX_HASH_SIZE > 1U << 31);
> +
> + if (!hash_size)
> + return NFQNL_DEFAULT_HASH_SIZE;
> +
> + /* Clamp to valid range before power of two to avoid overflow */
> + if (hash_size <= NFQNL_MIN_HASH_SIZE)
> + return NFQNL_MIN_HASH_SIZE;
> +
> + if (hash_size >= NFQNL_MAX_HASH_SIZE)
> + return NFQNL_MAX_HASH_SIZE;
> +
> + if (!is_power_of_2(hash_size))
> + hash_size = roundup_pow_of_two(hash_size);
> +
> + return hash_size;
> +}
> +
> static inline u_int8_t instance_hashfn(u_int16_t queue_num)
> {
> return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
> @@ -114,13 +153,70 @@ instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
> return NULL;
> }
>
> +static int
> +nfqnl_hash_resize(struct nfqnl_instance *inst, u32 hash_size)
rhashtable can handle resizing for you, so users do not need to tune
this hash_size parameter.
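To illustrate (untested; entry_hash as a field name on nfqnl_instance
is made up here), the whole resize path above, and the
NFQA_CFG_HASH_SIZE attribute with it, can simply go away:

	/* in __enqueue_entry(): the table grows (and shrinks, with
	 * .automatic_shrinking) transparently as entries come and go.
	 */
	err = rhashtable_insert_fast(&inst->entry_hash, &entry->hash_node,
				     nfqnl_rhash_params);

	/* in find_dequeue_entry(): O(1) lookup by packet id, with no
	 * manual table sizing involved.
	 */
	entry = rhashtable_lookup_fast(&inst->entry_hash, &id,
				       nfqnl_rhash_params);
	if (entry)
		rhashtable_remove_fast(&inst->entry_hash, &entry->hash_node,
				       nfqnl_rhash_params);
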
> +{
> + struct hlist_head *new_hash, *old_hash;
> + struct nf_queue_entry *entry;
> + unsigned int h, hash_mask;
> +
> + hash_size = nfqnl_normalize_hash_size(hash_size);
> + if (hash_size == inst->queue_hash_size)
> + return 0;
> +
> + /* GFP_ATOMIC required: called under rcu_read_lock in nfqnl_recv_config.
> + * Using GFP_KERNEL_ACCOUNT would require refactoring lock placement.
> + */
> + new_hash = kvmalloc_array(hash_size, sizeof(*new_hash), GFP_ATOMIC);
> + if (!new_hash)
> + return -ENOMEM;
> +
> + hash_mask = hash_size - 1;
> +
> + for (h = 0; h < hash_size; h++)
> + INIT_HLIST_HEAD(&new_hash[h]);
> +
> + spin_lock_bh(&inst->lock);
> +
> + list_for_each_entry(entry, &inst->queue_list, list) {
> + /* No hlist_del() since old_hash will be freed and we hold lock */
> + h = nfqnl_packet_hash(entry->id, hash_mask);
> + hlist_add_head(&entry->hash_node, &new_hash[h]);
> + }
> +
> + old_hash = inst->queue_hash;
> + inst->queue_hash_size = hash_size;
> + inst->queue_hash_mask = hash_mask;
> + inst->queue_hash = new_hash;
> +
> + spin_unlock_bh(&inst->lock);
> +
> + kvfree(old_hash);
> +
> + return 0;
> +}
> +
> static struct nfqnl_instance *
> -instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
> +instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid,
> + u32 hash_size)
> {
> struct nfqnl_instance *inst;
> + struct hlist_head *queue_hash;
> unsigned int h;
> int err;
>
> + hash_size = nfqnl_normalize_hash_size(hash_size);
> +
> + /* GFP_ATOMIC required: called under rcu_read_lock in nfqnl_recv_config.
> + * Using GFP_KERNEL_ACCOUNT would require refactoring lock placement.
> + */
> + queue_hash = kvmalloc_array(hash_size, sizeof(*queue_hash), GFP_ATOMIC);
If rhashtable is used, this can be allocated per netns, and then you
avoid this GFP_ATOMIC allocation for each instance.
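I mean something like this (untested; a verdict_hash table living in
struct nfnl_queue_net is an assumption here, and the lookup key would
then have to fold in queue_num so instances do not collide):

	static int __net_init nfnl_queue_net_init(struct net *net)
	{
		struct nfnl_queue_net *q = nfnl_queue_pernet(net);

		/* process context: rhashtable_init() can allocate with
		 * GFP_KERNEL, and instance_create() no longer needs any
		 * GFP_ATOMIC allocation at all.
		 */
		return rhashtable_init(&q->verdict_hash, &nfqnl_rhash_params);
	}

	static void __net_exit nfnl_queue_net_exit(struct net *net)
	{
		struct nfnl_queue_net *q = nfnl_queue_pernet(net);

		rhashtable_destroy(&q->verdict_hash);
	}
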
> + if (!queue_hash)
> + return ERR_PTR(-ENOMEM);
> +
> + for (h = 0; h < hash_size; h++)
> + INIT_HLIST_HEAD(&queue_hash[h]);
> +
> spin_lock(&q->instances_lock);
> if (instance_lookup(q, queue_num)) {
> err = -EEXIST;