lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <661cb613.7974.18cf30c4a42.Coremail.00107082@163.com>
Date: Wed, 10 Jan 2024 19:05:18 +0800 (CST)
From: "David Wang" <00107082@....com>
To: "Jozsef Kadlecsik" <kadlec@...filter.org>
Cc: ale.crismani@...omattic.com, xiaolinkui@...inos.cn, pablo@...filter.org, 
	linux-kernel@...r.kernel.org, netfilter-devel@...r.kernel.org
Subject: Re: Performance regression in ip_set_swap on 6.1.69



At 2024-01-10 18:35:02, "Jozsef Kadlecsik" <kadlec@...filter.org> wrote:
>On Wed, 10 Jan 2024, David Wang wrote:
>
>> I confirmed this on 6.7 that this was introduced by commit 
>> 28628fa952fefc7f2072ce6e8016968cc452b1ba with following changes:
>> 
>> 	 static inline void
>> 	@@ -1397,6 +1394,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
>> 		ip_set(inst, to_id) = from;
>> 		write_unlock_bh(&ip_set_ref_lock);
>> 	 
>> 	+       /* Make sure all readers of the old set pointers are completed. */
>> 	+       synchronize_rcu();
>> 	+
>> 		return 0;
>> 	 }
>> 
>> synchronize_rcu causes the delay, and its usage here is very confusing, 
>> there is no reclaimer code after it.
>
>As I'm seeing just the end of the discussion, please send a full report of 
>the problem and how to reproduce it.
>

This was reported in https://lore.kernel.org/lkml/C0829B10-EAA6-4809-874E-E1E9C05A8D84@automattic.com/ by ale.crismani@...omattic.com
Just out of interest of performance issues,  I tried to reproduce it with a test stressing ipset_swap:

My test code is as following, it would stress swapping ipset 'foo' with 'bar'; (foo/bar ipset needs to be created before the test.)
With latest 6.7, the stress would take about 180 seconds to finish, but with `synchronize_rcu` removed, it only took 3seconds.


```
unsigned char mbuffer[4096];
int main() {
	int err;
	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER);
	if (sock<0) {
		perror("Fail to create socket");
		return 1;
	}
	struct sockaddr_nl addr = {
		.nl_family = AF_NETLINK,
		.nl_pad = 0,
		.nl_pid = 0,
		.nl_groups = 0
	};
	struct sockaddr raddr = {0};
	socklen_t rsize;
	int seq = 0x12345678;
	err = bind(sock, (struct sockaddr*)&addr, sizeof(addr));
	if (err) {
		perror("Fail to bind");
		return 1;
	}
	err = getsockname(sock, &raddr, &rsize);
	if (err) {
		perror("Fail to getsockname");
		return 1;
	}
	unsigned char buf[64];
	struct nlmsghdr *phdr;
	struct nfgenmsg *pnfg;
	struct nlattr *pnla;
	unsigned int total;
	ssize_t rz;
	struct iovec iovs;
	iovs.iov_base = mbuffer;
	iovs.iov_len = sizeof(mbuffer);
	struct msghdr msg = {0};
	msg.msg_name = &addr;
	msg.msg_namelen = sizeof(addr);
	msg.msg_iov = &iovs;
	msg.msg_iovlen = 1;

	memset(buf, 0, sizeof(buf));
	total = 0;
	phdr = (struct nlmsghdr*)(buf+total);
	total += sizeof(struct nlmsghdr);
	phdr->nlmsg_type=NFNL_SUBSYS_IPSET<<8|IPSET_CMD_PROTOCOL;
	phdr->nlmsg_seq = seq;
	phdr->nlmsg_flags = NLM_F_REQUEST;
	pnfg = (struct nfgenmsg*)(buf+total);
	total += sizeof(struct nfgenmsg);
	pnfg->nfgen_family=AF_INET;
    	pnfg->version= NFNETLINK_V0;
	pnfg->res_id=htons(0);
	pnla = (struct nlattr *)(buf+total);
	pnla->nla_len = 5;
	pnla->nla_type = 1;
	buf[total+sizeof(struct nlattr)]=0x06;
	total+=8;
	phdr->nlmsg_len = total;
	rz = sendto(sock, buf, total, 0, (struct sockaddr*)&addr, sizeof(addr));
	rz = recvmsg(sock, &msg, 0);

	pnla = (struct nlattr *)(buf+total);
	pnla->nla_len = 8;
	pnla->nla_type = 2;
	char *p = buf+(total+sizeof(struct nlattr));
	p[0]='f'; p[1]='o'; p[2]='o'; p[3]=0;
	total+=8;
	pnla = (struct nlattr *)(buf+total);
	pnla->nla_len = 8;
	pnla->nla_type = 3;
	p = buf+(total+sizeof(struct nlattr));
	p[0]='b'; p[1]='a'; p[2]='r'; p[3]=0;
	total+=8;
	phdr->nlmsg_type = NFNL_SUBSYS_IPSET<<8|IPSET_CMD_SWAP;
	phdr->nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK;
	phdr->nlmsg_len = total;

	
	for (int i=0; i<10000; i++) {
		// stress swap foo bar
		phdr->nlmsg_seq++;
		sendto(sock, buf, total, 0, (struct sockaddr*)&addr, sizeof(addr));
		recvmsg(sock, &msg, 0);
	}

	close(sock);
	return 0;
}
```

>Best regards,
>Jozsef

David

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ