lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20171219072231.055bcc9d@xeon-e3>
Date:   Tue, 19 Dec 2017 07:22:31 -0800
From:   Stephen Hemminger <stephen@...workplumber.org>
To:     Chris Mi <chrism@...lanox.com>
Cc:     netdev@...r.kernel.org, gerlitz.or@...il.com
Subject: Re: [patch iproute2] tc: add -bs option for batch mode

On Tue, 19 Dec 2017 15:33:46 +0900
Chris Mi <chrism@...lanox.com> wrote:

> Currently in tc batch mode, only one command is read from the batch
> file and sent to kernel to process. With this patch, we can accumulate
> several commands before sending to kernel. The batch size is specified
> using option -bs or -batchsize.
> 
> To accumulate the commands in tc, we allocate an array of struct iovec.
> If batchsize is bigger than 1 and we haven't accumulated enough
> commands, rtnl_talk() will return without actually sending the message.
> One exception is that there is no more command in the batch file.
> 
> But please note that kernel still processes the requests one by one.
> To process the requests in parallel in kernel is another effort.
> The time we're saving in this patch is the user mode and kernel mode
> context switch. So this patch works on top of the current kernel.
> 
> Using the following script in kernel, we can generate 1,000,000 rules.
> 	tools/testing/selftests/tc-testing/tdc_batch.py
> 
> Without this patch, 'tc -b $file' exection time is:
> 
> real    0m14.916s
> user    0m6.808s
> sys     0m8.046s
> 
> With this patch, 'tc -b $file -bs 10' exection time is:
> 
> real    0m12.286s
> user    0m5.903s
> sys     0m6.312s
> 
> The insertion rate is improved more than 10%.
> 
> Signed-off-by: Chris Mi <chrism@...lanox.com>
> ---
>  include/libnetlink.h | 27 ++++++++++++++++
>  include/utils.h      |  8 +++++
>  lib/libnetlink.c     | 30 +++++++++++++-----
>  lib/utils.c          | 20 ++++++++++++
>  man/man8/tc.8        |  5 +++
>  tc/m_action.c        | 63 ++++++++++++++++++++++++++++---------
>  tc/tc.c              | 27 ++++++++++++++--
>  tc/tc_common.h       |  3 ++
>  tc/tc_filter.c       | 89 ++++++++++++++++++++++++++++++++++++----------------
>  9 files changed, 221 insertions(+), 51 deletions(-)

In addition to my earlier comments, these are the implementation
issues.

> 
> diff --git a/include/libnetlink.h b/include/libnetlink.h
> index a4d83b9e..07e88c94 100644
> --- a/include/libnetlink.h
> +++ b/include/libnetlink.h
> @@ -13,6 +13,8 @@
>  #include <linux/netconf.h>
>  #include <arpa/inet.h>
>  
> +#define MSG_IOV_MAX 256
> +
>  struct rtnl_handle {
>  	int			fd;
>  	struct sockaddr_nl	local;
> @@ -93,6 +95,31 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
>  			void *arg, __u16 nc_flags);
>  #define rtnl_dump_filter(rth, filter, arg) \
>  	rtnl_dump_filter_nc(rth, filter, arg, 0)
> +
> +extern int msg_iov_index;
> +static inline int get_msg_iov_index(void)
> +{
> +	return msg_iov_index;
> +}
> +static inline void set_msg_iov_index(int index)
> +{
> +	msg_iov_index = index;
> +}
> +static inline void incr_msg_iov_index(void)
> +{
> +	++msg_iov_index;
> +}
> +
> +extern int batch_size;
> +static inline int get_batch_size(void)
> +{
> +	return batch_size;
> +}
> +static inline void set_batch_size(int size)
> +{
> +	batch_size = size;
> +}

Iproute2 is C not C++; no accessors for every variable. 


>  int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
>  	      struct nlmsghdr **answer)
>  	__attribute__((warn_unused_result));
> diff --git a/include/utils.h b/include/utils.h
> index d3895d56..66cb4747 100644
> --- a/include/utils.h
> +++ b/include/utils.h
> @@ -235,6 +235,14 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n);
>  
>  extern int cmdlineno;
>  ssize_t getcmdline(char **line, size_t *len, FILE *in);
> +
> +extern int cmdlinetotal;
> +static inline int getcmdlinetotal(void)
> +{
> +	return cmdlinetotal;
> +}
> +void setcmdlinetotal(const char *name);
> +
>  int makeargs(char *line, char *argv[], int maxargs);
>  int inet_get_addr(const char *src, __u32 *dst, struct in6_addr *dst6);
>  
> diff --git a/lib/libnetlink.c b/lib/libnetlink.c
> index 00e6ce0c..f9be1c6d 100644
> --- a/lib/libnetlink.c
> +++ b/lib/libnetlink.c
> @@ -24,6 +24,7 @@
>  #include <sys/uio.h>
>  
>  #include "libnetlink.h"
> +#include "utils.h"
>  
>  #ifndef SOL_NETLINK
>  #define SOL_NETLINK 270
> @@ -581,6 +582,10 @@ static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err,
>  		strerror(-err->error));
>  }
>  
> +static struct iovec msg_iov[MSG_IOV_MAX];
> +int msg_iov_index;
> +int batch_size = 1;
> +
>  static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
>  		       struct nlmsghdr **answer,
>  		       bool show_rtnl_err, nl_ext_ack_fn_t errfn)
> @@ -589,23 +594,34 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
>  	unsigned int seq;
>  	struct nlmsghdr *h;
>  	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
> -	struct iovec iov = {
> -		.iov_base = n,
> -		.iov_len = n->nlmsg_len
> -	};
> +	struct iovec *iov = &msg_iov[get_msg_iov_index()];
> +	int index;
> +	char *buf;
> +
> +	iov->iov_base = n;
> +	iov->iov_len = n->nlmsg_len;
> +
> +	incr_msg_iov_index();
>  	struct msghdr msg = {
>  		.msg_name = &nladdr,
>  		.msg_namelen = sizeof(nladdr),
> -		.msg_iov = &iov,
> -		.msg_iovlen = 1,
> +		.msg_iov = msg_iov,
> +		.msg_iovlen = get_msg_iov_index(),
>  	};
> -	char *buf;
>  
>  	n->nlmsg_seq = seq = ++rtnl->seq;
>  
>  	if (answer == NULL)
>  		n->nlmsg_flags |= NLM_F_ACK;

Your real performance win is just not asking for ACK for every rule.
If you switched to pure streaming mode, then the batching wouldn't
be necessary.

>  
> +	index = get_msg_iov_index() % get_batch_size();
> +	if (index != 0 && get_batch_size() > 1 &&
> +	    cmdlineno != getcmdlinetotal() &&
> +	    (n->nlmsg_type == RTM_NEWTFILTER ||
> +	    n->nlmsg_type == RTM_NEWACTION))
> +		return 3;
> +	set_msg_iov_index(index);
> +
>  	status = sendmsg(rtnl->fd, &msg, 0);
>  	if (status < 0) {
>  		perror("Cannot talk to rtnetlink");
> diff --git a/lib/utils.c b/lib/utils.c
> index 7ced8c06..53ca389f 100644
> --- a/lib/utils.c
> +++ b/lib/utils.c
> @@ -1202,6 +1202,26 @@ ssize_t getcmdline(char **linep, size_t *lenp, FILE *in)
>  	return cc;
>  }
>  
> +int cmdlinetotal;
> +
> +void setcmdlinetotal(const char *name)
> +{
> +	char *line = NULL;
> +	size_t len = 0;
> +
> +	if (name && strcmp(name, "-") != 0) {
> +		if (freopen(name, "r", stdin) == NULL) {
> +			fprintf(stderr, "Cannot open file \"%s\" for reading: %s\n",
> +				name, strerror(errno));
> +			return;
> +		}
> +	}
> +
> +	cmdlinetotal = 0;
> +	while (getcmdline(&line, &len, stdin) != -1)
> +		cmdlinetotal++;
> +}
> +
>  /* split command line into argument vector */
>  int makeargs(char *line, char *argv[], int maxargs)
>  {
> diff --git a/man/man8/tc.8 b/man/man8/tc.8
> index ff071b33..de137e16 100644
> --- a/man/man8/tc.8
> +++ b/man/man8/tc.8
> @@ -601,6 +601,11 @@ must exist already.
>  read commands from provided file or standard input and invoke them.
>  First failure will cause termination of tc.
>  
> +.TP
> +.BR "\-bs", " \-bs size", " \-batchsize", " \-batchsize size"
> +How many commands are accumulated before sending to kernel.
> +By default, it is 1. It only takes effect in batch mode.
> +
>  .TP
>  .BR "\-force"
>  don't terminate tc on errors in batch mode.
> diff --git a/tc/m_action.c b/tc/m_action.c
> index 13f942bf..574b78ca 100644
> --- a/tc/m_action.c
> +++ b/tc/m_action.c
> @@ -23,6 +23,7 @@
>  #include <arpa/inet.h>
>  #include <string.h>
>  #include <dlfcn.h>
> +#include <errno.h>
>  
>  #include "utils.h"
>  #include "tc_common.h"
> @@ -546,33 +547,65 @@ bad_val:
>  	return ret;
>  }
>  
> +typedef struct {
> +	struct nlmsghdr		n;
> +	struct tcamsg		t;
> +	char			buf[MAX_MSG];
> +} tc_action_req;
> +
> +static tc_action_req *action_reqs;
> +
> +void free_action_reqs(void)
> +{
> +	if (action_reqs)
> +		free(action_reqs);
> +}
> +
> +static tc_action_req *get_action_req(void)
> +{
> +	tc_action_req *req;
> +
> +	if (action_reqs == NULL) {
> +		action_reqs = malloc(get_batch_size() * sizeof (tc_action_req));
> +		if (action_reqs == NULL)
> +			return NULL;
> +	}
> +	req = &action_reqs[get_msg_iov_index()];
> +	memset(req, 0, sizeof (*req));
> +
> +	return req;
> +}
> +
>  static int tc_action_modify(int cmd, unsigned int flags,
>  			    int *argc_p, char ***argv_p)
>  {
>  	int argc = *argc_p;
>  	char **argv = *argv_p;
>  	int ret = 0;
> -	struct {
> -		struct nlmsghdr         n;
> -		struct tcamsg           t;
> -		char                    buf[MAX_MSG];
> -	} req = {
> -		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg)),
> -		.n.nlmsg_flags = NLM_F_REQUEST | flags,
> -		.n.nlmsg_type = cmd,
> -		.t.tca_family = AF_UNSPEC,
> -	};
> -	struct rtattr *tail = NLMSG_TAIL(&req.n);
> +	tc_action_req *req;
> +
> +	req = get_action_req();
> +	if (req == NULL) {
> +		fprintf(stderr, "get_action_req error: not enough buffer\n");
> +		return -ENOMEM;
> +	}
> +
> +	req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg));
> +	req->n.nlmsg_flags = NLM_F_REQUEST | flags;
> +	req->n.nlmsg_type = cmd;
> +	req->t.tca_family = AF_UNSPEC;
> +	struct rtattr *tail = NLMSG_TAIL(&req->n);
>  
>  	argc -= 1;
>  	argv += 1;
> -	if (parse_action(&argc, &argv, TCA_ACT_TAB, &req.n)) {
> +	if (parse_action(&argc, &argv, TCA_ACT_TAB, &req->n)) {
>  		fprintf(stderr, "Illegal \"action\"\n");
>  		return -1;
>  	}
> -	tail->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail;
> +	tail->rta_len = (void *) NLMSG_TAIL(&req->n) - (void *) tail;
>  
> -	if (rtnl_talk(&rth, &req.n, NULL) < 0) {
> +	ret = rtnl_talk(&rth, &req->n, NULL);
> +	if (ret < 0) {
>  		fprintf(stderr, "We have an error talking to the kernel\n");
>  		ret = -1;
>  	}
> @@ -739,5 +772,5 @@ int do_action(int argc, char **argv)
>  			return -1;
>  	}
>  
> -	return 0;
> +	return ret;
>  }
> diff --git a/tc/tc.c b/tc/tc.c
> index ad9f07e9..9c8e828b 100644
> --- a/tc/tc.c
> +++ b/tc/tc.c
> @@ -189,7 +189,7 @@ static void usage(void)
>  	fprintf(stderr, "Usage: tc [ OPTIONS ] OBJECT { COMMAND | help }\n"
>  			"       tc [-force] -batch filename\n"
>  			"where  OBJECT := { qdisc | class | filter | action | monitor | exec }\n"
> -	                "       OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -p[retty] | -b[atch] [filename] | -n[etns] name |\n"
> +	                "       OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -p[retty] | -b[atch] [filename] | -bs | -batchsize [size] | -n[etns] name |\n"
>  			"                    -nm | -nam[es] | { -cf | -conf } path } | -j[son]\n");
>  }
>  
> @@ -223,6 +223,9 @@ static int batch(const char *name)
>  	size_t len = 0;
>  	int ret = 0;
>  
> +	if (get_batch_size() > 1)
> +		setcmdlinetotal(name);
> +
>  	batch_mode = 1;
>  	if (name && strcmp(name, "-") != 0) {
>  		if (freopen(name, "r", stdin) == NULL) {
> @@ -248,15 +251,22 @@ static int batch(const char *name)
>  		if (largc == 0)
>  			continue;	/* blank line */
>  
> -		if (do_cmd(largc, largv)) {
> +		ret = do_cmd(largc, largv);
> +		if (ret != 0 && ret != 3) {
>  			fprintf(stderr, "Command failed %s:%d\n", name, cmdlineno);
>  			ret = 1;
>  			if (!force)
>  				break;
>  		}
>  	}
> +	if (ret == 3)
> +		fprintf(stderr,
> +			"Command is not sent to kernel due to internal error %s:%d\n",
> +			name, cmdlineno);
>  	if (line)
>  		free(line);
> +	free_filter_reqs();
> +	free_action_reqs();
>  
>  	rtnl_close(&rth);
>  	return ret;
> @@ -267,6 +277,7 @@ int main(int argc, char **argv)
>  {
>  	int ret;
>  	char *batch_file = NULL;
> +	int size;
>  
>  	while (argc > 1) {
>  		if (argv[1][0] != '-')
> @@ -297,6 +308,16 @@ int main(int argc, char **argv)
>  			if (argc <= 1)
>  				usage();
>  			batch_file = argv[1];
> +		} else if (matches(argv[1], "-batchsize") == 0 ||
> +				matches(argv[1], "-bs") == 0) {
> +			argc--;	argv++;
> +			if (argc <= 1)
> +				usage();
> +			size = atoi(argv[1]);
> +			if (size > MSG_IOV_MAX)
> +				set_batch_size(MSG_IOV_MAX);
> +			else
> +				set_batch_size(size);
>  		} else if (matches(argv[1], "-netns") == 0) {
>  			NEXT_ARG();
>  			if (netns_switch(argv[1]))
> @@ -342,6 +363,8 @@ int main(int argc, char **argv)
>  	}
>  
>  	ret = do_cmd(argc-1, argv+1);
> +	free_filter_reqs();
> +	free_action_reqs();
>  Exit:
>  	rtnl_close(&rth);
>  
> diff --git a/tc/tc_common.h b/tc/tc_common.h
> index 264fbdac..fde8db1b 100644
> --- a/tc/tc_common.h
> +++ b/tc/tc_common.h
> @@ -24,5 +24,8 @@ struct tc_sizespec;
>  extern int parse_size_table(int *p_argc, char ***p_argv, struct tc_sizespec *s);
>  extern int check_size_table_opts(struct tc_sizespec *s);
>  
> +extern void free_filter_reqs(void);
> +extern void free_action_reqs(void);
> +
>  extern int show_graph;
>  extern bool use_names;
> diff --git a/tc/tc_filter.c b/tc/tc_filter.c
> index 545cc3a1..dc53c128 100644
> --- a/tc/tc_filter.c
> +++ b/tc/tc_filter.c
> @@ -19,6 +19,7 @@
>  #include <arpa/inet.h>
>  #include <string.h>
>  #include <linux/if_ether.h>
> +#include <errno.h>
>  
>  #include "rt_names.h"
>  #include "utils.h"
> @@ -42,18 +43,51 @@ static void usage(void)
>  		"OPTIONS := ... try tc filter add <desired FILTER_KIND> help\n");
>  }
>  
> +typedef struct {
> +	struct nlmsghdr		n;
> +	struct tcmsg		t;
> +	char			buf[MAX_MSG];
> +} tc_filter_req;
> +
> +static tc_filter_req *filter_reqs;
> +
> +void free_filter_reqs(void)
> +{
> +	if (filter_reqs)
> +		free(filter_reqs);
> +}

You don't need to check for NULL when calling free.
free(NULL) is a nop. 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ