[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <VI1PR0501MB21436A17311E24C23DDFFAF4AB110@VI1PR0501MB2143.eurprd05.prod.outlook.com>
Date: Wed, 10 Jan 2018 02:58:25 +0000
From: Chris Mi <chrism@...lanox.com>
To: Marcelo Ricardo Leitner <marcelo.leitner@...il.com>
CC: "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"gerlitz.or@...il.com" <gerlitz.or@...il.com>,
"stephen@...workplumber.org" <stephen@...workplumber.org>,
"dsahern@...il.com" <dsahern@...il.com>,
"phil@....cc" <phil@....cc>
Subject: RE: [patch iproute2 v7 2/2] tc: Add batchsize feature for filter and
actions
> -----Original Message-----
> From: Marcelo Ricardo Leitner [mailto:marcelo.leitner@...il.com]
> Sent: Wednesday, January 10, 2018 3:14 AM
> To: Chris Mi <chrism@...lanox.com>
> Cc: netdev@...r.kernel.org; gerlitz.or@...il.com;
> stephen@...workplumber.org; dsahern@...il.com; phil@....cc
> Subject: Re: [patch iproute2 v7 2/2] tc: Add batchsize feature for filter and
> actions
>
> On Tue, Jan 09, 2018 at 03:59:08PM +0900, Chris Mi wrote:
> > Currently in tc batch mode, only one command is read from the batch
> > file and sent to kernel to process. With this support, at most 128
> > commands can be accumulated before sending to kernel.
> >
> > Now it only works for the following successive commands:
> > filter and actions add/delete/change/replace.
> >
> > Signed-off-by: Chris Mi <chrism@...lanox.com>
> > ---
> > tc/m_action.c | 60 +++++++++++++++++----------
> > tc/tc.c | 127
> ++++++++++++++++++++++++++++++++++++++++++++++++++-------
> > tc/tc_common.h | 5 ++-
> > tc/tc_filter.c | 97 +++++++++++++++++++++++++------------------
> > 4 files changed, 210 insertions(+), 79 deletions(-)
> >
> > diff --git a/tc/m_action.c b/tc/m_action.c index fc422364..e5c53a80
> > 100644
> > --- a/tc/m_action.c
> > +++ b/tc/m_action.c
> > @@ -546,40 +546,56 @@ bad_val:
> > return ret;
> > }
> >
> > +struct tc_action_req {
> > + struct nlmsghdr n;
> > + struct tcamsg t;
> > + char buf[MAX_MSG];
> > +};
> > +
> > static int tc_action_modify(int cmd, unsigned int flags,
> > - int *argc_p, char ***argv_p)
> > + int *argc_p, char ***argv_p,
> > + void *buf)
> > {
> > - int argc = *argc_p;
> > + struct tc_action_req *req, action_req;
> > char **argv = *argv_p;
> > + struct rtattr *tail;
> > + int argc = *argc_p;
> > + struct iovec iov;
> > int ret = 0;
> > - struct {
> > - struct nlmsghdr n;
> > - struct tcamsg t;
> > - char buf[MAX_MSG];
> > - } req = {
> > - .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg)),
> > - .n.nlmsg_flags = NLM_F_REQUEST | flags,
> > - .n.nlmsg_type = cmd,
> > - .t.tca_family = AF_UNSPEC,
> > - };
> > - struct rtattr *tail = NLMSG_TAIL(&req.n);
> > +
> > + if (buf)
> > + req = buf;
> > + else
> > + req = &action_req;
> > +
> > + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg));
> > + req->n.nlmsg_flags = NLM_F_REQUEST | flags;
> > + req->n.nlmsg_type = cmd;
> > + req->t.tca_family = AF_UNSPEC;
> > + tail = NLMSG_TAIL(&req->n);
> >
> > argc -= 1;
> > argv += 1;
> > - if (parse_action(&argc, &argv, TCA_ACT_TAB, &req.n)) {
> > + if (parse_action(&argc, &argv, TCA_ACT_TAB, &req->n)) {
> > fprintf(stderr, "Illegal \"action\"\n");
> > return -1;
> > }
> > - tail->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail;
> > + tail->rta_len = (void *) NLMSG_TAIL(&req->n) - (void *) tail;
> > +
> > + *argc_p = argc;
> > + *argv_p = argv;
> >
> > - if (rtnl_talk(&rth, &req.n, NULL) < 0) {
> > + iov.iov_base = &req->n;
> > + iov.iov_len = req->n.nlmsg_len;
> > +
> > + if (buf)
> > + return 0;
> > +
> > + if (rtnl_talk_iov(&rth, &iov, 1, NULL) < 0) {
> > fprintf(stderr, "We have an error talking to the kernel\n");
> > ret = -1;
> > }
> >
> > - *argc_p = argc;
> > - *argv_p = argv;
> > -
> > return ret;
> > }
> >
> > @@ -679,7 +695,7 @@ bad_val:
> > return ret;
> > }
> >
> > -int do_action(int argc, char **argv)
> > +int do_action(int argc, char **argv, void *buf)
> > {
> >
> > int ret = 0;
> > @@ -689,12 +705,12 @@ int do_action(int argc, char **argv)
> > if (matches(*argv, "add") == 0) {
> > ret = tc_action_modify(RTM_NEWACTION,
> > NLM_F_EXCL |
> NLM_F_CREATE,
> > - &argc, &argv);
> > + &argc, &argv, buf);
> > } else if (matches(*argv, "change") == 0 ||
> > matches(*argv, "replace") == 0) {
> > ret = tc_action_modify(RTM_NEWACTION,
> > NLM_F_CREATE |
> NLM_F_REPLACE,
> > - &argc, &argv);
> > + &argc, &argv, buf);
> > } else if (matches(*argv, "delete") == 0) {
> > argc -= 1;
> > argv += 1;
> > diff --git a/tc/tc.c b/tc/tc.c
> > index ad9f07e9..f32e4978 100644
> > --- a/tc/tc.c
> > +++ b/tc/tc.c
> > @@ -193,16 +193,16 @@ static void usage(void)
> > " -nm | -nam[es] | { -cf | -conf } path } | -
> j[son]\n");
> > }
> >
> > -static int do_cmd(int argc, char **argv)
> > +static int do_cmd(int argc, char **argv, void *buf)
> > {
> > if (matches(*argv, "qdisc") == 0)
> > return do_qdisc(argc-1, argv+1);
> > if (matches(*argv, "class") == 0)
> > return do_class(argc-1, argv+1);
> > if (matches(*argv, "filter") == 0)
> > - return do_filter(argc-1, argv+1);
> > + return do_filter(argc-1, argv+1, buf);
> > if (matches(*argv, "actions") == 0)
> > - return do_action(argc-1, argv+1);
> > + return do_action(argc-1, argv+1, buf);
> > if (matches(*argv, "monitor") == 0)
> > return do_tcmonitor(argc-1, argv+1);
> > if (matches(*argv, "exec") == 0)
> > @@ -217,11 +217,39 @@ static int do_cmd(int argc, char **argv)
> > return -1;
> > }
> >
> > +static bool batchsize_enabled(int argc, char *argv[]) {
> > + if (argc < 2)
> > + return false;
> > + if ((strcmp(argv[0], "filter") && strcmp(argv[0], "action"))
> > + || (strcmp(argv[1], "add") && strcmp(argv[1], "delete")
> > + && strcmp(argv[1], "change") && strcmp(argv[1], "replace")))
> > + return false;
> > +
> > + return true;
> > +}
> > +
> > +struct batch_buf {
> > + struct batch_buf *next;
> > + char buf[16420]; /* sizeof (struct nlmsghdr) +
> > + sizeof (struct tcmsg) +
> > + MAX_MSG */
> > +};
> > +
> > static int batch(const char *name)
> > {
> > - char *line = NULL;
> > + struct batch_buf *head = NULL, *tail = NULL, *buf;
>
> Hi. Overall it looks much better IMHO.
>
> Maybe declare *buf in the two sections that it is used, to make it clear that it
> cannot be reused between them? (minor, yes..)
Done.
>
> > + char *largv[100], *largv_next[100];
>
> A define for the 100 is probably welcomed,
>
> > + char *line, *line_next = NULL;
> > + bool bs_enabled_next = false;
> > + bool bs_enabled = false;
> > + bool lastline = false;
> > + int largc, largc_next;
> > + bool bs_enabled_saved;
> > + int batchsize = 0;
> > size_t len = 0;
> > - int ret = 0;
> > + int ret = 0, i;
> > + bool send;
> >
> > batch_mode = 1;
> > if (name && strcmp(name, "-") != 0) { @@ -240,23 +268,92 @@ static
> > int batch(const char *name)
> > }
> >
> > cmdlineno = 0;
> > - while (getcmdline(&line, &len, stdin) != -1) {
> > - char *largv[100];
> > - int largc;
> > + if (getcmdline(&line, &len, stdin) == -1)
> > + goto Exit;
> > + largc = makeargs(line, largv, 100);
> > + bs_enabled = batchsize_enabled(largc, largv);
> > + bs_enabled_saved = bs_enabled;
> > + do {
> > + if (getcmdline(&line_next, &len, stdin) == -1)
> > + lastline = true;
> > +
> > + largc_next = makeargs(line_next, largv_next, 100);
> > + bs_enabled_next = batchsize_enabled(largc_next,
> largv_next);
> > + if (bs_enabled) {
> > + buf = calloc(1, sizeof (struct batch_buf));
>
> if (!buf) .. ?
>
> > + if (head == NULL) {
> > + head = tail = buf;
> > + buf->next = NULL;
>
> this
>
> > + } else {
> > + buf->next = NULL;
>
> and this NULL assignments are not needed because you used calloc(), so it's
> already zeroed.
Done.
>
> > + tail->next = buf;
> > + tail = buf;
> > + }
> > + ++batchsize;
> > + }
> > +
> > + /*
> > + * In batch mode, if we haven't accumulated enough
> commands
> > + * and this is not the last command and this command & next
> > + * command both support the batchsize feature, don't send
> the
> > + * message immediately.
> > + */
> > + if (!lastline && bs_enabled && bs_enabled_next
> > + && batchsize != MSG_IOV_MAX)
> > + send = false;
> > + else
> > + send = true;
> > +
> > + line = line_next;
> > + line_next = NULL;
> > + len = 0;
> > + bs_enabled_saved = bs_enabled;
> > + bs_enabled = bs_enabled_next;
> > + bs_enabled_next = false;
> >
> > - largc = makeargs(line, largv, 100);
> > if (largc == 0)
> > continue; /* blank line */
> >
> > - if (do_cmd(largc, largv)) {
> > - fprintf(stderr, "Command failed %s:%d\n", name,
> cmdlineno);
> > + ret = do_cmd(largc, largv, tail == NULL ? NULL : tail->buf);
> > + if (ret != 0) {
> > + fprintf(stderr, "Command failed %s:%d\n", name,
> > + cmdlineno - 1);
> > ret = 1;
> > if (!force)
> > break;
> > }
> > - }
> > - if (line)
> > - free(line);
> > + largc = largc_next;
> > + for (i = 0; i < largc; i ++)
> > + largv[i] = largv_next[i];
>
> This loop can be a single memcpy()
> memcpy(largv, largv_next, sizeof(largv));
Done.
I changed it to:
memcpy(largv, largv_next, largc * sizeof(char *));
>
> > +
> > + if (send && bs_enabled_saved) {
> > + struct iovec *iov, *iovs;
> > + struct nlmsghdr *n;
> > + iov = iovs = malloc(batchsize * sizeof (struct iovec));
> > + for (buf = head; buf != NULL; buf = buf->next, ++iov)
> {
> > + n = (struct nlmsghdr *)&buf->buf;
> > + iov->iov_base = n;
> > + iov->iov_len = n->nlmsg_len;
> > + }
> > +
> > + ret = rtnl_talk_iov(&rth, iovs, batchsize, NULL);
> > + if (ret < 0) {
> > + fprintf(stderr, "Command failed %s:%d\n",
> name,
> > + cmdlineno - (batchsize + ret) - 1);
> > + return 2;
> > + }
> > + for (buf = head; buf != NULL; buf = head) {
> > + head = buf->next;
> > + free(buf);
>
> Maybe we could reuse these buffers? Put them in another list when free
> and pull from it when allocating. And allocate a new one if the recycle pool is
> empty. But maybe glibc already does some work for that.
I thought slab could deal with it properly. But after changing the code as you suggested,
the performance is as good as before. There was a little performance penalty while allocating
the memory dynamically.
Now it takes about 12.5 seconds to insert 1 million rules.
real 0m12.531s
user 0m6.221s
sys 0m6.246s
>
>
> That's all my comments in this patchset. Thanks Chris.
Thanks for all your comments. That really helps me a lot.
>
> > + }
> > + head = tail = NULL;
> > + batchsize = 0;
> > + free(iovs);
> > + }
> > + } while (!lastline);
> > +
> > +Exit:
> > + free(line);
> >
> > rtnl_close(&rth);
> > return ret;
> > @@ -341,7 +438,7 @@ int main(int argc, char **argv)
> > goto Exit;
> > }
> >
> > - ret = do_cmd(argc-1, argv+1);
> > + ret = do_cmd(argc-1, argv+1, NULL);
> > Exit:
> > rtnl_close(&rth);
> >
> > diff --git a/tc/tc_common.h b/tc/tc_common.h index 264fbdac..59018da5
> > 100644
> > --- a/tc/tc_common.h
> > +++ b/tc/tc_common.h
> > @@ -1,13 +1,14 @@
> > /* SPDX-License-Identifier: GPL-2.0 */
> >
> > #define TCA_BUF_MAX (64*1024)
> > +#define MSG_IOV_MAX 128
> >
> > extern struct rtnl_handle rth;
> >
> > extern int do_qdisc(int argc, char **argv); extern int do_class(int
> > argc, char **argv); -extern int do_filter(int argc, char **argv);
> > -extern int do_action(int argc, char **argv);
> > +extern int do_filter(int argc, char **argv, void *buf); extern int
> > +do_action(int argc, char **argv, void *buf);
> > extern int do_tcmonitor(int argc, char **argv); extern int
> > do_exec(int argc, char **argv);
> >
> > diff --git a/tc/tc_filter.c b/tc/tc_filter.c index 545cc3a1..7db4964b
> > 100644
> > --- a/tc/tc_filter.c
> > +++ b/tc/tc_filter.c
> > @@ -42,28 +42,38 @@ static void usage(void)
> > "OPTIONS := ... try tc filter add <desired FILTER_KIND>
> help\n");
> > }
> >
> > -static int tc_filter_modify(int cmd, unsigned int flags, int argc,
> > char **argv)
> > +struct tc_filter_req {
> > + struct nlmsghdr n;
> > + struct tcmsg t;
> > + char buf[MAX_MSG];
> > +};
> > +
> > +static int tc_filter_modify(int cmd, unsigned int flags, int argc, char **argv,
> > + void *buf)
> > {
> > - struct {
> > - struct nlmsghdr n;
> > - struct tcmsg t;
> > - char buf[MAX_MSG];
> > - } req = {
> > - .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
> > - .n.nlmsg_flags = NLM_F_REQUEST | flags,
> > - .n.nlmsg_type = cmd,
> > - .t.tcm_family = AF_UNSPEC,
> > - };
> > + struct tc_filter_req *req, filter_req;
> > struct filter_util *q = NULL;
> > - __u32 prio = 0;
> > - __u32 protocol = 0;
> > - int protocol_set = 0;
> > - __u32 chain_index;
> > + struct tc_estimator est = {};
> > + char k[FILTER_NAMESZ] = {};
> > int chain_index_set = 0;
> > + char d[IFNAMSIZ] = {};
> > + int protocol_set = 0;
> > char *fhandle = NULL;
> > - char d[IFNAMSIZ] = {};
> > - char k[FILTER_NAMESZ] = {};
> > - struct tc_estimator est = {};
> > + __u32 protocol = 0;
> > + __u32 chain_index;
> > + struct iovec iov;
> > + __u32 prio = 0;
> > + int ret;
> > +
> > + if (buf)
> > + req = buf;
> > + else
> > + req = &filter_req;
> > +
> > + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
> > + req->n.nlmsg_flags = NLM_F_REQUEST | flags;
> > + req->n.nlmsg_type = cmd;
> > + req->t.tcm_family = AF_UNSPEC;
> >
> > if (cmd == RTM_NEWTFILTER && flags & NLM_F_CREATE)
> > protocol = htons(ETH_P_ALL);
> > @@ -75,37 +85,37 @@ static int tc_filter_modify(int cmd, unsigned int flags,
> int argc, char **argv)
> > duparg("dev", *argv);
> > strncpy(d, *argv, sizeof(d)-1);
> > } else if (strcmp(*argv, "root") == 0) {
> > - if (req.t.tcm_parent) {
> > + if (req->t.tcm_parent) {
> > fprintf(stderr,
> > "Error: \"root\" is duplicate parent
> ID\n");
> > return -1;
> > }
> > - req.t.tcm_parent = TC_H_ROOT;
> > + req->t.tcm_parent = TC_H_ROOT;
> > } else if (strcmp(*argv, "ingress") == 0) {
> > - if (req.t.tcm_parent) {
> > + if (req->t.tcm_parent) {
> > fprintf(stderr,
> > "Error: \"ingress\" is duplicate parent
> ID\n");
> > return -1;
> > }
> > - req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT,
> > + req->t.tcm_parent = TC_H_MAKE(TC_H_CLSACT,
> > TC_H_MIN_INGRESS);
> > } else if (strcmp(*argv, "egress") == 0) {
> > - if (req.t.tcm_parent) {
> > + if (req->t.tcm_parent) {
> > fprintf(stderr,
> > "Error: \"egress\" is duplicate parent
> ID\n");
> > return -1;
> > }
> > - req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT,
> > + req->t.tcm_parent = TC_H_MAKE(TC_H_CLSACT,
> > TC_H_MIN_EGRESS);
> > } else if (strcmp(*argv, "parent") == 0) {
> > __u32 handle;
> >
> > NEXT_ARG();
> > - if (req.t.tcm_parent)
> > + if (req->t.tcm_parent)
> > duparg("parent", *argv);
> > if (get_tc_classid(&handle, *argv))
> > invarg("Invalid parent ID", *argv);
> > - req.t.tcm_parent = handle;
> > + req->t.tcm_parent = handle;
> > } else if (strcmp(*argv, "handle") == 0) {
> > NEXT_ARG();
> > if (fhandle)
> > @@ -152,26 +162,26 @@ static int tc_filter_modify(int cmd, unsigned int
> flags, int argc, char **argv)
> > argc--; argv++;
> > }
> >
> > - req.t.tcm_info = TC_H_MAKE(prio<<16, protocol);
> > + req->t.tcm_info = TC_H_MAKE(prio<<16, protocol);
> >
> > if (chain_index_set)
> > - addattr32(&req.n, sizeof(req), TCA_CHAIN, chain_index);
> > + addattr32(&req->n, sizeof(*req), TCA_CHAIN, chain_index);
> >
> > if (k[0])
> > - addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1);
> > + addattr_l(&req->n, sizeof(*req), TCA_KIND, k, strlen(k)+1);
> >
> > if (d[0]) {
> > ll_init_map(&rth);
> >
> > - req.t.tcm_ifindex = ll_name_to_index(d);
> > - if (req.t.tcm_ifindex == 0) {
> > + req->t.tcm_ifindex = ll_name_to_index(d);
> > + if (req->t.tcm_ifindex == 0) {
> > fprintf(stderr, "Cannot find device \"%s\"\n", d);
> > return 1;
> > }
> > }
> >
> > if (q) {
> > - if (q->parse_fopt(q, fhandle, argc, argv, &req.n))
> > + if (q->parse_fopt(q, fhandle, argc, argv, &req->n))
> > return 1;
> > } else {
> > if (fhandle) {
> > @@ -190,10 +200,17 @@ static int tc_filter_modify(int cmd, unsigned int
> flags, int argc, char **argv)
> > }
> >
> > if (est.ewma_log)
> > - addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est));
> > + addattr_l(&req->n, sizeof(*req), TCA_RATE, &est,
> sizeof(est));
> >
> > - if (rtnl_talk(&rth, &req.n, NULL) < 0) {
> > - fprintf(stderr, "We have an error talking to the kernel\n");
> > + iov.iov_base = &req->n;
> > + iov.iov_len = req->n.nlmsg_len;
> > +
> > + if (buf)
> > + return 0;
> > +
> > + ret = rtnl_talk_iov(&rth, &iov, 1, NULL);
> > + if (ret < 0) {
> > + fprintf(stderr, "We have an error talking to the kernel, %d\n",
> > +ret);
> > return 2;
> > }
> >
> > @@ -636,20 +653,20 @@ static int tc_filter_list(int argc, char **argv)
> > return 0;
> > }
> >
> > -int do_filter(int argc, char **argv)
> > +int do_filter(int argc, char **argv, void *buf)
> > {
> > if (argc < 1)
> > return tc_filter_list(0, NULL);
> > if (matches(*argv, "add") == 0)
> > return tc_filter_modify(RTM_NEWTFILTER,
> NLM_F_EXCL|NLM_F_CREATE,
> > - argc-1, argv+1);
> > + argc-1, argv+1, buf);
> > if (matches(*argv, "change") == 0)
> > - return tc_filter_modify(RTM_NEWTFILTER, 0, argc-1, argv+1);
> > + return tc_filter_modify(RTM_NEWTFILTER, 0, argc-1, argv+1,
> buf);
> > if (matches(*argv, "replace") == 0)
> > return tc_filter_modify(RTM_NEWTFILTER, NLM_F_CREATE,
> argc-1,
> > - argv+1);
> > + argv+1, buf);
> > if (matches(*argv, "delete") == 0)
> > - return tc_filter_modify(RTM_DELTFILTER, 0, argc-1, argv+1);
> > + return tc_filter_modify(RTM_DELTFILTER, 0, argc-1, argv+1,
> buf);
> > if (matches(*argv, "get") == 0)
> > return tc_filter_get(RTM_GETTFILTER, 0, argc-1, argv+1);
> > if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0
> > --
> > 2.14.3
> >
Powered by blists - more mailing lists