lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220620153555.2504178-5-dchumak@nvidia.com>
Date:   Mon, 20 Jun 2022 18:35:55 +0300
From:   Dima Chumak <dchumak@...dia.com>
To:     Stephen Hemminger <stephen@...workplumber.org>,
        David Ahern <dsahern@...nel.org>
CC:     Jakub Kicinski <kuba@...nel.org>, Jiri Pirko <jiri@...dia.com>,
        "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        "Paolo Abeni" <pabeni@...hat.com>, <netdev@...r.kernel.org>,
        Dima Chumak <dchumak@...dia.com>
Subject: [PATCH iproute2-next 5/5] devlink: Introduce port rate limit_type police

Add a new set of `devlink port func rate {set|add}` CLI parameters that
are supported with DEVLINK_RATE_LIMIT_TYPE_POLICE kernel API. They
require explicit use of 'limit_type police', for example:

  $ devlink port func rate add pci/0000:03:00.0/g1 \
      limit_type police tx_max 10GBps tx_burst 1gb \
                        rx_max 25GBps rx_burst 2gb \
                        tx_pkts 10000 tx_pkts_burst 1gb \
                        rx_pkts 20000 rx_pkts_burst 2gb

  $ devlink port func rate set pci/0000:03:00.0/1 \
      limit_type police tx_max 2GBps tx_burst 256mb \
                        rx_max 8GBps rx_burst 512mb \
                        parent g1

  $ devlink port func rate set pci/0000:03:00.0/2 parent g1

Signed-off-by: Dima Chumak <dchumak@...dia.com>
---
 devlink/devlink.c       | 315 +++++++++++++++++++++++++++++++++++++++-
 man/man8/devlink-rate.8 |  92 +++++++++++-
 2 files changed, 401 insertions(+), 6 deletions(-)

diff --git a/devlink/devlink.c b/devlink/devlink.c
index 9b234f2a6825..8eea45dad285 100644
--- a/devlink/devlink.c
+++ b/devlink/devlink.c
@@ -295,6 +295,13 @@ static void ifname_map_free(struct ifname_map *ifname_map)
 #define DL_OPT_PORT_FN_RATE_NODE_NAME	BIT(50)
 #define DL_OPT_PORT_FN_RATE_PARENT	BIT(51)
 #define DL_OPT_PORT_FN_RATE_LIMIT_TYPE	BIT(52)
+#define DL_OPT_PORT_FN_RATE_TX_BURST	BIT(53)
+#define DL_OPT_PORT_FN_RATE_RX_MAX	BIT(54)
+#define DL_OPT_PORT_FN_RATE_RX_BURST	BIT(55)
+#define DL_OPT_PORT_FN_RATE_TX_PKTS	BIT(56)
+#define DL_OPT_PORT_FN_RATE_TX_PKTS_BURST	BIT(57)
+#define DL_OPT_PORT_FN_RATE_RX_PKTS	BIT(58)
+#define DL_OPT_PORT_FN_RATE_RX_PKTS_BURST	BIT(59)
 
 struct dl_opts {
 	uint64_t present; /* flags of present items */
@@ -356,6 +363,13 @@ struct dl_opts {
 	char *rate_node_name;
 	const char *rate_parent_node;
 	uint16_t rate_limit_type;
+	uint64_t rate_tx_burst;
+	uint64_t rate_rx_max;
+	uint64_t rate_rx_burst;
+	uint64_t rate_tx_pkts;
+	uint64_t rate_tx_pkts_burst;
+	uint64_t rate_rx_pkts;
+	uint64_t rate_rx_pkts_burst;
 };
 
 struct dl {
@@ -1446,6 +1460,8 @@ static int port_fn_rate_limit_type_get(const char *ltypestr, uint16_t *ltype)
 		*ltype = DEVLINK_RATE_LIMIT_TYPE_UNSET;
 	else if (!strcmp(ltypestr, "shaping"))
 		*ltype = DEVLINK_RATE_LIMIT_TYPE_SHAPING;
+	else if (!strcmp(ltypestr, "police"))
+		*ltype = DEVLINK_RATE_LIMIT_TYPE_POLICE;
 	else
 		return -EINVAL;
 	return 0;
@@ -1470,6 +1486,44 @@ static int port_fn_rate_value_get(struct dl *dl, uint64_t *rate)
 	return 0;
 }
 
+/* Fetch the next argument via dl_argv_str() and parse it as a burst size. */
+static int port_fn_rate_size_get(struct dl *dl, uint64_t *size)
+{
+	const char *arg;
+	__u64 parsed;
+	int rc;
+
+	rc = dl_argv_str(dl, &arg);
+	if (rc)
+		return rc;
+
+	if (get_size64(&parsed, arg)) {
+		pr_err("Invalid burst buffer size value: \"%s\"\n", arg);
+		return -EINVAL;
+	}
+	*size = parsed;
+	return 0;
+}
+
+/* Fetch the next argument via dl_argv_str() and parse it as a packet count. */
+static int port_fn_rate_pkts_get(struct dl *dl, uint64_t *pkts)
+{
+	const char *arg;
+	__u64 parsed;
+	int rc;
+
+	rc = dl_argv_str(dl, &arg);
+	if (rc)
+		return rc;
+
+	if (get_size64(&parsed, arg)) {
+		pr_err("Invalid pkts value: \"%s\"\n", arg);
+		return -EINVAL;
+	}
+	*pkts = parsed;
+	return 0;
+}
+
 struct dl_args_metadata {
 	uint64_t o_flag;
 	char err_msg[DL_ARGS_REQUIRED_MAX_ERR_LEN];
@@ -2021,6 +2075,55 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required,
 			if (err)
 				return err;
 			o_found |= DL_OPT_PORT_FN_RATE_TX_MAX;
+		} else if (dl_argv_match(dl, "tx_burst") &&
+			   (o_all & DL_OPT_PORT_FN_RATE_TX_BURST)) {
+			dl_arg_inc(dl);
+			err = port_fn_rate_size_get(dl, &opts->rate_tx_burst);
+			if (err)
+				return err;
+			o_found |= DL_OPT_PORT_FN_RATE_TX_BURST;
+		} else if (dl_argv_match(dl, "rx_max") &&
+			   (o_all & DL_OPT_PORT_FN_RATE_RX_MAX)) {
+			dl_arg_inc(dl);
+			err = port_fn_rate_value_get(dl, &opts->rate_rx_max);
+			if (err)
+				return err;
+			o_found |= DL_OPT_PORT_FN_RATE_RX_MAX;
+		} else if (dl_argv_match(dl, "rx_burst") &&
+			   (o_all & DL_OPT_PORT_FN_RATE_RX_BURST)) {
+			dl_arg_inc(dl);
+			err = port_fn_rate_size_get(dl, &opts->rate_rx_burst);
+			if (err)
+				return err;
+			o_found |= DL_OPT_PORT_FN_RATE_RX_BURST;
+		} else if (dl_argv_match(dl, "tx_pkts") &&
+			   (o_all & DL_OPT_PORT_FN_RATE_TX_PKTS)) {
+			dl_arg_inc(dl);
+			err = port_fn_rate_pkts_get(dl, &opts->rate_tx_pkts);
+			if (err)
+				return err;
+			o_found |= DL_OPT_PORT_FN_RATE_TX_PKTS;
+		} else if (dl_argv_match(dl, "tx_pkts_burst") &&
+			   (o_all & DL_OPT_PORT_FN_RATE_TX_PKTS_BURST)) {
+			dl_arg_inc(dl);
+			err = port_fn_rate_size_get(dl, &opts->rate_tx_pkts_burst);
+			if (err)
+				return err;
+			o_found |= DL_OPT_PORT_FN_RATE_TX_PKTS_BURST;
+		} else if (dl_argv_match(dl, "rx_pkts") &&
+			   (o_all & DL_OPT_PORT_FN_RATE_RX_PKTS)) {
+			dl_arg_inc(dl);
+			err = port_fn_rate_pkts_get(dl, &opts->rate_rx_pkts);
+			if (err)
+				return err;
+			o_found |= DL_OPT_PORT_FN_RATE_RX_PKTS;
+		} else if (dl_argv_match(dl, "rx_pkts_burst") &&
+			   (o_all & DL_OPT_PORT_FN_RATE_RX_PKTS_BURST)) {
+			dl_arg_inc(dl);
+			err = port_fn_rate_size_get(dl, &opts->rate_rx_pkts_burst);
+			if (err)
+				return err;
+			o_found |= DL_OPT_PORT_FN_RATE_RX_PKTS_BURST;
 		} else if (dl_argv_match(dl, "parent") &&
 			   (o_all & DL_OPT_PORT_FN_RATE_PARENT)) {
 			dl_arg_inc(dl);
@@ -2246,6 +2349,27 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl)
 	if (opts->present & DL_OPT_PORT_FN_RATE_TX_MAX)
 		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_MAX,
 				 opts->rate_tx_max);
+	if (opts->present & DL_OPT_PORT_FN_RATE_TX_BURST)
+		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_BURST,
+				 opts->rate_tx_burst);
+	if (opts->present & DL_OPT_PORT_FN_RATE_RX_MAX)
+		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_RX_MAX,
+				 opts->rate_rx_max);
+	if (opts->present & DL_OPT_PORT_FN_RATE_RX_BURST)
+		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_RX_BURST,
+				 opts->rate_rx_burst);
+	if (opts->present & DL_OPT_PORT_FN_RATE_TX_PKTS)
+		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_PKTS,
+				 opts->rate_tx_pkts);
+	if (opts->present & DL_OPT_PORT_FN_RATE_TX_PKTS_BURST)
+		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_PKTS_BURST,
+				 opts->rate_tx_pkts_burst);
+	if (opts->present & DL_OPT_PORT_FN_RATE_RX_PKTS)
+		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_RX_PKTS,
+				 opts->rate_rx_pkts);
+	if (opts->present & DL_OPT_PORT_FN_RATE_RX_PKTS_BURST)
+		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_RX_PKTS_BURST,
+				 opts->rate_rx_pkts_burst);
 	if (opts->present & DL_OPT_PORT_FN_RATE_PARENT)
 		mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME,
 				  opts->rate_parent_node);
@@ -4522,11 +4646,43 @@ static char *port_rate_limit_type_name(uint16_t ltype)
 		return "unset";
 	case DEVLINK_RATE_LIMIT_TYPE_SHAPING:
 		return "shaping";
+	case DEVLINK_RATE_LIMIT_TYPE_POLICE:
+		return "police";
 	default:
 		return "<unknown type>";
 	}
 }
 
+/* Name the first option set in @present that limit type @ltype rejects. */
+static char *port_rate_opt_name(enum devlink_rate_limit_type ltype, uint64_t present)
+{
+	switch (ltype) {
+	case DEVLINK_RATE_LIMIT_TYPE_POLICE:
+		/* tx_share is the only shaping-only option */
+		if (present & DL_OPT_PORT_FN_RATE_TX_SHARE)
+			return "tx_share";
+		break;
+	default:
+		/* callers reject police-only options for every other type */
+		if (present & DL_OPT_PORT_FN_RATE_TX_BURST)
+			return "tx_burst";
+		if (present & DL_OPT_PORT_FN_RATE_RX_MAX)
+			return "rx_max";
+		if (present & DL_OPT_PORT_FN_RATE_RX_BURST)
+			return "rx_burst";
+		if (present & DL_OPT_PORT_FN_RATE_TX_PKTS)
+			return "tx_pkts";
+		if (present & DL_OPT_PORT_FN_RATE_TX_PKTS_BURST)
+			return "tx_pkts_burst";
+		if (present & DL_OPT_PORT_FN_RATE_RX_PKTS)
+			return "rx_pkts";
+		if (present & DL_OPT_PORT_FN_RATE_RX_PKTS_BURST)
+			return "rx_pkts_burst";
+		break;
+	}
+	return "";
+}
+
 static void pr_out_port_fn_rate(struct dl *dl, struct nlattr **tb)
 {
 	uint16_t ltype = DEVLINK_RATE_LIMIT_TYPE_UNSET;
@@ -4567,6 +4723,69 @@ static void pr_out_port_fn_rate(struct dl *dl, struct nlattr **tb)
 			print_rate(use_iec, PRINT_ANY, "tx_max",
 				   " tx_max %s", rate);
 	}
+	if (tb[DEVLINK_ATTR_RATE_TX_BURST] &&
+	    ltype == DEVLINK_RATE_LIMIT_TYPE_POLICE) {
+		uint64_t size =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_TX_BURST]);
+
+		if (size)
+			print_rate(use_iec, PRINT_ANY, "tx_burst",
+				   " tx_burst %s", size);
+	}
+	if (tb[DEVLINK_ATTR_RATE_RX_MAX] &&
+	    ltype == DEVLINK_RATE_LIMIT_TYPE_POLICE) {
+		uint64_t rate =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_RX_MAX]);
+
+		if (rate)
+			print_rate(use_iec, PRINT_ANY, "rx_max",
+				   " rx_max %s", rate);
+	}
+	if (tb[DEVLINK_ATTR_RATE_RX_BURST] &&
+	    ltype == DEVLINK_RATE_LIMIT_TYPE_POLICE) {
+		uint64_t size =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_RX_BURST]);
+
+		if (size)
+			print_rate(use_iec, PRINT_ANY, "rx_burst",
+				   " rx_burst %s", size);
+	}
+	if (tb[DEVLINK_ATTR_RATE_TX_PKTS] &&
+	    ltype == DEVLINK_RATE_LIMIT_TYPE_POLICE) {
+		uint64_t rate =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_TX_PKTS]);
+
+		if (rate)
+			print_rate(use_iec, PRINT_ANY, "tx_pkts",
+				   " tx_pkts %s", rate);
+	}
+	if (tb[DEVLINK_ATTR_RATE_TX_PKTS_BURST] &&
+	    ltype == DEVLINK_RATE_LIMIT_TYPE_POLICE) {
+		uint64_t size =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_TX_PKTS_BURST]);
+
+		if (size)
+			print_rate(use_iec, PRINT_ANY, "tx_pkts_burst",
+				   " tx_pkts_burst %s", size);
+	}
+	if (tb[DEVLINK_ATTR_RATE_RX_PKTS] &&
+	    ltype == DEVLINK_RATE_LIMIT_TYPE_POLICE) {
+		uint64_t rate =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_RX_PKTS]);
+
+		if (rate)
+			print_rate(use_iec, PRINT_ANY, "rx_pkts",
+				   " rx_pkts %s", rate);
+	}
+	if (tb[DEVLINK_ATTR_RATE_RX_PKTS_BURST] &&
+	    ltype == DEVLINK_RATE_LIMIT_TYPE_POLICE) {
+		uint64_t size =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_RX_PKTS_BURST]);
+
+		if (size)
+			print_rate(use_iec, PRINT_ANY, "rx_pkts_burst",
+				   " rx_pkts_burst %s", size);
+	}
 	if (tb[DEVLINK_ATTR_RATE_PARENT_NODE_NAME]) {
 		const char *parent =
 			mnl_attr_get_str(tb[DEVLINK_ATTR_RATE_PARENT_NODE_NAME]);
@@ -4599,9 +4818,17 @@ static void cmd_port_fn_rate_help(void)
 	pr_err("       devlink port function rate show [ DEV/{ PORT_INDEX | NODE_NAME } ]\n");
 	pr_err("       devlink port function rate add DEV/NODE_NAME\n");
 	pr_err("               [ limit_type shaping ][ tx_share VAL ][ tx_max VAL ][ { parent NODE_NAME | noparent } ]\n");
+	pr_err("       devlink port function rate add DEV/NODE_NAME\n");
+	pr_err("               limit_type police [ tx_max VAL [ tx_burst VAL ]][ rx_max VAL [ rx_burst VAL ]]\n");
+	pr_err("                                 [ tx_pkts VAL [ tx_pkts_burst VAL ]][ rx_pkts VAL [ rx_pkts_burst VAL ]]\n");
+	pr_err("                                 [ { parent NODE_NAME | noparent } ]\n");
 	pr_err("       devlink port function rate del DEV/NODE_NAME\n");
 	pr_err("       devlink port function rate set DEV/{ PORT_INDEX | NODE_NAME }\n");
-	pr_err("               [ limit_type shaping ][ tx_share VAL ][ tx_max VAL ][ { parent NODE_NAME | noparent } ]\n\n");
+	pr_err("               [ limit_type shaping ][ tx_share VAL ][ tx_max VAL ][ { parent NODE_NAME | noparent } ]\n");
+	pr_err("       devlink port function rate set DEV/{ PORT_INDEX | NODE_NAME }\n");
+	pr_err("               limit_type police [ tx_max VAL [ tx_burst VAL ]][ rx_max VAL [ rx_burst VAL ]]\n");
+	pr_err("                                 [ tx_pkts VAL [ tx_pkts_burst VAL ]][ rx_pkts VAL [ rx_pkts_burst VAL ]]\n");
+	pr_err("                                 [ { parent NODE_NAME | noparent } ]\n\n");
 	pr_err("       VAL - float or integer value in units of bits or bytes per second (bit|bps)\n");
 	pr_err("       and SI (k-, m-, g-, t-) or IEC (ki-, mi-, gi-, ti-) case-insensitive prefix.\n");
 	pr_err("       Bare number, means bits per second, is possible.\n\n");
@@ -4680,7 +4907,24 @@ static int port_rate_shaping_add(struct dl *dl)
 	return mnlu_gen_socket_sndrcv(&dl->nlg, nlh, NULL, NULL);
 }
 
+/* Build a DEVLINK_CMD_RATE_NEW request, add attributes via dl_opts_put(), send. */
+static int port_rate_police_add(struct dl *dl)
+{
+	struct nlmsghdr *req = mnlu_gen_socket_cmd_prepare(&dl->nlg,
+			DEVLINK_CMD_RATE_NEW, NLM_F_REQUEST | NLM_F_ACK);
+
+	dl_opts_put(req, dl);
+	return mnlu_gen_socket_sndrcv(&dl->nlg, req, NULL, NULL);
+}
+
 #define RATE_SHAPING_OPTS	(DL_OPT_PORT_FN_RATE_TX_SHARE)
+/* Options accepted only with limit_type police; tx_max is not listed here. */
+#define RATE_POLICE_OPTS \
+	(DL_OPT_PORT_FN_RATE_TX_BURST | \
+	 DL_OPT_PORT_FN_RATE_RX_MAX | DL_OPT_PORT_FN_RATE_RX_BURST | \
+	 DL_OPT_PORT_FN_RATE_TX_PKTS | DL_OPT_PORT_FN_RATE_TX_PKTS_BURST | \
+	 DL_OPT_PORT_FN_RATE_RX_PKTS | \
+	 DL_OPT_PORT_FN_RATE_RX_PKTS_BURST)
 
 static int cmd_port_fn_rate_add(struct dl *dl)
 {
@@ -4688,15 +4932,32 @@ static int cmd_port_fn_rate_add(struct dl *dl)
 
 	err = dl_argv_parse(dl, DL_OPT_PORT_FN_RATE_NODE_NAME,
 			    DL_OPT_PORT_FN_RATE_LIMIT_TYPE | DL_OPT_PORT_FN_RATE_TX_MAX |
-			    RATE_SHAPING_OPTS);
+			    RATE_SHAPING_OPTS | RATE_POLICE_OPTS);
 	if (err)
 		return err;
 
+	if ((dl->opts.present & DL_OPT_PORT_FN_RATE_LIMIT_TYPE) &&
+	    dl->opts.rate_limit_type == DEVLINK_RATE_LIMIT_TYPE_POLICE) {
+		if (dl->opts.present & RATE_SHAPING_OPTS) {
+			pr_err("Unsupported option \"%s\" for limit_type \"%s\"\n",
+			       port_rate_opt_name(dl->opts.rate_limit_type, dl->opts.present),
+			       port_rate_limit_type_name(dl->opts.rate_limit_type));
+			return -EINVAL;
+		}
+		return port_rate_police_add(dl);
+	}
+
 	if (!(dl->opts.present & DL_OPT_PORT_FN_RATE_LIMIT_TYPE)) {
 		dl->opts.rate_limit_type = DEVLINK_RATE_LIMIT_TYPE_SHAPING;
 		dl->opts.present |= DL_OPT_PORT_FN_RATE_LIMIT_TYPE;
 	}
 
+	if (dl->opts.present & RATE_POLICE_OPTS) {
+		pr_err("Unsupported option \"%s\" for limit_type \"%s\"\n",
+			port_rate_opt_name(dl->opts.rate_limit_type, dl->opts.present),
+			port_rate_limit_type_name(dl->opts.rate_limit_type));
+		return -EINVAL;
+	}
 
 	return port_rate_shaping_add(dl);
 }
@@ -4734,6 +4995,27 @@ static int port_fn_get_rates_cb(const struct nlmsghdr *nlh, void *data)
 	if (tb[DEVLINK_ATTR_RATE_TX_MAX])
 		opts->rate_tx_max =
 			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_TX_MAX]);
+	if (tb[DEVLINK_ATTR_RATE_TX_BURST])
+		opts->rate_tx_burst =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_TX_BURST]);
+	if (tb[DEVLINK_ATTR_RATE_RX_MAX])
+		opts->rate_rx_max =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_RX_MAX]);
+	if (tb[DEVLINK_ATTR_RATE_RX_BURST])
+		opts->rate_rx_burst =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_RX_BURST]);
+	if (tb[DEVLINK_ATTR_RATE_TX_PKTS])
+		opts->rate_tx_pkts =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_TX_PKTS]);
+	if (tb[DEVLINK_ATTR_RATE_TX_PKTS_BURST])
+		opts->rate_tx_pkts_burst =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_TX_PKTS_BURST]);
+	if (tb[DEVLINK_ATTR_RATE_RX_PKTS])
+		opts->rate_rx_pkts =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_RX_PKTS]);
+	if (tb[DEVLINK_ATTR_RATE_RX_PKTS_BURST])
+		opts->rate_rx_pkts_burst =
+			mnl_attr_get_u64(tb[DEVLINK_ATTR_RATE_RX_PKTS_BURST]);
 	return MNL_CB_OK;
 }
 
@@ -4774,22 +5056,49 @@ static int port_rate_shaping_set(struct dl *dl)
 	return mnlu_gen_socket_sndrcv(&dl->nlg, nlh, NULL, NULL);
 }
 
+/* Build a DEVLINK_CMD_RATE_SET request, add attributes via dl_opts_put(), send. */
+static int port_rate_police_set(struct dl *dl)
+{
+	struct nlmsghdr *req = mnlu_gen_socket_cmd_prepare(&dl->nlg,
+			DEVLINK_CMD_RATE_SET, NLM_F_REQUEST | NLM_F_ACK);
+
+	dl_opts_put(req, dl);
+	return mnlu_gen_socket_sndrcv(&dl->nlg, req, NULL, NULL);
+}
+
 static int cmd_port_fn_rate_set(struct dl *dl)
 {
 	int err;
 
 	err = dl_argv_parse(dl, DL_OPT_HANDLEP | DL_OPT_PORT_FN_RATE_NODE_NAME,
 			    DL_OPT_PORT_FN_RATE_LIMIT_TYPE | DL_OPT_PORT_FN_RATE_TX_MAX |
-			    RATE_SHAPING_OPTS | DL_OPT_PORT_FN_RATE_PARENT);
+			    RATE_SHAPING_OPTS | RATE_POLICE_OPTS | DL_OPT_PORT_FN_RATE_PARENT);
 	if (err)
 		return err;
 
+	if ((dl->opts.present & DL_OPT_PORT_FN_RATE_LIMIT_TYPE) &&
+	    dl->opts.rate_limit_type == DEVLINK_RATE_LIMIT_TYPE_POLICE) {
+		if (dl->opts.present & RATE_SHAPING_OPTS) {
+			pr_err("Unsupported option \"%s\" for limit_type \"%s\"\n",
+			       port_rate_opt_name(dl->opts.rate_limit_type, dl->opts.present),
+			       port_rate_limit_type_name(dl->opts.rate_limit_type));
+			return -EINVAL;
+		}
+		return port_rate_police_set(dl);
+	}
+
 	if (!(dl->opts.present & DL_OPT_PORT_FN_RATE_LIMIT_TYPE) &&
 	    !(dl->opts.present & DL_OPT_PORT_FN_RATE_PARENT)) {
 		dl->opts.rate_limit_type = DEVLINK_RATE_LIMIT_TYPE_SHAPING;
 		dl->opts.present |= DL_OPT_PORT_FN_RATE_LIMIT_TYPE;
 	}
 
+	if (dl->opts.present & RATE_POLICE_OPTS) {
+		pr_err("Unsupported option \"%s\" for limit_type \"%s\"\n",
+			port_rate_opt_name(dl->opts.rate_limit_type, dl->opts.present),
+			port_rate_limit_type_name(dl->opts.rate_limit_type));
+		return -EINVAL;
+	}
 
 	return port_rate_shaping_set(dl);
 }
diff --git a/man/man8/devlink-rate.8 b/man/man8/devlink-rate.8
index 6b7b179a8696..56907590cd9a 100644
--- a/man/man8/devlink-rate.8
+++ b/man/man8/devlink-rate.8
@@ -28,6 +28,12 @@ devlink-rate \- devlink rate management
 .RB [ " limit_type \fIshaping " ]
 .RB [ " tx_share \fIVALUE " ]
 .RB [ " tx_max \fIVALUE " ]
+.RB | " limit_type \fIpolice "
+.RB [ " tx_max \fIVALUE " [ " tx_burst \fIVALUE " ] " " ]
+.RB [ " rx_max \fIVALUE " [ " rx_burst \fIVALUE " ] " " ]
+.RB [ " tx_pkts \fIVALUE " [ " tx_pkts_burst \fIVALUE " ] " " ]
+.RB [ " rx_pkts \fIVALUE " [ " rx_pkts_burst \fIVALUE " ] " " ]
+.RB "}"
 .RB "[ {" " parent \fINODE_NAME " | " noparent " "} ]"
 
 .ti -8
@@ -36,6 +42,12 @@ devlink-rate \- devlink rate management
 .RB [ " limit_type \fIshaping " ]
 .RB [ " tx_share \fIVALUE " ]
 .RB [ " tx_max \fIVALUE " ]
+.RB | " limit_type \fIpolice "
+.RB [ " tx_max \fIVALUE " [ " tx_burst \fIVALUE " ] " " ]
+.RB [ " rx_max \fIVALUE " [ " rx_burst \fIVALUE " ] " " ]
+.RB [ " tx_pkts \fIVALUE " [ " tx_pkts_burst \fIVALUE " ] " " ]
+.RB [ " rx_pkts \fIVALUE " [ " rx_pkts_burst \fIVALUE " ] " " ]
+.RB "}"
 .RB "[ {" " parent \fINODE_NAME " | " noparent " "} ]"
 
 .ti -8
@@ -80,7 +92,7 @@ the last occurrence is used.
 .I DEV/NODE_NAME
 - specifies devlink node rate object.
 .PP
-.BR limit_type " \fIshaping "
+.BR limit_type " {" " \fIshaping " | " \fIpolice " }
 - specifies a kind of rate limiting. The parameter is optional and, if omitted,
 \fIshaping\fR limit type is assumed by default. Each limit type has its own set
 of supported attributes. Some limit types may not be supported by a particular
@@ -93,10 +105,17 @@ This type of rate limiting doesn't require packets to be dropped in order to
 ensure the requested rate, on the other hand it may suffer from excessive delays
 and it cannot be applied to inbound traffic.
 .PP
+.I police
+- limiting traffic rate by dropping excessive packets. This type of rate
+limiting can be applied to both outbound and inbound traffic, and it doesn't
+suffer from delays that might occur with \fIshaping\fR limit type. On the other
+hand, by definition this type of rate limiting may be unacceptable for certain
+applications and workloads that are sensitive to packet loss.
+.PP
 .BI tx_share " VALUE"
 - specifies minimal tx rate value shared among all rate objects. If rate object
 is a part of some rate group, then this value shared with rate objects of this
-rate group.
+rate group. This parameter is specific to \fBlimit_type\fR \fIshaping\fR only.
 .PP
 .BI tx_max " VALUE"
 - specifies maximum tx rate value.
@@ -140,11 +159,72 @@ To specify in IEC units, replace the SI prefix (k-, m-, g-, t-) with IEC prefix
 (ki-, mi-, gi- and ti-) respectively. Input is case-insensitive.
 .RE
 .PP
+.BI tx_burst " VALUE"
+- specifies size of a bucket that's used to buffer spikes when traffic exceeds
+\fBtx_max\fR limit. This parameter is specific to \fBlimit_type\fR \fIpolice\fR
+only.
+.TP 8
+.I VALUE
+This parameter accepts a floating point number, possibly followed by a unit.
+.RS
+.TP
+b or a bare number
+Bytes
+.TP
+k | kb
+Kilobytes
+.TP
+m | mb
+Megabytes
+.TP
+g | gb
+Gigabytes
+.TP
+kbit
+Kilobits
+.TP
+mbit
+Megabits
+.TP
+gbit
+Gigabits
+.RE
+.PP
+.BI rx_max " VALUE"
+- specifies maximum rx rate value. It accepts same values as \fBtx_max\fR. This
+parameter is specific to \fBlimit_type\fR \fIpolice\fR only.
+.PP
+.BI rx_burst " VALUE"
+- specifies size of a bucket that's used to buffer spikes when traffic exceeds
+\fBrx_max\fR limit. It accepts the same values as \fBtx_burst\fR. This parameter
+is specific to \fBlimit_type\fR \fIpolice\fR only.
+.PP
+.BI tx_pkts " VALUE"
+- specifies maximum tx packets per second value. This parameter is specific to
+\fBlimit_type\fR \fIpolice\fR only.
+.PP
+.BI tx_pkts_burst " VALUE"
+- specifies size of a bucket that's used to buffer spikes when traffic exceeds
+\fBtx_pkts\fR limit. It accepts the same values as \fBtx_burst\fR. This
+parameter is specific to \fBlimit_type\fR \fIpolice\fR only.
+.PP
+.BI rx_pkts " VALUE"
+- specifies maximum rx packets per second value. This parameter is specific to
+\fBlimit_type\fR \fIpolice\fR only.
+.PP
+.BI rx_pkts_burst " VALUE"
+- specifies size of a bucket that's used to buffer spikes when traffic exceeds
+\fBrx_pkts\fR limit. It accepts the same values as \fBtx_burst\fR. This
+parameter is specific to \fBlimit_type\fR \fIpolice\fR only.
+.PP
 .BI parent " NODE_NAME \fR| " noparent
 - set rate object parent to existing node with name \fINODE_NAME\fR or unset
 parent. Rate limits of the parent node applied to all it's children. Actual
 behaviour is details of driver's implementation. Setting parent to empty ("")
-name due to the kernel logic threated as parent unset.
+name due to the kernel logic treated as parent unset. It's important that
+\fBlimit_type\fR of the rate object and the parent node should match,
+otherwise setting parent will fail. In other words, it's only possible to group
+rate objects of the same \fBlimit_type\fR.
 
 .SS devlink port function rate add - create node rate object with specified parameters.
 Creates rate object of type node and sets parameters. Parameters same as for the
@@ -222,6 +302,8 @@ pci/0000:03:00.0/some_group type node
         "pci/0000:03:00.0/2": {
 .br
             "type": "leaf",
+.br
+            "limit_type": "shaping",
 .br
             "tx_share": 1500000
 .br
@@ -255,6 +337,10 @@ pci/0000:03:00.0/some_group type node
 # devlink port function rate set pci/0000:03:00.0/1 \\
 .br
 	tx_share 2Mbit tx_max 10Mbit
+.PP
+# devlink port function rate set pci/0000:03:00.0/2 \\
+.br
+	limit_type police rx_max 10Mbit rx_burst 4mb
 .RE
 
 .PP
-- 
2.36.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ