[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200324193250.1322038-2-idosch@idosch.org>
Date: Tue, 24 Mar 2020 21:32:36 +0200
From: Ido Schimmel <idosch@...sch.org>
To: netdev@...r.kernel.org
Cc: davem@...emloft.net, jiri@...lanox.com, kuba@...nel.org,
andrew@...n.ch, f.fainelli@...il.com, vivien.didelot@...il.com,
roopa@...ulusnetworks.com, nikolay@...ulusnetworks.com,
mlxsw@...lanox.com, Ido Schimmel <idosch@...lanox.com>
Subject: [PATCH net-next 01/15] devlink: Add packet trap policers support
From: Ido Schimmel <idosch@...lanox.com>
Devices capable of offloading the kernel's datapath and perform
functions such as bridging and routing must also be able to send (trap)
specific packets to the kernel (i.e., the CPU) for processing.
For example, a device acting as a multicast-aware bridge must be able to
trap IGMP membership reports to the kernel for processing by the bridge
module.
In most cases, the underlying device is capable of handling packet rates
that are several orders of magnitude higher compared to those that can
be handled by the CPU.
Therefore, in order to prevent the underlying device from overwhelming
the CPU, devices usually include packet trap policers that are able to
police the trapped packets to rates that can be handled by the CPU.
This patch allows capable device drivers to register their supported
packet trap policers with devlink. User space can then tune the
parameters of these policer (currently, rate and burst size) and read
from the device the number of packets that were dropped by the policer,
if supported.
Subsequent patches in the series will allow device drivers to create
default binding between these policers and packet trap groups and allow
user space to change the binding.
Signed-off-by: Ido Schimmel <idosch@...lanox.com>
Reviewed-by: Jiri Pirko <jiri@...lanox.com>
---
include/net/devlink.h | 63 ++++++
include/uapi/linux/devlink.h | 11 +
net/core/devlink.c | 403 +++++++++++++++++++++++++++++++++++
3 files changed, 477 insertions(+)
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 37230e23b5b0..66c52a2ef2cc 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -35,6 +35,7 @@ struct devlink {
struct devlink_dpipe_headers *dpipe_headers;
struct list_head trap_list;
struct list_head trap_group_list;
+ struct list_head trap_policer_list;
const struct devlink_ops *ops;
struct device *dev;
possible_net_t _net;
@@ -528,6 +529,21 @@ struct devlink_health_reporter_ops {
struct netlink_ext_ack *extack);
};
+/**
+ * struct devlink_trap_policer - Immutable packet trap policer attributes.
+ * @id: Policer identifier.
+ * @init_rate: Initial rate in packets / sec.
+ * @init_burst: Initial burst size in packets.
+ *
+ * Describes immutable attributes of packet trap policers that drivers register
+ * with devlink.
+ */
+struct devlink_trap_policer {
+ u32 id;
+ u64 init_rate;
+ u64 init_burst;
+};
+
/**
* struct devlink_trap_group - Immutable packet trap group attributes.
* @name: Trap group name.
@@ -725,6 +741,13 @@ enum devlink_trap_group_generic_id {
.generic = true, \
}
+#define DEVLINK_TRAP_POLICER(_id, _rate, _burst) \
+ { \
+ .id = _id, \
+ .init_rate = _rate, \
+ .init_burst = _burst, \
+ }
+
struct devlink_ops {
int (*reload_down)(struct devlink *devlink, bool netns_change,
struct netlink_ext_ack *extack);
@@ -821,6 +844,38 @@ struct devlink_ops {
*/
int (*trap_group_init)(struct devlink *devlink,
const struct devlink_trap_group *group);
+ /**
+ * @trap_policer_init: Trap policer initialization function.
+ *
+ * Should be used by device drivers to initialize the trap policer in
+ * the underlying device.
+ */
+ int (*trap_policer_init)(struct devlink *devlink,
+ const struct devlink_trap_policer *policer);
+ /**
+ * @trap_policer_fini: Trap policer de-initialization function.
+ *
+ * Should be used by device drivers to de-initialize the trap policer
+ * in the underlying device.
+ */
+ void (*trap_policer_fini)(struct devlink *devlink,
+ const struct devlink_trap_policer *policer);
+ /**
+ * @trap_policer_set: Trap policer parameters set function.
+ */
+ int (*trap_policer_set)(struct devlink *devlink,
+ const struct devlink_trap_policer *policer,
+ u64 rate, u64 burst,
+ struct netlink_ext_ack *extack);
+ /**
+ * @trap_policer_counter_get: Trap policer counter get function.
+ *
+ * Should be used by device drivers to report number of packets dropped
+ * by the policer.
+ */
+ int (*trap_policer_counter_get)(struct devlink *devlink,
+ const struct devlink_trap_policer *policer,
+ u64 *p_drops);
};
static inline void *devlink_priv(struct devlink *devlink)
@@ -1064,6 +1119,14 @@ int devlink_trap_groups_register(struct devlink *devlink,
void devlink_trap_groups_unregister(struct devlink *devlink,
const struct devlink_trap_group *groups,
size_t groups_count);
+int
+devlink_trap_policers_register(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count);
+void
+devlink_trap_policers_unregister(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count);
#if IS_ENABLED(CONFIG_NET_DEVLINK)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index dfdffc42e87d..856b88ea4aa3 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -117,6 +117,11 @@ enum devlink_command {
DEVLINK_CMD_TRAP_GROUP_NEW,
DEVLINK_CMD_TRAP_GROUP_DEL,
+ DEVLINK_CMD_TRAP_POLICER_GET, /* can dump */
+ DEVLINK_CMD_TRAP_POLICER_SET,
+ DEVLINK_CMD_TRAP_POLICER_NEW,
+ DEVLINK_CMD_TRAP_POLICER_DEL,
+
/* add new commands above here */
__DEVLINK_CMD_MAX,
DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -217,6 +222,7 @@ enum devlink_param_reset_dev_on_drv_probe_value {
enum {
DEVLINK_ATTR_STATS_RX_PACKETS, /* u64 */
DEVLINK_ATTR_STATS_RX_BYTES, /* u64 */
+ DEVLINK_ATTR_STATS_RX_DROPPED, /* u64 */
__DEVLINK_ATTR_STATS_MAX,
DEVLINK_ATTR_STATS_MAX = __DEVLINK_ATTR_STATS_MAX - 1
@@ -429,6 +435,11 @@ enum devlink_attr {
DEVLINK_ATTR_NETNS_FD, /* u32 */
DEVLINK_ATTR_NETNS_PID, /* u32 */
DEVLINK_ATTR_NETNS_ID, /* u32 */
+
+ DEVLINK_ATTR_TRAP_POLICER_ID, /* u32 */
+ DEVLINK_ATTR_TRAP_POLICER_RATE, /* u64 */
+ DEVLINK_ATTR_TRAP_POLICER_BURST, /* u64 */
+
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 73bb8fbe3393..63008d3785dc 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -5453,6 +5453,23 @@ struct devlink_stats {
struct u64_stats_sync syncp;
};
+/**
+ * struct devlink_trap_policer_item - Packet trap policer attributes.
+ * @policer: Immutable packet trap policer attributes.
+ * @rate: Rate in packets / sec.
+ * @burst: Burst size in packets.
+ * @list: trap_policer_list member.
+ *
+ * Describes packet trap policer attributes. Created by devlink during trap
+ * policer registration.
+ */
+struct devlink_trap_policer_item {
+ const struct devlink_trap_policer *policer;
+ u64 rate;
+ u64 burst;
+ struct list_head list;
+};
+
/**
* struct devlink_trap_group_item - Packet trap group attributes.
* @group: Immutable packet trap group attributes.
@@ -5489,6 +5506,19 @@ struct devlink_trap_item {
void *priv;
};
+static struct devlink_trap_policer_item *
+devlink_trap_policer_item_lookup(struct devlink *devlink, u32 id)
+{
+ struct devlink_trap_policer_item *policer_item;
+
+ list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
+ if (policer_item->policer->id == id)
+ return policer_item;
+ }
+
+ return NULL;
+}
+
static struct devlink_trap_item *
devlink_trap_item_lookup(struct devlink *devlink, const char *name)
{
@@ -6026,6 +6056,221 @@ static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
return 0;
}
+static struct devlink_trap_policer_item *
+devlink_trap_policer_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ u32 id;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
+ return NULL;
+ id = nla_get_u32(info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
+
+ return devlink_trap_policer_item_lookup(devlink, id);
+}
+
+static int
+devlink_trap_policer_stats_put(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_policer *policer)
+{
+ struct nlattr *attr;
+ u64 drops;
+ int err;
+
+ if (!devlink->ops->trap_policer_counter_get)
+ return 0;
+
+ err = devlink->ops->trap_policer_counter_get(devlink, policer, &drops);
+ if (err)
+ return err;
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int
+devlink_nl_trap_policer_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_policer_item *policer_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
+ policer_item->policer->id))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_RATE,
+ policer_item->rate, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_BURST,
+ policer_item->burst, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ err = devlink_trap_policer_stats_put(msg, devlink,
+ policer_item->policer);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_trap_policer_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_policer_list))
+ return -EOPNOTSUPP;
+
+ policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
+ if (!policer_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err)
+ goto err_trap_policer_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_policer_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ enum devlink_command cmd = DEVLINK_CMD_TRAP_POLICER_NEW;
+ struct devlink_trap_policer_item *policer_item;
+ u32 portid = NETLINK_CB(cb->skb).portid;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(policer_item, &devlink->trap_policer_list,
+ list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_policer_fill(msg, devlink,
+ policer_item, cmd,
+ portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ mutex_unlock(&devlink->lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->lock);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int
+devlink_trap_policer_set(struct devlink *devlink,
+ struct devlink_trap_policer_item *policer_item,
+ struct genl_info *info)
+{
+ struct nlattr **attrs = info->attrs;
+ u64 rate, burst;
+ int err;
+
+ rate = policer_item->rate;
+ burst = policer_item->burst;
+
+ if (attrs[DEVLINK_ATTR_TRAP_POLICER_RATE])
+ rate = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_RATE]);
+
+ if (attrs[DEVLINK_ATTR_TRAP_POLICER_BURST])
+ burst = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_BURST]);
+
+ err = devlink->ops->trap_policer_set(devlink, policer_item->policer,
+ rate, burst, info->extack);
+ if (err)
+ return err;
+
+ policer_item->rate = rate;
+ policer_item->burst = burst;
+
+ return 0;
+}
+
+static int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (list_empty(&devlink->trap_policer_list))
+ return -EOPNOTSUPP;
+
+ if (!devlink->ops->trap_policer_set)
+ return -EOPNOTSUPP;
+
+ policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
+ if (!policer_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer");
+ return -ENOENT;
+ }
+
+ return devlink_trap_policer_set(devlink, policer_item, info);
+}
+
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -6064,6 +6309,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
[DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
[DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -6391,6 +6639,19 @@ static const struct genl_ops devlink_nl_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
+ {
+ .cmd = DEVLINK_CMD_TRAP_POLICER_GET,
+ .doit = devlink_nl_cmd_trap_policer_get_doit,
+ .dumpit = devlink_nl_cmd_trap_policer_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_POLICER_SET,
+ .doit = devlink_nl_cmd_trap_policer_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -6438,6 +6699,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
INIT_LIST_HEAD(&devlink->reporter_list);
INIT_LIST_HEAD(&devlink->trap_list);
INIT_LIST_HEAD(&devlink->trap_group_list);
+ INIT_LIST_HEAD(&devlink->trap_policer_list);
mutex_init(&devlink->lock);
mutex_init(&devlink->reporters_lock);
return devlink;
@@ -6522,6 +6784,7 @@ void devlink_free(struct devlink *devlink)
{
mutex_destroy(&devlink->reporters_lock);
mutex_destroy(&devlink->lock);
+ WARN_ON(!list_empty(&devlink->trap_policer_list));
WARN_ON(!list_empty(&devlink->trap_group_list));
WARN_ON(!list_empty(&devlink->trap_list));
WARN_ON(!list_empty(&devlink->reporter_list));
@@ -8317,6 +8580,146 @@ void devlink_trap_groups_unregister(struct devlink *devlink,
}
EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister);
+static void
+devlink_trap_policer_notify(struct devlink *devlink,
+ const struct devlink_trap_policer_item *policer_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
+ cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_policer_fill(msg, devlink, policer_item, cmd, 0,
+ 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int
+devlink_trap_policer_register(struct devlink *devlink,
+ const struct devlink_trap_policer *policer)
+{
+ struct devlink_trap_policer_item *policer_item;
+ int err;
+
+ if (devlink_trap_policer_item_lookup(devlink, policer->id))
+ return -EEXIST;
+
+ policer_item = kzalloc(sizeof(*policer_item), GFP_KERNEL);
+ if (!policer_item)
+ return -ENOMEM;
+
+ policer_item->policer = policer;
+ policer_item->rate = policer->init_rate;
+ policer_item->burst = policer->init_burst;
+
+ if (devlink->ops->trap_policer_init) {
+ err = devlink->ops->trap_policer_init(devlink, policer);
+ if (err)
+ goto err_policer_init;
+ }
+
+ list_add_tail(&policer_item->list, &devlink->trap_policer_list);
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW);
+
+ return 0;
+
+err_policer_init:
+ kfree(policer_item);
+ return err;
+}
+
+static void
+devlink_trap_policer_unregister(struct devlink *devlink,
+ const struct devlink_trap_policer *policer)
+{
+ struct devlink_trap_policer_item *policer_item;
+
+ policer_item = devlink_trap_policer_item_lookup(devlink, policer->id);
+ if (WARN_ON_ONCE(!policer_item))
+ return;
+
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_DEL);
+ list_del(&policer_item->list);
+ if (devlink->ops->trap_policer_fini)
+ devlink->ops->trap_policer_fini(devlink, policer);
+ kfree(policer_item);
+}
+
+/**
+ * devlink_trap_policers_register - Register packet trap policers with devlink.
+ * @devlink: devlink.
+ * @policers: Packet trap policers.
+ * @policers_count: Count of provided packet trap policers.
+ *
+ * Return: Non-zero value on failure.
+ */
+int
+devlink_trap_policers_register(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count)
+{
+ int i, err;
+
+ mutex_lock(&devlink->lock);
+ for (i = 0; i < policers_count; i++) {
+ const struct devlink_trap_policer *policer = &policers[i];
+
+ if (WARN_ON(policer->id == 0)) {
+ err = -EINVAL;
+ goto err_trap_policer_verify;
+ }
+
+ err = devlink_trap_policer_register(devlink, policer);
+ if (err)
+ goto err_trap_policer_register;
+ }
+ mutex_unlock(&devlink->lock);
+
+ return 0;
+
+err_trap_policer_register:
+err_trap_policer_verify:
+ for (i--; i >= 0; i--)
+ devlink_trap_policer_unregister(devlink, &policers[i]);
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_trap_policers_register);
+
+/**
+ * devlink_trap_policers_unregister - Unregister packet trap policers from devlink.
+ * @devlink: devlink.
+ * @policers: Packet trap policers.
+ * @policers_count: Count of provided packet trap policers.
+ */
+void
+devlink_trap_policers_unregister(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count)
+{
+ int i;
+
+ mutex_lock(&devlink->lock);
+ for (i = policers_count - 1; i >= 0; i--)
+ devlink_trap_policer_unregister(devlink, &policers[i]);
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_trap_policers_unregister);
+
static void __devlink_compat_running_version(struct devlink *devlink,
char *buf, size_t len)
{
--
2.24.1
Powered by blists - more mailing lists