[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190120114959.GL2730@nanopsycho>
Date: Sun, 20 Jan 2019 12:49:59 +0100
From: Jiri Pirko <jiri@...nulli.us>
To: Eran Ben Elisha <eranbe@...lanox.com>
Cc: netdev@...r.kernel.org, Jiri Pirko <jiri@...lanox.com>,
"David S. Miller" <davem@...emloft.net>,
Ariel Almog <ariela@...lanox.com>,
Aya Levin <ayal@...lanox.com>,
Moshe Shemesh <moshe@...lanox.com>
Subject: Re: [PATCH net-next v2 02/11] devlink: Add health reporter
create/destroy functionality
Thu, Jan 17, 2019 at 10:59:11PM CET, eranbe@...lanox.com wrote:
>Devlink health reporter is an instance for reporting, diagnosing and
>recovering from run time errors discovered by the reporters.
>Define it's data structure and supported operations.
>In addition, expose devlink API to create and destroy a reporter.
>Each devlink instance will hold it's own reporters list.
>
>As part of the allocation, driver shall provide a set of callbacks which
>will be used the devlink in order to handle health reports and user
>commands related to this reporter. In addition, driver is entitled to
>provide some priv pointer, which can be fetched from the reporter by
>devlink_health_reporter_priv function.
>
>For each reporter, devlink will hold a metadata of statistics,
>buffers and status.
>
>Signed-off-by: Eran Ben Elisha <eranbe@...lanox.com>
>Reviewed-by: Moshe Shemesh <moshe@...lanox.com>
>---
> include/net/devlink.h | 59 ++++++++++++++++++++
> net/core/devlink.c | 127 ++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 186 insertions(+)
>
>diff --git a/include/net/devlink.h b/include/net/devlink.h
>index 77c77319290a..7fe30d67678a 100644
>--- a/include/net/devlink.h
>+++ b/include/net/devlink.h
>@@ -30,6 +30,7 @@ struct devlink {
> struct list_head param_list;
> struct list_head region_list;
> u32 snapshot_id;
>+ struct list_head reporter_list;
> struct devlink_dpipe_headers *dpipe_headers;
> const struct devlink_ops *ops;
> struct device *dev;
>@@ -424,6 +425,34 @@ struct devlink_region;
> typedef void devlink_snapshot_data_dest_t(const void *data);
>
> struct devlink_health_buffer;
>+struct devlink_health_reporter;
>+
>+/**
>+ * struct devlink_health_reporter_ops - Reporter operations
>+ * @name: reporter name
>+ * dump_size: dump buffer size allocated by the devlink
>+ * diagnose_size: diagnose buffer size allocated by the devlink
>+ * recover: callback to recover from reported error
>+ * if priv_ctx is NULL, run a full recover
>+ * dump: callback to dump an object
>+ * if priv_ctx is NULL, run a full dump
>+ * diagnose: callback to diagnose the current status
>+ */
>+
>+struct devlink_health_reporter_ops {
>+ char *name;
>+ unsigned int dump_size;
>+ unsigned int diagnose_size;
>+ int (*recover)(struct devlink_health_reporter *reporter,
>+ void *priv_ctx);
>+ int (*dump)(struct devlink_health_reporter *reporter,
>+ struct devlink_health_buffer **buffers_array,
>+ unsigned int buffer_size, unsigned int num_buffers,
>+ void *priv_ctx);
>+ int (*diagnose)(struct devlink_health_reporter *reporter,
>+ struct devlink_health_buffer **buffers_array,
>+ unsigned int buffer_size, unsigned int num_buffers);
>+};
>
> struct devlink_ops {
> int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
>@@ -602,6 +631,16 @@ int devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
> char *name);
> int devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
> void *data, int len);
>+struct devlink_health_reporter *
>+devlink_health_reporter_create(struct devlink *devlink,
>+ const struct devlink_health_reporter_ops *ops,
>+ u64 graceful_period, bool auto_recover,
>+ void *priv);
>+void
>+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
>+
>+void *
>+devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
> #else
>
> static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
>@@ -920,6 +959,26 @@ devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
> {
> return 0;
> }
>+
>+static inline struct devlink_health_reporter *
>+devlink_health_reporter_create(struct devlink *devlink,
>+ const struct devlink_health_reporter_ops *ops,
>+ u64 graceful_period, bool auto_recover,
>+ void *priv)
>+{
>+ return NULL;
>+}
>+
>+static inline void
>+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
>+{
>+}
>+
>+static inline void *
>+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
>+{
>+ return NULL;
>+}
> #endif
>
> #endif /* _NET_DEVLINK_H_ */
>diff --git a/net/core/devlink.c b/net/core/devlink.c
>index 8984501edade..fec169a28dba 100644
>--- a/net/core/devlink.c
>+++ b/net/core/devlink.c
>@@ -4098,6 +4098,132 @@ devlink_health_buffer_snd(struct genl_info *info,
> return err;
> }
>
>+struct devlink_health_reporter {
>+ struct list_head list;
>+ struct devlink_health_buffer **dump_buffers_array;
>+ struct mutex dump_lock; /* lock parallel read/write from dump buffers */
>+ struct devlink_health_buffer **diagnose_buffers_array;
>+ struct mutex diagnose_lock; /* lock parallel read/write from diagnose buffers */
>+ void *priv;
>+ const struct devlink_health_reporter_ops *ops;
>+ struct devlink *devlink;
>+ u64 graceful_period;
>+ bool auto_recover;
>+ u8 health_state;
>+};
>+
>+void *
>+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
>+{
>+ return reporter->priv;
>+}
>+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
>+
>+static struct devlink_health_reporter *
>+devlink_health_reporter_find_by_name(struct devlink *devlink,
>+ const char *reporter_name)
>+{
>+ struct devlink_health_reporter *reporter;
>+
>+ list_for_each_entry(reporter, &devlink->reporter_list, list)
>+ if (!strcmp(reporter->ops->name, reporter_name))
>+ return reporter;
>+ return NULL;
>+}
>+
>+/**
>+ * devlink_health_reporter_create - create devlink health reporter
>+ *
>+ * @devlink: devlink
>+ * @ops: ops
>+ * @graceful_period: to avoid recovery loops, in msecs
>+ * @auto_recover: auto recover when error occurs
>+ * @priv: priv
>+ */
>+struct devlink_health_reporter *
>+devlink_health_reporter_create(struct devlink *devlink,
>+ const struct devlink_health_reporter_ops *ops,
>+ u64 graceful_period, bool auto_recover,
>+ void *priv)
>+{
>+ struct devlink_health_reporter *reporter;
>+
>+ mutex_lock(&devlink->lock);
>+ if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
>+ reporter = ERR_PTR(-EEXIST);
>+ goto unlock;
>+ }
>+
>+ if (WARN_ON(ops->dump && !ops->dump_size) ||
>+ WARN_ON(ops->diagnose && !ops->diagnose_size) ||
>+ WARN_ON(auto_recover && !ops->recover) ||
>+ WARN_ON(graceful_period && !ops->recover)) {
>+ reporter = ERR_PTR(-EINVAL);
>+ goto unlock;
>+ }
>+
>+ reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
>+ if (!reporter) {
>+ reporter = ERR_PTR(-ENOMEM);
>+ goto unlock;
>+ }
>+
>+ if (ops->dump) {
>+ reporter->dump_buffers_array =
>+ devlink_health_buffers_create(ops->dump_size);
>+ if (!reporter->dump_buffers_array) {
>+ kfree(reporter);
>+ reporter = ERR_PTR(-ENOMEM);
>+ goto unlock;
>+ }
>+ }
>+
>+ if (ops->diagnose) {
>+ reporter->diagnose_buffers_array =
>+ devlink_health_buffers_create(ops->diagnose_size);
>+ if (!reporter->diagnose_buffers_array) {
>+ devlink_health_buffers_destroy(reporter->dump_buffers_array,
>+ DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->dump_size));
This is just ugly. :/
As I wrote in the other email, should be converted to simple
"msg_ctx = msg_ctx_create(), msg_ctx_destroy(msg_ctx)", no sizes, no
buffers visible to the driver.
>+ kfree(reporter);
>+ reporter = ERR_PTR(-ENOMEM);
>+ goto unlock;
>+ }
>+ }
>+
>+ list_add_tail(&reporter->list, &devlink->reporter_list);
>+ mutex_init(&reporter->dump_lock);
>+ mutex_init(&reporter->diagnose_lock);
>+
>+ reporter->priv = priv;
>+ reporter->ops = ops;
>+ reporter->devlink = devlink;
>+ reporter->graceful_period = graceful_period;
>+ reporter->auto_recover = auto_recover;
>+unlock:
>+ mutex_unlock(&devlink->lock);
>+ return reporter;
>+}
>+EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
>+
>+/**
>+ * devlink_health_reporter_destroy - destroy devlink health reporter
>+ *
>+ * @reporter: devlink health reporter to destroy
>+ */
>+void
>+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
>+{
>+ mutex_lock(&reporter->devlink->lock);
>+ list_del(&reporter->list);
>+ devlink_health_buffers_destroy(reporter->dump_buffers_array,
>+ DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
>+ devlink_health_buffers_destroy(reporter->diagnose_buffers_array,
>+ DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size));
>+ kfree(reporter);
>+ mutex_unlock(&reporter->devlink->lock);
>+}
>+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
>+
> static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
> [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
> [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
>@@ -4383,6 +4509,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
> INIT_LIST_HEAD(&devlink->resource_list);
> INIT_LIST_HEAD(&devlink->param_list);
> INIT_LIST_HEAD(&devlink->region_list);
>+ INIT_LIST_HEAD(&devlink->reporter_list);
> mutex_init(&devlink->lock);
> return devlink;
> }
>--
>2.17.1
>
Powered by blists - more mailing lists