[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <6ee036c7-f4ea-4e42-faad-66a1921553ce@linux.ibm.com>
Date: Wed, 4 Mar 2020 10:25:15 +0100
From: Frederic Barrat <fbarrat@...ux.ibm.com>
To: "Alastair D'Silva" <alastair@....ibm.com>, alastair@...ilva.org
Cc: "Aneesh Kumar K . V" <aneesh.kumar@...ux.ibm.com>,
"Oliver O'Halloran" <oohall@...il.com>,
Benjamin Herrenschmidt <benh@...nel.crashing.org>,
Paul Mackerras <paulus@...ba.org>,
Michael Ellerman <mpe@...erman.id.au>,
Andrew Donnellan <ajd@...ux.ibm.com>,
Arnd Bergmann <arnd@...db.de>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Dan Williams <dan.j.williams@...el.com>,
Vishal Verma <vishal.l.verma@...el.com>,
Dave Jiang <dave.jiang@...el.com>,
Ira Weiny <ira.weiny@...el.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Mauro Carvalho Chehab <mchehab+samsung@...nel.org>,
"David S. Miller" <davem@...emloft.net>,
Rob Herring <robh@...nel.org>,
Anton Blanchard <anton@...abs.org>,
Krzysztof Kozlowski <krzk@...nel.org>,
Mahesh Salgaonkar <mahesh@...ux.vnet.ibm.com>,
Madhavan Srinivasan <maddy@...ux.vnet.ibm.com>,
Cédric Le Goater <clg@...d.org>,
Anju T Sudhakar <anju@...ux.vnet.ibm.com>,
Hari Bathini <hbathini@...ux.ibm.com>,
Thomas Gleixner <tglx@...utronix.de>,
Greg Kurz <groug@...d.org>,
Nicholas Piggin <npiggin@...il.com>,
Masahiro Yamada <yamada.masahiro@...ionext.com>,
Alexey Kardashevskiy <aik@...abs.ru>,
linux-kernel@...r.kernel.org, linuxppc-dev@...ts.ozlabs.org,
linux-nvdimm@...ts.01.org, linux-mm@...ck.org
Subject: Re: [PATCH v3 19/27] powerpc/powernv/pmem: Add an IOCTL to report
controller statistics
Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> From: Alastair D'Silva <alastair@...ilva.org>
>
> The controller can report a number of statistics that are useful
> in evaluating the performance and reliability of the card.
>
> This patch exposes this information via an IOCTL.
>
> Signed-off-by: Alastair D'Silva <alastair@...ilva.org>
> ---
> arch/powerpc/platforms/powernv/pmem/ocxl.c | 185 +++++++++++++++++++++
> include/uapi/nvdimm/ocxl-pmem.h | 17 ++
> 2 files changed, 202 insertions(+)
>
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index 2cabafe1fc58..009d4fd29e7d 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -758,6 +758,186 @@ static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
> GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED);
> }
>
> +/**
> + * controller_stats_header_parse() - Parse the first 64 bits of the controller stats admin command response
> + * @ocxlpmem: the device metadata
> + * @length: out, returns the number of bytes in the response (excluding the 64 bit header)
> + */
> +static int controller_stats_header_parse(struct ocxlpmem *ocxlpmem,
> + u32 *length)
> +{
> + int rc;
> + u64 val;
> +
unexpected empty line
> + u16 data_identifier;
> + u32 data_length;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset,
> + OCXL_LITTLE_ENDIAN, &val);
> + if (rc)
> + return rc;
> +
> + data_identifier = val >> 48;
> + data_length = val & 0xFFFFFFFF;
> +
> + if (data_identifier != 0x4353) { // 'CS'
> + dev_err(&ocxlpmem->dev,
> + "Bad data identifier for controller stats, expected 'CS', got '%-.*s'\n",
> + 2, (char *)&data_identifier);
Wow, I'm clueless what that string format looks like :-)
2 arguments? Did you check the kernel string formatter does what you want?
You may consider unifying the format though, the error log patch uses a
simpler (better?) format for a similar message.
> + return -EINVAL;
> + }
> +
> + *length = data_length;
> + return 0;
> +}
> +
> +static int ioctl_controller_stats(struct ocxlpmem *ocxlpmem,
> + struct ioctl_ocxl_pmem_controller_stats __user *uarg)
> +{
> + struct ioctl_ocxl_pmem_controller_stats args;
> + u32 length;
> + int rc;
> + u64 val;
> +
> + memset(&args, '\0', sizeof(args));
> +
> + mutex_lock(&ocxlpmem->admin_command.lock);
> +
> + rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_CONTROLLER_STATS);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.request_offset + 0x08,
> + OCXL_LITTLE_ENDIAN, 0);
> + if (rc)
> + goto out;
> +
> + rc = admin_command_execute(ocxlpmem);
> + if (rc)
> + goto out;
> +
> +
> + rc = admin_command_complete_timeout(ocxlpmem,
> + ADMIN_COMMAND_CONTROLLER_STATS);
> + if (rc < 0) {
> + dev_warn(&ocxlpmem->dev, "Controller stats timed out\n");
> + goto out;
> + }
> +
> + rc = admin_response(ocxlpmem);
> + if (rc < 0)
> + goto out;
> + if (rc != STATUS_SUCCESS) {
> + warn_status(ocxlpmem,
> + "Unexpected status from controller stats", rc);
> + goto out;
> + }
All those ioctls commands follow the same pattern:
1. admin_command_request()
2. optionnaly, set some mmio registers specific to the command
3. admin_command_execute()
4. admin_command_complete_timeout()
5. admin_response()
By swapping 1 and 2, we could then factorize steps 1, 3, 4 and 5 in a
function and simplify/shorten the code each time a command is called.
Regarding step 2 (and that's true for all similar patches), a comment
about what the mmio tuning does would help and avoid looking up the
spec. Looking up the spec during the review is expected, but it will
ease reading the code 6 months from now.
> +
> + rc = controller_stats_header_parse(ocxlpmem, &length);
> + if (rc)
> + goto out;
> +
> + if (length != 0x140)
> + warn_status(ocxlpmem,
> + "Unexpected length for controller stats data, expected 0x140, got 0x%x",
> + length);
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x08,
> + OCXL_LITTLE_ENDIAN, &val);
> + if (rc)
> + goto out;
> +
> + args.reset_count = val >> 32;
> + args.reset_uptime = val & 0xFFFFFFFF;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x10,
> + OCXL_LITTLE_ENDIAN, &val);
> + if (rc)
> + goto out;
> +
> + args.power_on_uptime = val >> 32;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x08,
> + OCXL_LITTLE_ENDIAN, &args.host_load_count);
Those offsets are hard to understand, even with the spec next to me. And
it seems that we could harden things a bit:
each block as a "statistics parameter ID" and the length of the data for
that block. We should check that and make sure we're reading what we expect.
For example, from the spec I'm looking (110d), I would expect the host
load count to be at offset 0x10. It's entirely possible I'm misreading
it though.
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x10,
> + OCXL_LITTLE_ENDIAN, &args.host_store_count);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x18,
> + OCXL_LITTLE_ENDIAN, &args.media_read_count);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x20,
> + OCXL_LITTLE_ENDIAN, &args.media_write_count);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x28,
> + OCXL_LITTLE_ENDIAN, &args.cache_hit_count);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x30,
> + OCXL_LITTLE_ENDIAN, &args.cache_miss_count);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x38,
> + OCXL_LITTLE_ENDIAN, &args.media_read_latency);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x40,
> + OCXL_LITTLE_ENDIAN, &args.media_write_latency);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x48,
> + OCXL_LITTLE_ENDIAN, &args.cache_read_latency);
> + if (rc)
> + goto out;
> +
> + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> + ocxlpmem->admin_command.data_offset + 0x08 + 0x40 + 0x50,
> + OCXL_LITTLE_ENDIAN, &args.cache_write_latency);
> + if (rc)
> + goto out;
> +
> + if (copy_to_user(uarg, &args, sizeof(args))) {
> + rc = -EFAULT;
> + goto out;
> + }
> +
> + rc = admin_response_handled(ocxlpmem);
> + if (rc)
> + goto out;
> +
> + rc = 0;
> + goto out;
That may be more of a personal habit, but that final goto disrupts the
"good case" flow. And I think it's pretty unusual within the kernel.
> +
> +out:
> + mutex_unlock(&ocxlpmem->admin_command.lock);
> + return rc;
> +}
> +
> static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> {
> struct ocxlpmem *ocxlpmem = file->private_data;
> @@ -781,6 +961,11 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
> rc = ioctl_controller_dump_complete(ocxlpmem);
> break;
> +
> + case IOCTL_OCXL_PMEM_CONTROLLER_STATS:
> + rc = ioctl_controller_stats(ocxlpmem,
> + (struct ioctl_ocxl_pmem_controller_stats __user *)args);
> + break;
> }
>
> return rc;
> diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
> index d4d8512d03f7..add223aa2fdb 100644
> --- a/include/uapi/nvdimm/ocxl-pmem.h
> +++ b/include/uapi/nvdimm/ocxl-pmem.h
> @@ -50,6 +50,22 @@ struct ioctl_ocxl_pmem_controller_dump_data {
> __u64 reserved[8];
> };
>
> +struct ioctl_ocxl_pmem_controller_stats {
> + __u32 reset_count;
> + __u32 reset_uptime; /* seconds */
> + __u32 power_on_uptime; /* seconds */
Same as before, we're going to have some padding here.
Fred
> + __u64 host_load_count;
> + __u64 host_store_count;
> + __u64 media_read_count;
> + __u64 media_write_count;
> + __u64 cache_hit_count;
> + __u64 cache_miss_count;
> + __u64 media_read_latency; /* nanoseconds */
> + __u64 media_write_latency; /* nanoseconds */
> + __u64 cache_read_latency; /* nanoseconds */
> + __u64 cache_write_latency; /* nanoseconds */
> +};
> +
> /* ioctl numbers */
> #define OCXL_PMEM_MAGIC 0x5C
> /* SCM devices */
> @@ -57,5 +73,6 @@ struct ioctl_ocxl_pmem_controller_dump_data {
> #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP _IO(OCXL_PMEM_MAGIC, 0x02)
> #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA _IOWR(OCXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
> #define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE _IO(OCXL_PMEM_MAGIC, 0x04)
> +#define IOCTL_OCXL_PMEM_CONTROLLER_STATS _IO(OCXL_PMEM_MAGIC, 0x05)
>
> #endif /* _UAPI_OCXL_SCM_H */
>
Powered by blists - more mailing lists