[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <1497415496.2037.4.camel@gmail.com>
Date: Wed, 14 Jun 2017 14:44:56 +1000
From: Cyril Bur <cyrilbur@...il.com>
To: Shilpasri G Bhat <shilpa.bhat@...ux.vnet.ibm.com>,
linuxppc-dev@...ts.ozlabs.org
Cc: linux-kernel@...r.kernel.org, benh@...nel.crashing.org,
paulus@...ba.org, mpe@...erman.id.au, svaidy@...ux.vnet.ibm.com,
ego@...ux.vnet.ibm.com
Subject: Re: [PATCH V2 2/2] powerpc/powernv : Add support for OPAL-OCC
command/response interface
On Tue, 2017-06-13 at 23:26 +0530, Shilpasri G Bhat wrote:
> In P9, OCC (On-Chip-Controller) supports shared memory based
> commad-response interface. Within the shared memory there is an OPAL
> command buffer and OCC response buffer that can be used to send
> inband commands to OCC. This patch adds a platform driver to support
> the command/response interface between OCC and the host.
>
Hi Shilpasri,
I have another question about printing of errors, see below. Otherwise
looks better.
When you're dropping the locks I wonder why you couldn't just use
atomic_set().
Finally, as you say below, if a user does a read() without having done
a write(), there isn't really any guarantee as to what they get. They
will get the previous response if there had been a write() call, what
about if there had never been a write() call?
Could this be considered a security problem? Imagine that one program
is using the file descriptor and a malicious program keeps trying to
get the file descriptor and then the program which has the file
descriptor crashes? What information might the malicious program get?
More inline,
Cyril
> Signed-off-by: Shilpasri G Bhat <shilpa.bhat@...ux.vnet.ibm.com>
> ---
> Changes from V2:
> - Remove spinlock and use atomic_t for setting and clearing flags
> - Fix endian swapping
> - Use pa() and va() before and after opal call for accessing buffer
> data
> - Replace (u8 *) with __be64 for buffer pointers
> - User reads the previous OCC response if the user does a read()
> before a write(). Is this wrong?
> - Add WARN_ON check for nr_occs > 254
>
> arch/powerpc/include/asm/opal-api.h | 41 +++-
> arch/powerpc/include/asm/opal.h | 3 +
> arch/powerpc/platforms/powernv/Makefile | 2 +-
> arch/powerpc/platforms/powernv/opal-occ.c | 314 +++++++++++++++++++++++++
> arch/powerpc/platforms/powernv/opal-wrappers.S | 1 +
> arch/powerpc/platforms/powernv/opal.c | 8 +
> 6 files changed, 367 insertions(+), 2 deletions(-)
> create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c
>
> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
> index cb3e624..011d86c 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -42,6 +42,10 @@
> #define OPAL_I2C_STOP_ERR -24
> #define OPAL_XIVE_PROVISIONING -31
> #define OPAL_XIVE_FREE_ACTIVE -32
> +#define OPAL_OCC_INVALID_STATE -33
> +#define OPAL_OCC_BUSY -34
> +#define OPAL_OCC_CMD_TIMEOUT -35
> +#define OPAL_OCC_RSP_MISMATCH -36
>
> /* API Tokens (in r0) */
> #define OPAL_INVALID_CALL -1
> @@ -190,7 +194,8 @@
> #define OPAL_NPU_INIT_CONTEXT 146
> #define OPAL_NPU_DESTROY_CONTEXT 147
> #define OPAL_NPU_MAP_LPAR 148
> -#define OPAL_LAST 148
> +#define OPAL_OCC_COMMAND 149
> +#define OPAL_LAST 149
>
> /* Device tree flags */
>
> @@ -829,6 +834,40 @@ struct opal_prd_msg_header {
>
> struct opal_prd_msg;
>
> +enum occ_cmd {
> + OCC_CMD_AMESTER_PASS_THRU = 0,
> + OCC_CMD_CLEAR_SENSOR_DATA,
> + OCC_CMD_SET_POWER_CAP,
> + OCC_CMD_SET_POWER_SHIFTING_RATIO,
> + OCC_CMD_SELECT_SENSOR_GROUPS,
> + OCC_CMD_LAST
> +};
> +
> +struct opal_occ_cmd_rsp_msg {
> + __be64 cdata;
> + __be64 rdata;
> + __be16 cdata_size;
> + __be16 rdata_size;
> + u8 cmd;
> + u8 request_id;
> + u8 status;
> +};
> +
> +struct opal_occ_cmd_data {
> + __be16 size;
> + u8 cmd;
> + u8 data[];
> +};
> +
> +struct opal_occ_rsp_data {
> + __be16 size;
> + u8 status;
> + u8 data[];
> +};
> +
> +#define MAX_OPAL_CMD_DATA_LENGTH 4090
> +#define MAX_OCC_RSP_DATA_LENGTH 8698
> +
> #define OCC_RESET 0
> #define OCC_LOAD 1
> #define OCC_THROTTLE 2
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 03ed493..e55ed79 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -346,6 +346,9 @@ static inline int opal_get_async_rc(struct opal_msg msg)
>
> void opal_wake_poller(void);
>
> +int64_t opal_occ_command(int chip_id, struct opal_occ_cmd_rsp_msg *msg,
> + bool retry);
> +
> #endif /* __ASSEMBLY__ */
>
> #endif /* _ASM_POWERPC_OPAL_H */
> diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
> index b5d98cb..f5f0902 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o
> obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
> obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
> obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
> -obj-y += opal-kmsg.o
> +obj-y += opal-kmsg.o opal-occ.o
>
> obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
> obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o
> diff --git a/arch/powerpc/platforms/powernv/opal-occ.c b/arch/powerpc/platforms/powernv/opal-occ.c
> new file mode 100644
> index 0000000..912cdc4
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-occ.c
> @@ -0,0 +1,314 @@
> +/*
> + * Copyright IBM Corporation 2017
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + */
> +
> +#define pr_fmt(fmt) "opal-occ: " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/miscdevice.h>
> +#include <linux/highmem.h>
> +#include <linux/slab.h>
> +#include <asm/opal.h>
> +
> +struct occ {
> + struct miscdevice dev;
> + struct opal_occ_rsp_data *rsp;
> + atomic_t session;
> + atomic_t cmd_in_progress;
> + int id;
> + u8 last_token;
> +} *occs;
> +static int nr_occs;
> +
> +static int __send_occ_command(struct opal_occ_cmd_rsp_msg *msg,
> + int chip_id, bool retry)
> +{
> + struct opal_msg async_msg;
> + int rc;
> +
> + if (!retry) {
> + msg->cdata = cpu_to_be64(__pa(msg->cdata));
> + msg->cdata_size = cpu_to_be16(msg->cdata_size);
> + msg->rdata = cpu_to_be64(__pa(msg->rdata));
> + }
> +
> + rc = opal_occ_command(chip_id, msg, retry);
> + if (rc == OPAL_ASYNC_COMPLETION) {
> + rc = opal_async_wait_response(msg->request_id, &async_msg);
> + if (rc) {
> + pr_info("Failed to wait for async response %d\n", rc);
> + return rc;
> + }
> + } else if (rc) {
> + pr_info("Failed to send opal_occ_command %d\n", rc);
> + return rc;
> + }
> +
> + rc = opal_get_async_rc(async_msg);
> + if (rc) {
> + pr_info("opal_occ_command failed with %d\n", rc);
> + return rc;
> + }
If you get a timeout or a response mismatch you're going to retry. Is
this because you expect this to happen generally in the course of using
this interface - if so, should you print a message or would it be best
to say nothing? Also is pr_info() a good level for these?
> +
> + msg->rdata_size = be16_to_cpu(msg->rdata_size);
> + msg->rdata = be64_to_cpu(__va(msg->rdata));
> + msg->cdata_size = be16_to_cpu(msg->cdata_size);
> + msg->cdata = be64_to_cpu(__va(msg->cdata));
> +
> + if (msg->rdata_size > MAX_OCC_RSP_DATA_LENGTH) {
> + pr_info("Opal sent bigger data, clipping to the max response size\n");
> + msg->rdata_size = MAX_OCC_RSP_DATA_LENGTH;
> + }
> +
> + return rc;
> +}
> +
> +static int send_occ_command(struct opal_occ_cmd_rsp_msg *msg, struct occ *occ)
> +{
> + int token, rc;
> +
> + token = opal_async_get_unique_token_interruptible(occ->last_token);
> + if (token < 0) {
> + pr_info("Failed to get the request_id/token for command %d (%d)\n",
> + msg->cmd, token);
> + return token;
> + }
> +
> + msg->request_id = token;
> + rc = __send_occ_command(msg, occ->id, false);
> +
> + switch (rc) {
> + case OPAL_OCC_CMD_TIMEOUT:
> + case OPAL_OCC_RSP_MISMATCH:
> + occ->last_token = token;
> + opal_async_release_token(token);
> + token = opal_async_get_unique_token_interruptible(token);
> + if (token < 0) {
> + pr_info("Failed to get the request_id/token for retry command %d (%d)\n",
> + msg->cmd, token);
> +
> + return opal_error_code(rc);
> + }
> +
> + msg->request_id = token;
> + rc = __send_occ_command(msg, occ->id, true);
> + break;
> + default:
> + break;
> + }
> +
> + occ->last_token = token;
> + opal_async_release_token(token);
> + return opal_error_code(rc);
> +}
> +
> +static int opal_occ_cmd_prepare(struct opal_occ_cmd_data *cmd, struct occ *occ)
> +{
> + struct opal_occ_cmd_rsp_msg msg;
> + int rc;
> +
> + msg.cmd = cmd->cmd;
> + msg.cdata_size = cmd->size;
> + msg.rdata = (u64)occ->rsp->data;
> + msg.cdata = (u64)cmd->data;
I'm surprised this doesn't throw (gcc) warnings, I certainly expect it
will throw sparse warnings.
You've done the right thing and annotated your struct
opal_occ_cmd_rsp_msg with __be64 which is what skiboot expects,
however, at this point it is still in CPU endian and it can get
confusing. If a type is marked as a specific endian, it should *always*
be in that endian. In this case it may (depending on what the CPU
endian is) be in the wrong endian until we get to __send_occ_command()
where you do the conversion. Is there any reason you can't convert them
here?
> + rc = send_occ_command(&msg, occ);
> + if (rc)
> + return rc;
> +
> + occ->rsp->size = msg.rdata_size;
> + occ->rsp->status = msg.status;
> +
> + return rc;
> +}
> +
> +static ssize_t opal_occ_write(struct file *file, const char __user *buf,
> + size_t count, loff_t *ppos)
> +{
> + struct miscdevice *dev = file->private_data;
> + struct occ *occ = container_of(dev, struct occ, dev);
> + struct opal_occ_cmd_data *cmd;
> + int rc;
> +
> + if (count < sizeof(*cmd))
> + return -EINVAL;
> +
> + if (atomic_xchg(&occ->cmd_in_progress, 1) == 1)
> + return -EBUSY;
> +
> + cmd = kmalloc(count, GFP_KERNEL);
> + if (!cmd) {
> + rc = -ENOMEM;
> + goto out;
> + }
> +
> + rc = copy_from_user(cmd, buf, count);
> + if (rc) {
> + pr_err("Failed to copy OCC command request message\n");
> + rc = -EFAULT;
> + goto free_cmd;
> + }
> +
> + if (cmd->size > MAX_OPAL_CMD_DATA_LENGTH) {
> + rc = -EINVAL;
> + goto free_cmd;
> + }
> +
> + rc = opal_occ_cmd_prepare(cmd, occ);
> + if (!rc)
> + rc = count;
> +
> +free_cmd:
> + kfree(cmd);
> +out:
> + atomic_xchg(&occ->cmd_in_progress, 0);
> + return rc;
> +}
> +
> +static ssize_t opal_occ_read(struct file *file, char __user *buf,
> + size_t count, loff_t *ppos)
> +{
> + struct miscdevice *dev = file->private_data;
> + struct occ *occ = container_of(dev, struct occ, dev);
> + int rc;
> +
> + if (count < sizeof(*occ->rsp) + occ->rsp->size)
> + return -EINVAL;
> +
> + rc = copy_to_user((void __user *)buf, occ->rsp,
> + sizeof(occ->rsp) + occ->rsp->size);
> + if (rc) {
> + pr_err("Failed to copy OCC response data to user\n");
> + return rc;
> + }
> +
> + return sizeof(*occ->rsp) + occ->rsp->size;
> +}
> +
> +static int opal_occ_open(struct inode *inode, struct file *file)
> +{
> + struct miscdevice *dev = file->private_data;
> + struct occ *occ = container_of(dev, struct occ, dev);
> +
> + if (atomic_xchg(&occ->session, 1) == 1)
> + return -EBUSY;
> +
> + return 0;
> +}
> +
> +static int opal_occ_release(struct inode *inode, struct file *file)
> +{
> + struct miscdevice *dev = file->private_data;
> + struct occ *occ = container_of(dev, struct occ, dev);
> +
> + atomic_xchg(&occ->session, 0);
> +
> + return 0;
> +}
> +
> +static const struct file_operations opal_occ_fops = {
> + .open = opal_occ_open,
> + .read = opal_occ_read,
> + .write = opal_occ_write,
> + .release = opal_occ_release,
> + .owner = THIS_MODULE,
> +};
> +
> +static int opal_occ_probe(struct platform_device *pdev)
> +{
> + unsigned int chip[256];
> + unsigned int cpu;
> + unsigned int prev_chip_id = UINT_MAX;
> + int i, rc;
> +
> + for_each_possible_cpu(cpu) {
> + unsigned int id = cpu_to_chip_id(cpu);
> +
> + if (prev_chip_id != id) {
> + prev_chip_id = id;
> + chip[nr_occs++] = id;
> + WARN_ON(nr_occs > 254);
Perhaps have some MAX_POSSIBLE_CHIPS or something? Also might I suggest
WARN_ON_ONCE just because if there's no point printing the warning more
than once but this code could print it a bunch of times...
> + }
> + }
> +
> + occs = kcalloc(nr_occs, sizeof(*occs), GFP_KERNEL);
> + if (!occs)
> + return -ENOMEM;
> +
> + for (i = 0; i < nr_occs; i++) {
> + char name[10];
> +
> + occs[i].id = chip[i];
> + occs[i].dev.minor = MISC_DYNAMIC_MINOR;
> + snprintf(name, 10, "occ%d", chip[i]);
> + occs[i].dev.name = name;
> + occs[i].dev.fops = &opal_occ_fops;
> + occs[i].last_token = -1;
> + occs[i].rsp = kmalloc(sizeof(occs[i].rsp) +
> + MAX_OCC_RSP_DATA_LENGTH,
> + GFP_KERNEL);
> + if (!occs[i].rsp) {
> + rc = -ENOMEM;
> + goto free_occs;
> + }
> +
> + rc = misc_register(&occs[i].dev);
> + if (rc)
> + goto free_occ_rsp_data;
> + }
> +
> + return 0;
> +
> +free_occ_rsp_data:
> + kfree(occs[i].rsp);
> +free_occs:
> + while (--i >= 0) {
> + kfree(occs[i].rsp);
> + misc_deregister(&occs[i].dev);
> + }
> + kfree(occs);
> +
> + return rc;
> +}
> +
> +static int opal_occ_remove(struct platform_device *pdev)
> +{
> + int i;
> +
> + for (i = 0; i < nr_occs; i++) {
> + kfree(occs[i].rsp);
> + misc_deregister(&occs[i].dev);
> + }
> +
> + kfree(occs);
> + return 0;
> +}
> +
> +static const struct of_device_id opal_occ_match[] = {
> + { .compatible = "ibm,opal-occ-cmd-rsp-interface" },
> + { },
> +};
> +
> +static struct platform_driver opal_occ_driver = {
> + .driver = {
> + .name = "opal-occ",
> + .of_match_table = opal_occ_match,
> + },
> + .probe = opal_occ_probe,
> + .remove = opal_occ_remove,
> +};
> +
> +module_platform_driver(opal_occ_driver);
> +
> +MODULE_DESCRIPTION("PowerNV OPAL-OCC driver");
> +MODULE_LICENSE("GPL");
> diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
> index f620572..e6bf18d 100644
> --- a/arch/powerpc/platforms/powernv/opal-wrappers.S
> +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
> @@ -310,3 +310,4 @@ OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
> OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
> OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
> OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
> +OPAL_CALL(opal_occ_command, OPAL_OCC_COMMAND);
> diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
> index 59684b4..d87c61b 100644
> --- a/arch/powerpc/platforms/powernv/opal.c
> +++ b/arch/powerpc/platforms/powernv/opal.c
> @@ -815,6 +815,9 @@ static int __init opal_init(void)
> opal_pdev_init("ibm,opal-flash");
> opal_pdev_init("ibm,opal-prd");
>
> + /* Initialize platform device: OCC_OPAL command-response interface */
> + opal_pdev_init("ibm,opal-occ-cmd-rsp-interface");
> +
> /* Initialise platform device: oppanel interface */
> opal_pdev_init("ibm,opal-oppanel");
>
> @@ -859,6 +862,7 @@ void opal_shutdown(void)
> EXPORT_SYMBOL_GPL(opal_flash_write);
> EXPORT_SYMBOL_GPL(opal_flash_erase);
> EXPORT_SYMBOL_GPL(opal_prd_msg);
> +EXPORT_SYMBOL_GPL(opal_occ_command);
>
> /* Convert a region of vmalloc memory to an opal sg list */
> struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
> @@ -937,6 +941,10 @@ int opal_error_code(int rc)
> case OPAL_UNSUPPORTED: return -EIO;
> case OPAL_HARDWARE: return -EIO;
> case OPAL_INTERNAL_ERROR: return -EIO;
> + case OPAL_OCC_BUSY: return -EBUSY;
> + case OPAL_OCC_INVALID_STATE:
> + case OPAL_OCC_CMD_TIMEOUT:
> + case OPAL_OCC_RSP_MISMATCH: return -EIO;
> default:
> pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
> return -EIO;
Powered by blists - more mailing lists