[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a562f557-0530-4e29-b0d0-78d4441e06e1@os.amperecomputing.com>
Date: Tue, 21 Jan 2025 15:01:07 -0800
From: Daniel Ferguson <danielf@...amperecomputing.com>
To: shiju.jose@...wei.com, linux-edac@...r.kernel.org,
linux-cxl@...r.kernel.org, linux-acpi@...r.kernel.org, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Cc: bp@...en8.de, tony.luck@...el.com, rafael@...nel.org, lenb@...nel.org,
mchehab@...nel.org, dan.j.williams@...el.com, dave@...olabs.net,
jonathan.cameron@...wei.com, dave.jiang@...el.com,
alison.schofield@...el.com, vishal.l.verma@...el.com, ira.weiny@...el.com,
david@...hat.com, Vilas.Sridharan@....com, leo.duran@....com,
Yazen.Ghannam@....com, rientjes@...gle.com, jiaqiyan@...gle.com,
Jon.Grimm@....com, dave.hansen@...ux.intel.com, naoya.horiguchi@....com,
james.morse@....com, jthoughton@...gle.com, somasundaram.a@....com,
erdemaktas@...gle.com, pgonda@...gle.com, duenwen@...gle.com,
gthelen@...gle.com, wschwartz@...erecomputing.com,
dferguson@...erecomputing.com, wbs@...amperecomputing.com,
nifan.cxl@...il.com, tanxiaofei@...wei.com, prime.zeng@...ilicon.com,
roberto.sassu@...wei.com, kangkang.shen@...urewei.com,
wanghuiqiang@...wei.com, linuxarm@...wei.com
Subject: Re: [PATCH v18 05/19] ACPI:RAS2: Add ACPI RAS2 driver
On 1/6/2025 4:10 AM, shiju.jose@...wei.com wrote:
> +static int ras2_report_cap_error(u32 cap_status)
> +{
> + switch (cap_status) {
> + case ACPI_RAS2_NOT_VALID:
> + case ACPI_RAS2_NOT_SUPPORTED:
> + return -EPERM;
> + case ACPI_RAS2_BUSY:
> + return -EBUSY;
> + case ACPI_RAS2_FAILED:
> + case ACPI_RAS2_ABORTED:
> + case ACPI_RAS2_INVALID_DATA:
> + return -EINVAL;
> + default: /* 0 or other, Success */
> + return 0;
> + }
> +}
> +
> +static int ras2_check_pcc_chan(struct ras2_pcc_subspace *pcc_subspace)
> +{
> + struct acpi_ras2_shared_memory __iomem *generic_comm_base = pcc_subspace->pcc_comm_addr;
> + ktime_t next_deadline = ktime_add(ktime_get(), pcc_subspace->deadline);
> + u32 cap_status;
> + u16 status;
> + u32 ret;
> +
> + while (!ktime_after(ktime_get(), next_deadline)) {
> + /*
> + * As per ACPI spec, the PCC space will be initialized by
> + * platform and should have set the command completion bit when
> + * PCC can be used by OSPM
> + */
> + status = readw_relaxed(&generic_comm_base->status);
> + if (status & RAS2_PCC_CMD_ERROR) {
> + cap_status = readw_relaxed(&generic_comm_base->set_capabilities_status);
> + ret = ras2_report_cap_error(cap_status);
There is some new information:
The Scrub parameter block is intended to get its own Status field, and
the set_capabilities_status field is being deprecated. This also causes
a revision bump in the spec.
See [1]
Assuming this change is ratified (not guaranteed, still pending):
This change implies that we cannot centrally decode errors, as is done
here and now. Instead, error decoding must be done after a
feature-specific routine calls ras2_send_pcc_cmd. Moving forward, each
new feature will likely have its own status field.
Please see my follow up comment on [PATCH v18 06/19]
---
[1] https://github.com/tianocore/edk2/issues/10540
---
> +
> + status &= ~RAS2_PCC_CMD_ERROR;
> + writew_relaxed(status, &generic_comm_base->status);
> + return ret;
> + }
> + if (status & RAS2_PCC_CMD_COMPLETE)
> + return 0;
> + /*
> + * Reducing the bus traffic in case this loop takes longer than
> + * a few retries.
> + */
> + msleep(10);
> + }
> +
> + return -EIO;
> +}
> +
> +/**
> + * ras2_send_pcc_cmd() - Send RAS2 command via PCC channel
> + * @ras2_ctx: pointer to the RAS2 context structure
> + * @cmd: command to send
> + *
> + * Returns: 0 on success, an error otherwise
> + */
> +int ras2_send_pcc_cmd(struct ras2_mem_ctx *ras2_ctx, u16 cmd)
> +{
> + struct ras2_pcc_subspace *pcc_subspace = ras2_ctx->pcc_subspace;
> + struct acpi_ras2_shared_memory *generic_comm_base = pcc_subspace->pcc_comm_addr;
> + static ktime_t last_cmd_cmpl_time, last_mpar_reset;
> + struct mbox_chan *pcc_channel;
> + unsigned int time_delta;
> + static int mpar_count;
> + int ret;
> +
> + guard(mutex)(&ras2_pcc_subspace_lock);
> + ret = ras2_check_pcc_chan(pcc_subspace);
> + if (ret < 0)
> + return ret;
> + pcc_channel = pcc_subspace->pcc_chan->mchan;
> +
> + /*
> + * Handle the Minimum Request Turnaround Time(MRTT)
> + * "The minimum amount of time that OSPM must wait after the completion
> + * of a command before issuing the next command, in microseconds"
> + */
> + if (pcc_subspace->pcc_mrtt) {
> + time_delta = ktime_us_delta(ktime_get(), last_cmd_cmpl_time);
> + if (pcc_subspace->pcc_mrtt > time_delta)
> + udelay(pcc_subspace->pcc_mrtt - time_delta);
> + }
> +
> + /*
> + * Handle the non-zero Maximum Periodic Access Rate(MPAR)
> + * "The maximum number of periodic requests that the subspace channel can
> + * support, reported in commands per minute. 0 indicates no limitation."
> + *
> + * This parameter should be ideally zero or large enough so that it can
> + * handle maximum number of requests that all the cores in the system can
> + * collectively generate. If it is not, we will follow the spec and just
> + * not send the request to the platform after hitting the MPAR limit in
> + * any 60s window
> + */
> + if (pcc_subspace->pcc_mpar) {
> + if (mpar_count == 0) {
> + time_delta = ktime_ms_delta(ktime_get(), last_mpar_reset);
> + if (time_delta < 60 * MSEC_PER_SEC) {
> + dev_dbg(ras2_ctx->dev,
> + "PCC cmd not sent due to MPAR limit");
> + return -EIO;
> + }
> + last_mpar_reset = ktime_get();
> + mpar_count = pcc_subspace->pcc_mpar;
> + }
> + mpar_count--;
> + }
> +
> + /* Write to the shared comm region. */
> + writew_relaxed(cmd, &generic_comm_base->command);
> +
> + /* Flip CMD COMPLETE bit */
> + writew_relaxed(0, &generic_comm_base->status);
> +
> + /* Ring doorbell */
> + ret = mbox_send_message(pcc_channel, &cmd);
> + if (ret < 0) {
> + dev_err(ras2_ctx->dev,
> + "Err sending PCC mbox message. cmd:%d, ret:%d\n",
> + cmd, ret);
> + return ret;
> + }
> +
> + /*
> + * If Minimum Request Turnaround Time is non-zero, we need
> + * to record the completion time of both READ and WRITE
> + * command for proper handling of MRTT, so we need to check
> + * for pcc_mrtt in addition to CMD_READ
> + */
> + if (cmd == RAS2_PCC_CMD_EXEC || pcc_subspace->pcc_mrtt) {
> + ret = ras2_check_pcc_chan(pcc_subspace);
> + if (pcc_subspace->pcc_mrtt)
> + last_cmd_cmpl_time = ktime_get();
> + }
> +
> + if (pcc_channel->mbox->txdone_irq)
> + mbox_chan_txdone(pcc_channel, ret);
> + else
> + mbox_client_txdone(pcc_channel, ret);
> +
> + return ret >= 0 ? 0 : ret;
> +}
> +EXPORT_SYMBOL_GPL(ras2_send_pcc_cmd);
> +
> +static int ras2_register_pcc_channel(struct ras2_mem_ctx *ras2_ctx, int pcc_subspace_id)
> +{
> + struct ras2_pcc_subspace *pcc_subspace;
> + struct pcc_mbox_chan *pcc_chan;
> + struct mbox_client *mbox_cl;
> +
> + if (pcc_subspace_id < 0)
> + return -EINVAL;
> +
> + mutex_lock(&ras2_pcc_subspace_lock);
> + list_for_each_entry(pcc_subspace, &ras2_pcc_subspaces, elem) {
> + if (pcc_subspace->pcc_subspace_id == pcc_subspace_id) {
> + ras2_ctx->pcc_subspace = pcc_subspace;
> + pcc_subspace->ref_count++;
> + mutex_unlock(&ras2_pcc_subspace_lock);
> + return 0;
> + }
> + }
> + mutex_unlock(&ras2_pcc_subspace_lock);
> +
> + pcc_subspace = kcalloc(1, sizeof(*pcc_subspace), GFP_KERNEL);
> + if (!pcc_subspace)
> + return -ENOMEM;
> + mbox_cl = &pcc_subspace->mbox_client;
> + mbox_cl->knows_txdone = true;
> +
> + pcc_chan = pcc_mbox_request_channel(mbox_cl, pcc_subspace_id);
> + if (IS_ERR(pcc_chan)) {
> + kfree(pcc_subspace);
> + return PTR_ERR(pcc_chan);
> + }
> + *pcc_subspace = (struct ras2_pcc_subspace) {
> + .pcc_subspace_id = pcc_subspace_id,
> + .pcc_chan = pcc_chan,
> + .pcc_comm_addr = acpi_os_ioremap(pcc_chan->shmem_base_addr,
> + pcc_chan->shmem_size),
> + .deadline = ns_to_ktime(RAS2_NUM_RETRIES *
> + pcc_chan->latency *
> + NSEC_PER_USEC),
> + .pcc_mrtt = pcc_chan->min_turnaround_time,
> + .pcc_mpar = pcc_chan->max_access_rate,
> + .mbox_client = {
> + .knows_txdone = true,
> + },
> + .pcc_channel_acquired = true,
> + };
> + mutex_lock(&ras2_pcc_subspace_lock);
> + list_add(&pcc_subspace->elem, &ras2_pcc_subspaces);
> + pcc_subspace->ref_count++;
> + mutex_unlock(&ras2_pcc_subspace_lock);
> + ras2_ctx->pcc_subspace = pcc_subspace;
> + ras2_ctx->pcc_comm_addr = pcc_subspace->pcc_comm_addr;
> + ras2_ctx->dev = pcc_chan->mchan->mbox->dev;
> +
> + return 0;
> +}
> +
> +static DEFINE_IDA(ras2_ida);
> +static void ras2_remove_pcc(struct ras2_mem_ctx *ras2_ctx)
> +{
> + struct ras2_pcc_subspace *pcc_subspace = ras2_ctx->pcc_subspace;
> +
> + guard(mutex)(&ras2_pcc_subspace_lock);
> + if (pcc_subspace->ref_count > 0)
> + pcc_subspace->ref_count--;
> + if (!pcc_subspace->ref_count) {
> + list_del(&pcc_subspace->elem);
> + pcc_mbox_free_channel(pcc_subspace->pcc_chan);
> + kfree(pcc_subspace);
> + }
> +}
> +
> +static void ras2_release(struct device *device)
> +{
> + struct auxiliary_device *auxdev = container_of(device, struct auxiliary_device, dev);
> + struct ras2_mem_ctx *ras2_ctx = container_of(auxdev, struct ras2_mem_ctx, adev);
> +
> + ida_free(&ras2_ida, auxdev->id);
> + ras2_remove_pcc(ras2_ctx);
> + kfree(ras2_ctx);
> +}
> +
> +static struct ras2_mem_ctx *ras2_add_aux_device(char *name, int channel)
> +{
> + struct ras2_mem_ctx *ras2_ctx;
> + int id, ret;
> +
> + ras2_ctx = kzalloc(sizeof(*ras2_ctx), GFP_KERNEL);
> + if (!ras2_ctx)
> + return ERR_PTR(-ENOMEM);
> +
> + mutex_init(&ras2_ctx->lock);
> +
> + ret = ras2_register_pcc_channel(ras2_ctx, channel);
> + if (ret < 0) {
> + pr_debug("failed to register pcc channel ret=%d\n", ret);
> + goto ctx_free;
> + }
> +
> + id = ida_alloc(&ras2_ida, GFP_KERNEL);
> + if (id < 0) {
> + ret = id;
> + goto pcc_free;
> + }
> + ras2_ctx->id = id;
> + ras2_ctx->adev.id = id;
> + ras2_ctx->adev.name = RAS2_MEM_DEV_ID_NAME;
> + ras2_ctx->adev.dev.release = ras2_release;
> + ras2_ctx->adev.dev.parent = ras2_ctx->dev;
> +
> + ret = auxiliary_device_init(&ras2_ctx->adev);
> + if (ret)
> + goto ida_free;
> +
> + ret = auxiliary_device_add(&ras2_ctx->adev);
> + if (ret) {
> + auxiliary_device_uninit(&ras2_ctx->adev);
> + return ERR_PTR(ret);
> + }
> +
> + return ras2_ctx;
> +
> +ida_free:
> + ida_free(&ras2_ida, id);
> +pcc_free:
> + ras2_remove_pcc(ras2_ctx);
> +ctx_free:
> + kfree(ras2_ctx);
> + return ERR_PTR(ret);
> +}
> +
> +static int __init ras2_acpi_init(void)
> +{
> + struct acpi_table_header *pAcpiTable = NULL;
> + struct acpi_ras2_pcc_desc *pcc_desc_list;
> + struct acpi_table_ras2 *pRas2Table;
> + struct ras2_mem_ctx *ras2_ctx;
> + int pcc_subspace_id;
> + acpi_size ras2_size;
> + acpi_status status;
> + u8 count = 0, i;
> + int ret = 0;
> +
> + status = acpi_get_table("RAS2", 0, &pAcpiTable);
> + if (ACPI_FAILURE(status) || !pAcpiTable) {
> + pr_err("ACPI RAS2 driver failed to initialize, get table failed\n");
> + return -EINVAL;
> + }
> +
> + ras2_size = pAcpiTable->length;
> + if (ras2_size < sizeof(struct acpi_table_ras2)) {
> + pr_err("ACPI RAS2 table present but broken (too short #1)\n");
> + ret = -EINVAL;
> + goto free_ras2_table;
> + }
> +
> + pRas2Table = (struct acpi_table_ras2 *)pAcpiTable;
> + if (pRas2Table->num_pcc_descs <= 0) {
> + pr_err("ACPI RAS2 table does not contain PCC descriptors\n");
> + ret = -EINVAL;
> + goto free_ras2_table;
> + }
> +
> + pcc_desc_list = (struct acpi_ras2_pcc_desc *)(pRas2Table + 1);
> + /* Double scan for the case of only one actual controller */
> + pcc_subspace_id = -1;
> + count = 0;
> + for (i = 0; i < pRas2Table->num_pcc_descs; i++, pcc_desc_list++) {
> + if (pcc_desc_list->feature_type != RAS2_FEATURE_TYPE_MEMORY)
> + continue;
> + if (pcc_subspace_id == -1) {
> + pcc_subspace_id = pcc_desc_list->channel_id;
> + count++;
> + }
> + if (pcc_desc_list->channel_id != pcc_subspace_id)
> + count++;
> + }
> + /*
> + * Workaround for the client platform with multiple scrub devices
> + * but uses single PCC subspace for communication.
> + */
> + if (count == 1) {
> + /* Add auxiliary device and bind ACPI RAS2 memory driver */
> + ras2_ctx = ras2_add_aux_device(RAS2_MEM_DEV_ID_NAME, pcc_subspace_id);
> + if (IS_ERR(ras2_ctx)) {
> + ret = PTR_ERR(ras2_ctx);
> + goto free_ras2_table;
> + }
> + acpi_put_table(pAcpiTable);
> + return 0;
> + }
> +
> + pcc_desc_list = (struct acpi_ras2_pcc_desc *)(pRas2Table + 1);
> + count = 0;
> + for (i = 0; i < pRas2Table->num_pcc_descs; i++, pcc_desc_list++) {
> + if (pcc_desc_list->feature_type != RAS2_FEATURE_TYPE_MEMORY)
> + continue;
> + pcc_subspace_id = pcc_desc_list->channel_id;
> + /* Add auxiliary device and bind ACPI RAS2 memory driver */
> + ras2_ctx = ras2_add_aux_device(RAS2_MEM_DEV_ID_NAME, pcc_subspace_id);
> + if (IS_ERR(ras2_ctx)) {
> + ret = PTR_ERR(ras2_ctx);
> + goto free_ras2_table;
> + }
> + }
> +
> +free_ras2_table:
> + acpi_put_table(pAcpiTable);
> + return ret;
> +}
> +late_initcall(ras2_acpi_init)
> diff --git a/include/acpi/ras2_acpi.h b/include/acpi/ras2_acpi.h
> new file mode 100644
> index 000000000000..7b32407ae2af
> --- /dev/null
> +++ b/include/acpi/ras2_acpi.h
> @@ -0,0 +1,45 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * RAS2 ACPI driver header file
> + *
> + * (C) Copyright 2014, 2015 Hewlett-Packard Enterprises
> + *
> + * Copyright (c) 2024 HiSilicon Limited
> + */
> +
> +#ifndef _RAS2_ACPI_H
> +#define _RAS2_ACPI_H
> +
> +#include <linux/acpi.h>
> +#include <linux/auxiliary_bus.h>
> +#include <linux/mailbox_client.h>
> +#include <linux/mutex.h>
> +#include <linux/types.h>
> +
> +#define RAS2_PCC_CMD_COMPLETE BIT(0)
> +#define RAS2_PCC_CMD_ERROR BIT(2)
> +
> +/* RAS2 specific PCC commands */
> +#define RAS2_PCC_CMD_EXEC 0x01
> +
> +#define RAS2_AUX_DEV_NAME "ras2"
> +#define RAS2_MEM_DEV_ID_NAME "acpi_ras2_mem"
> +
> +/* Data structure RAS2 table */
> +struct ras2_mem_ctx {
> + struct auxiliary_device adev;
> + /* Lock to provide mutually exclusive access to PCC channel */
> + struct mutex lock;
> + int id;
> + u8 instance;
> + bool bg;
> + u64 base, size;
> + u8 scrub_cycle_hrs, min_scrub_cycle, max_scrub_cycle;
> + struct device *dev;
> + struct device *scrub_dev;
> + void *pcc_subspace;
> + struct acpi_ras2_shared_memory __iomem *pcc_comm_addr;
> +};
Could we break the ras2_mem_ctx structure up a little bit so that when
we add a new feature, it will be easier to add a new context?
In the following example, we show what it *might* look like if we add
another feature (Address Translation). But the ask here is to split the
ras2_mem_ctx structure into two structures: ras2_mem_scrub_ctx and
ras2_mem_ctx.
struct ras2_mem_address_translation_ctx {
struct device *at_dev;
...
};
struct ras2_mem_scrub_ctx {
struct device *scrub_dev;
bool bg;
u64 base, size;
u8 scrub_cycle_hrs, min_scrub_cycle, max_scrub_cycle;
};
/* Data structure RAS2 table */
struct ras2_mem_ctx {
struct auxiliary_device adev;
/* Lock to provide mutually exclusive access to PCC channel */
struct mutex lock;
int id;
u8 instance;
struct device *dev;
void *pcc_subspace;
struct acpi_ras2_shared_memory __iomem *pcc_comm_addr;
struct ras2_mem_scrub_ctx scrub;
struct ras2_mem_address_translation_ctx at;
};
Powered by blists - more mailing lists