[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a562f557-0530-4e29-b0d0-78d4441e06e1@os.amperecomputing.com>
Date: Tue, 21 Jan 2025 15:01:07 -0800
From: Daniel Ferguson <danielf@...amperecomputing.com>
To: shiju.jose@...wei.com, linux-edac@...r.kernel.org,
linux-cxl@...r.kernel.org, linux-acpi@...r.kernel.org, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Cc: bp@...en8.de, tony.luck@...el.com, rafael@...nel.org, lenb@...nel.org,
mchehab@...nel.org, dan.j.williams@...el.com, dave@...olabs.net,
jonathan.cameron@...wei.com, dave.jiang@...el.com,
alison.schofield@...el.com, vishal.l.verma@...el.com, ira.weiny@...el.com,
david@...hat.com, Vilas.Sridharan@....com, leo.duran@....com,
Yazen.Ghannam@....com, rientjes@...gle.com, jiaqiyan@...gle.com,
Jon.Grimm@....com, dave.hansen@...ux.intel.com, naoya.horiguchi@....com,
james.morse@....com, jthoughton@...gle.com, somasundaram.a@....com,
erdemaktas@...gle.com, pgonda@...gle.com, duenwen@...gle.com,
gthelen@...gle.com, wschwartz@...erecomputing.com,
dferguson@...erecomputing.com, wbs@...amperecomputing.com,
nifan.cxl@...il.com, tanxiaofei@...wei.com, prime.zeng@...ilicon.com,
roberto.sassu@...wei.com, kangkang.shen@...urewei.com,
wanghuiqiang@...wei.com, linuxarm@...wei.com
Subject: Re: [PATCH v18 05/19] ACPI:RAS2: Add ACPI RAS2 driver
On 1/6/2025 4:10 AM, shiju.jose@...wei.com wrote:
> +static int ras2_report_cap_error(u32 cap_status)
> +{
> + switch (cap_status) {
> + case ACPI_RAS2_NOT_VALID:
> + case ACPI_RAS2_NOT_SUPPORTED:
> + return -EPERM;
> + case ACPI_RAS2_BUSY:
> + return -EBUSY;
> + case ACPI_RAS2_FAILED:
> + case ACPI_RAS2_ABORTED:
> + case ACPI_RAS2_INVALID_DATA:
> + return -EINVAL;
> + default: /* 0 or other, Success */
> + return 0;
> + }
> +}
> +
> +static int ras2_check_pcc_chan(struct ras2_pcc_subspace *pcc_subspace)
> +{
> + struct acpi_ras2_shared_memory __iomem *generic_comm_base = pcc_subspace->pcc_comm_addr;
> + ktime_t next_deadline = ktime_add(ktime_get(), pcc_subspace->deadline);
> + u32 cap_status;
> + u16 status;
> + u32 ret;
> +
> + while (!ktime_after(ktime_get(), next_deadline)) {
> + /*
> + * As per ACPI spec, the PCC space will be initialized by
> + * platform and should have set the command completion bit when
> + * PCC can be used by OSPM
> + */
> + status = readw_relaxed(&generic_comm_base->status);
> + if (status & RAS2_PCC_CMD_ERROR) {
> + cap_status = readw_relaxed(&generic_comm_base->set_capabilities_status);
> + ret = ras2_report_cap_error(cap_status);
There is some new information:
The Scrub parameter block is intended to get its own Status field, and
the set_capabilities_status field is being deprecated. This also causes
a revision bump in the spec.
See [1]
Assuming this change is ratified (not guaranteed, still pending):
This change implies that we cannot centrally decode errors, as is done
here and now. Instead, error decoding must be done after a
feature-specific routine calls ras2_send_pcc_cmd. Moving forward, each
new feature will likely have its own status field.
Please see my follow up comment on [PATCH v18 06/19]
---
[1] https://github.com/tianocore/edk2/issues/10540
---
> +
> + status &= ~RAS2_PCC_CMD_ERROR;
> + writew_relaxed(status, &generic_comm_base->status);
> + return ret;
> + }
> + if (status & RAS2_PCC_CMD_COMPLETE)
> + return 0;
> + /*
> + * Reducing the bus traffic in case this loop takes longer than
> + * a few retries.
> + */
> + msleep(10);
> + }
> +
> + return -EIO;
> +}
> +
> +/**
> + * ras2_send_pcc_cmd() - Send RAS2 command via PCC channel
> + * @ras2_ctx: pointer to the RAS2 context structure
> + * @cmd: command to send
> + *
> + * Returns: 0 on success, an error otherwise
> + */
> +int ras2_send_pcc_cmd(struct ras2_mem_ctx *ras2_ctx, u16 cmd)
> +{
> + struct ras2_pcc_subspace *pcc_subspace = ras2_ctx->pcc_subspace;
> + struct acpi_ras2_shared_memory *generic_comm_base = pcc_subspace->pcc_comm_addr;
> + static ktime_t last_cmd_cmpl_time, last_mpar_reset;
> + struct mbox_chan *pcc_channel;
> + unsigned int time_delta;
> + static int mpar_count;
> + int ret;
> +
> + guard(mutex)(&ras2_pcc_subspace_lock);
> + ret = ras2_check_pcc_chan(pcc_subspace);
> + if (ret < 0)
> + return ret;
> + pcc_channel = pcc_subspace->pcc_chan->mchan;
> +
> + /*
> + * Handle the Minimum Request Turnaround Time(MRTT)
> + * "The minimum amount of time that OSPM must wait after the completion
> + * of a command before issuing the next command, in microseconds"
> + */
> + if (pcc_subspace->pcc_mrtt) {
> + time_delta = ktime_us_delta(ktime_get(), last_cmd_cmpl_time);
> + if (pcc_subspace->pcc_mrtt > time_delta)
> + udelay(pcc_subspace->pcc_mrtt - time_delta);
> + }
> +
> + /*
> + * Handle the non-zero Maximum Periodic Access Rate(MPAR)
> + * "The maximum number of periodic requests that the subspace channel can
> + * support, reported in commands per minute. 0 indicates no limitation."
> + *
> + * This parameter should be ideally zero or large enough so that it can
> + * handle maximum number of requests that all the cores in the system can
> + * collectively generate. If it is not, we will follow the spec and just
> + * not send the request to the platform after hitting the MPAR limit in
> + * any 60s window
> + */
> + if (pcc_subspace->pcc_mpar) {
> + if (mpar_count == 0) {
> + time_delta = ktime_ms_delta(ktime_get(), last_mpar_reset);
> + if (time_delta < 60 * MSEC_PER_SEC) {
> + dev_dbg(ras2_ctx->dev,
> + "PCC cmd not sent due to MPAR limit");
> + return -EIO;
> + }
> + last_mpar_reset = ktime_get();
> + mpar_count = pcc_subspace->pcc_mpar;
> + }
> + mpar_count--;
> + }
> +
> + /* Write to the shared comm region. */
> + writew_relaxed(cmd, &generic_comm_base->command);
> +
> + /* Flip CMD COMPLETE bit */
> + writew_relaxed(0, &generic_comm_base->status);
> +
> + /* Ring doorbell */
> + ret = mbox_send_message(pcc_channel, &cmd);
> + if (ret < 0) {
> + dev_err(ras2_ctx->dev,
> + "Err sending PCC mbox message. cmd:%d, ret:%d\n",
> + cmd, ret);
> + return ret;
> + }
> +
> + /*
> + * If Minimum Request Turnaround Time is non-zero, we need
> + * to record the completion time of both READ and WRITE
> + * command for proper handling of MRTT, so we need to check
> + * for pcc_mrtt in addition to CMD_READ
> + */
> + if (cmd == RAS2_PCC_CMD_EXEC || pcc_subspace->pcc_mrtt) {
> + ret = ras2_check_pcc_chan(pcc_subspace);
> + if (pcc_subspace->pcc_mrtt)
> + last_cmd_cmpl_time = ktime_get();
> + }
> +
> + if (pcc_channel->mbox->txdone_irq)
> + mbox_chan_txdone(pcc_channel, ret);
> + else
> + mbox_client_txdone(pcc_channel, ret);
> +
> + return ret >= 0 ? 0 : ret;
> +}
> +EXPORT_SYMBOL_GPL(ras2_send_pcc_cmd);
> +
> +static int ras2_register_pcc_channel(struct ras2_mem_ctx *ras2_ctx, int pcc_subspace_id)
> +{
> + struct ras2_pcc_subspace *pcc_subspace;
> + struct pcc_mbox_chan *pcc_chan;
> + struct mbox_client *mbox_cl;
> +
> + if (pcc_subspace_id < 0)
> + return -EINVAL;
> +
> + mutex_lock(&ras2_pcc_subspace_lock);
> + list_for_each_entry(pcc_subspace, &ras2_pcc_subspaces, elem) {
> + if (pcc_subspace->pcc_subspace_id == pcc_subspace_id) {
> + ras2_ctx->pcc_subspace = pcc_subspace;
> + pcc_subspace->ref_count++;
> + mutex_unlock(&ras2_pcc_subspace_lock);
> + return 0;
> + }
> + }
> + mutex_unlock(&ras2_pcc_subspace_lock);
> +
> + pcc_subspace = kcalloc(1, sizeof(*pcc_subspace), GFP_KERNEL);
> + if (!pcc_subspace)
> + return -ENOMEM;
> + mbox_cl = &pcc_subspace->mbox_client;
> + mbox_cl->knows_txdone = true;
> +
> + pcc_chan = pcc_mbox_request_channel(mbox_cl, pcc_subspace_id);
> + if (IS_ERR(pcc_chan)) {
> + kfree(pcc_subspace);
> + return PTR_ERR(pcc_chan);
> + }
> + *pcc_subspace = (struct ras2_pcc_subspace) {
> + .pcc_subspace_id = pcc_subspace_id,
> + .pcc_chan = pcc_chan,
> + .pcc_comm_addr = acpi_os_ioremap(pcc_chan->shmem_base_addr,
> + pcc_chan->shmem_size),
> + .deadline = ns_to_ktime(RAS2_NUM_RETRIES *
> + pcc_chan->latency *
> + NSEC_PER_USEC),
> + .pcc_mrtt = pcc_chan->min_turnaround_time,
> + .pcc_mpar = pcc_chan->max_access_rate,
> + .mbox_client = {
> + .knows_txdone = true,
> + },
> + .pcc_channel_acquired = true,
> + };
> + mutex_lock(&ras2_pcc_subspace_lock);
> + list_add(&pcc_subspace->elem, &ras2_pcc_subspaces);
> + pcc_subspace->ref_count++;
> + mutex_unlock(&ras2_pcc_subspace_lock);
> + ras2_ctx->pcc_subspace = pcc_subspace;
> + ras2_ctx->pcc_comm_addr = pcc_subspace->pcc_comm_addr;
> + ras2_ctx->dev = pcc_chan->mchan->mbox->dev;
> +
> + return 0;
> +}
> +
> +static DEFINE_IDA(ras2_ida);
> +static void ras2_remove_pcc(struct ras2_mem_ctx *ras2_ctx)
> +{
> + struct ras2_pcc_subspace *pcc_subspace = ras2_ctx->pcc_subspace;
> +
> + guard(mutex)(&ras2_pcc_subspace_lock);
> + if (pcc_subspace->ref_count > 0)
> + pcc_subspace->ref_count--;
> + if (!pcc_subspace->ref_count) {
> + list_del(&pcc_subspace->elem);
> + pcc_mbox_free_channel(pcc_subspace->pcc_chan);
> + kfree(pcc_subspace);
> + }
> +}
> +
> +static void ras2_release(struct device *device)
> +{
> + struct auxiliary_device *auxdev = container_of(device, struct auxiliary_device, dev);
> + struct ras2_mem_ctx *ras2_ctx = container_of(auxdev, struct ras2_mem_ctx, adev);
> +
> + ida_free(&ras2_ida, auxdev->id);
> + ras2_remove_pcc(ras2_ctx);
> + kfree(ras2_ctx);
> +}
> +
> +static struct ras2_mem_ctx *ras2_add_aux_device(char *name, int channel)
> +{
> + struct ras2_mem_ctx *ras2_ctx;
> + int id, ret;
> +
> + ras2_ctx = kzalloc(sizeof(*ras2_ctx), GFP_KERNEL);
> + if (!ras2_ctx)
> + return ERR_PTR(-ENOMEM);
> +
> + mutex_init(&ras2_ctx->lock);
> +
> + ret = ras2_register_pcc_channel(ras2_ctx, channel);
> + if (ret < 0) {
> + pr_debug("failed to register pcc channel ret=%d\n", ret);
> + goto ctx_free;
> + }
> +
> + id = ida_alloc(&ras2_ida, GFP_KERNEL);
> + if (id < 0) {
> + ret = id;
> + goto pcc_free;
> + }
> + ras2_ctx->id = id;
> + ras2_ctx->adev.id = id;
> + ras2_ctx->adev.name = RAS2_MEM_DEV_ID_NAME;
> + ras2_ctx->adev.dev.release = ras2_release;
> + ras2_ctx->adev.dev.parent = ras2_ctx->dev;
> +
> + ret = auxiliary_device_init(&ras2_ctx->adev);
> + if (ret)
> + goto ida_free;
> +
> + ret = auxiliary_device_add(&ras2_ctx->adev);
> + if (ret) {
> + auxiliary_device_uninit(&ras2_ctx->adev);
> + return ERR_PTR(ret);
> + }
> +
> + return ras2_ctx;
> +
> +ida_free:
> + ida_free(&ras2_ida, id);
> +pcc_free:
> + ras2_remove_pcc(ras2_ctx);
> +ctx_free:
> + kfree(ras2_ctx);
> + return ERR_PTR(ret);
> +}
> +
> +static int __init ras2_acpi_init(void)
> +{
> + struct acpi_table_header *pAcpiTable = NULL;
> + struct acpi_ras2_pcc_desc *pcc_desc_list;
> + struct acpi_table_ras2 *pRas2Table;
> + struct ras2_mem_ctx *ras2_ctx;
> + int pcc_subspace_id;
> + acpi_size ras2_size;
> + acpi_status status;
> + u8 count = 0, i;
> + int ret = 0;
> +
> + status = acpi_get_table("RAS2", 0, &pAcpiTable);
> + if (ACPI_FAILURE(status) || !pAcpiTable) {
> + pr_err("ACPI RAS2 driver failed to initialize, get table failed\n");
> + return -EINVAL;
> + }
> +
> + ras2_size = pAcpiTable->length;
> + if (ras2_size < sizeof(struct acpi_table_ras2)) {
> + pr_err("ACPI RAS2 table present but broken (too short #1)\n");
> + ret = -EINVAL;
> + goto free_ras2_table;
> + }
> +
> + pRas2Table = (struct acpi_table_ras2 *)pAcpiTable;
> + if (pRas2Table->num_pcc_descs <= 0) {
> + pr_err("ACPI RAS2 table does not contain PCC descriptors\n");
> + ret = -EINVAL;
> + goto free_ras2_table;
> + }
> +
> + pcc_desc_list = (struct acpi_ras2_pcc_desc *)(pRas2Table + 1);
> + /* Double scan for the case of only one actual controller */
> + pcc_subspace_id = -1;
> + count = 0;
> + for (i = 0; i < pRas2Table->num_pcc_descs; i++, pcc_desc_list++) {
> + if (pcc_desc_list->feature_type != RAS2_FEATURE_TYPE_MEMORY)
> + continue;
> + if (pcc_subspace_id == -1) {
> + pcc_subspace_id = pcc_desc_list->channel_id;
> + count++;
> + }
> + if (pcc_desc_list->channel_id != pcc_subspace_id)
> + count++;
> + }
> + /*
> + * Workaround for the client platform with multiple scrub devices
> + * but uses single PCC subspace for communication.
> + */
> + if (count == 1) {
> + /* Add auxiliary device and bind ACPI RAS2 memory driver */
> + ras2_ctx = ras2_add_aux_device(RAS2_MEM_DEV_ID_NAME, pcc_subspace_id);
> + if (IS_ERR(ras2_ctx)) {
> + ret = PTR_ERR(ras2_ctx);
> + goto free_ras2_table;
> + }
> + acpi_put_table(pAcpiTable);
> + return 0;
> + }
> +
> + pcc_desc_list = (struct acpi_ras2_pcc_desc *)(pRas2Table + 1);
> + count = 0;
> + for (i = 0; i < pRas2Table->num_pcc_descs; i++, pcc_desc_list++) {
> + if (pcc_desc_list->feature_type != RAS2_FEATURE_TYPE_MEMORY)
> + continue;
> + pcc_subspace_id = pcc_desc_list->channel_id;
> + /* Add auxiliary device and bind ACPI RAS2 memory driver */
> + ras2_ctx = ras2_add_aux_device(RAS2_MEM_DEV_ID_NAME, pcc_subspace_id);
> + if (IS_ERR(ras2_ctx)) {
> + ret = PTR_ERR(ras2_ctx);
> + goto free_ras2_table;
> + }
> + }
> +
> +free_ras2_table:
> + acpi_put_table(pAcpiTable);
> + return ret;
> +}
> +late_initcall(ras2_acpi_init)
> diff --git a/include/acpi/ras2_acpi.h b/include/acpi/ras2_acpi.h
> new file mode 100644
> index 000000000000..7b32407ae2af
> --- /dev/null
> +++ b/include/acpi/ras2_acpi.h
> @@ -0,0 +1,45 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * RAS2 ACPI driver header file
> + *
> + * (C) Copyright 2014, 2015 Hewlett-Packard Enterprises
> + *
> + * Copyright (c) 2024 HiSilicon Limited
> + */
> +
> +#ifndef _RAS2_ACPI_H
> +#define _RAS2_ACPI_H
> +
> +#include <linux/acpi.h>
> +#include <linux/auxiliary_bus.h>
> +#include <linux/mailbox_client.h>
> +#include <linux/mutex.h>
> +#include <linux/types.h>
> +
> +#define RAS2_PCC_CMD_COMPLETE BIT(0)
> +#define RAS2_PCC_CMD_ERROR BIT(2)
> +
> +/* RAS2 specific PCC commands */
> +#define RAS2_PCC_CMD_EXEC 0x01
> +
> +#define RAS2_AUX_DEV_NAME "ras2"
> +#define RAS2_MEM_DEV_ID_NAME "acpi_ras2_mem"
> +
> +/* Data structure RAS2 table */
> +struct ras2_mem_ctx {
> + struct auxiliary_device adev;
> + /* Lock to provide mutually exclusive access to PCC channel */
> + struct mutex lock;
> + int id;
> + u8 instance;
> + bool bg;
> + u64 base, size;
> + u8 scrub_cycle_hrs, min_scrub_cycle, max_scrub_cycle;
> + struct device *dev;
> + struct device *scrub_dev;
> + void *pcc_subspace;
> + struct acpi_ras2_shared_memory __iomem *pcc_comm_addr;
> +};
Could we break the ras2_mem_ctx structure up a little bit so that when
we add a new feature, it will be easier to add a new context?
In the following example, we show what it *might* look like if we add
another feature (Address Translation). But the ask here is to split the
ras2_mem_ctx structure into two structures: ras2_mem_scrub_ctx and
ras2_mem_ctx.
struct ras2_mem_address_translation_ctx {
struct device *at_dev;
...
};
struct ras2_mem_scrub_ctx {
struct device *scrub_dev;
bool bg;
u64 base, size;
u8 scrub_cycle_hrs, min_scrub_cycle, max_scrub_cycle;
};
/* Data structure RAS2 table */
struct ras2_mem_ctx {
struct auxiliary_device adev;
/* Lock to provide mutually exclusive access to PCC channel */
struct mutex lock;
int id;
u8 instance;
struct device *dev;
void *pcc_subspace;
struct acpi_ras2_shared_memory __iomem *pcc_comm_addr;
struct ras2_mem_scrub_ctx scrub;
struct ras2_mem_address_translation_ctx at;
};
Powered by blists - more mailing lists