lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3546729d-d67d-fc93-5c03-fc864fd23b4e@intel.com>
Date:   Thu, 31 Aug 2023 10:28:09 -0700
From:   Dave Jiang <dave.jiang@...el.com>
To:     Ira Weiny <ira.weiny@...el.com>,
        Dan Williams <dan.j.williams@...el.com>
CC:     Navneet Singh <navneet.singh@...el.com>,
        Fan Ni <fan.ni@...sung.com>,
        Jonathan Cameron <Jonathan.Cameron@...wei.com>,
        Davidlohr Bueso <dave@...olabs.net>,
        Alison Schofield <alison.schofield@...el.com>,
        "Vishal Verma" <vishal.l.verma@...el.com>,
        <linux-cxl@...r.kernel.org>, <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH RFC v2 10/18] cxl/mem: Handle DCD add and release capacity
 events.



On 8/28/23 22:21, Ira Weiny wrote:
> A Dynamic Capacity Device (DCD) utilizes events to signal the host about
> the changes to the allocation of Dynamic Capacity (DC) extents. The
> device communicates the state of DC extents through an extent list that
> describes the starting DPA, length, and metadata of the blocks the host
> can access.
> 
> Process the dynamic capacity add and release events.  The addition or
> removal of extents can occur at any time.  Adding asynchronous memory is
> straightforward.  Also remember the host is under no obligation to
> respond to a release event until it is done with the memory.  Introduce
> extent krefs to handle the delay of extent release.
> 
> In the case of a force removal, access to the memory will fail and may
> cause a crash.  However, the extent tracking object is preserved for the
> region to safely tear down as long as the memory is not accessed.
> 
> Signed-off-by: Navneet Singh <navneet.singh@...el.com>
> Co-developed-by: Ira Weiny <ira.weiny@...el.com>
> Signed-off-by: Ira Weiny <ira.weiny@...el.com>
> 
> ---
> changes for v2:
> [iweiny: Totally new version of the patch]
> [iweiny: use kref to track when to release an extent]
> [iweiny: rebased to latest master/type2 work]
> [iweiny: use a kref to track if extents are being referenced]
> [alison: align commit message paragraphs]
> [alison: remove unnecessary return]
> [iweiny: Adjust for the new __devm_cxl_add_dax_region()]
> [navneet: Fix debug prints in adding/releasing extent]
> [alison: deal with odd if/else logic]
> [alison: reverse x-tree]
> [alison: reverse x-tree]
> [alison: s/total_extent_cnt/count/]
> [alison: make handle event reverse x-tree]
> [alison: cleanup/shorten/remove handle event comment]
> [iweiny/Alison: refactor cxl_handle_dcd_event_records function]
> [iweiny: keep cxl_dc_extent_data local to mbox.c]
> [jonathan: eliminate 'rc']
> [iweiny: use proper type for mailbox size]
> [jonathan: put dc_extents on the stack]
> [jonathan: use direct returns instead of goto]
> [iweiny: Clean up comment]
> [Jonathan: define CXL_DC_EXTENT_TAG_LEN]
> [Jonathan: remove extraneous changes]
> [Jonathan: fix blank line issues]
> ---
>   drivers/cxl/core/mbox.c | 186 +++++++++++++++++++++++++++++++++++++++++++++++-
>   drivers/cxl/cxl.h       |   9 +++
>   drivers/cxl/cxlmem.h    |  30 ++++++++
>   3 files changed, 224 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 9b08c40ef484..8474a28b16ca 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -839,6 +839,8 @@ static int cxl_store_dc_extent(struct cxl_memdev_state *mds,
>   	extent->length = le64_to_cpu(dc_extent->length);
>   	memcpy(extent->tag, dc_extent->tag, sizeof(extent->tag));
>   	extent->shared_extent_seq = le16_to_cpu(dc_extent->shared_extn_seq);
> +	kref_init(&extent->region_ref);
> +	extent->mds = mds;
>   
>   	dev_dbg(dev, "dynamic capacity extent DPA:0x%llx LEN:%llx\n",
>   		extent->dpa_start, extent->length);
> @@ -879,6 +881,14 @@ static const uuid_t mem_mod_event_uuid =
>   	UUID_INIT(0xfe927475, 0xdd59, 0x4339,
>   		  0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
>   
> +/*
> + * Dynamic Capacity Event Record
> + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
> + */
> +static const uuid_t dc_event_uuid =
> +	UUID_INIT(0xca95afa7, 0xf183, 0x4018, 0x8c,
> +		  0x2f, 0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a);
> +
>   static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
>   				   enum cxl_event_log_type type,
>   				   struct cxl_event_record_raw *record)
> @@ -973,6 +983,171 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
>   	return rc;
>   }
>   
> +static int cxl_send_dc_cap_response(struct cxl_memdev_state *mds,
> +				struct cxl_mbox_dc_response *res,
> +				int extent_cnt, int opcode)
> +{
> +	struct cxl_mbox_cmd mbox_cmd;
> +	size_t size;
> +
> +	size = struct_size(res, extent_list, extent_cnt);
> +	res->extent_list_size = cpu_to_le32(extent_cnt);
> +
> +	mbox_cmd = (struct cxl_mbox_cmd) {
> +		.opcode = opcode,
> +		.size_in = size,
> +		.payload_in = res,
> +	};
> +
> +	return cxl_internal_send_cmd(mds, &mbox_cmd);
> +}
> +
> +static int cxl_prepare_ext_list(struct cxl_mbox_dc_response **res,
> +				int *n, struct range *extent)
> +{
> +	struct cxl_mbox_dc_response *dc_res;
> +	unsigned int size;
> +
> +	if (!extent)
> +		size = struct_size(dc_res, extent_list, 0);
> +	else
> +		size = struct_size(dc_res, extent_list, *n + 1);
> +
> +	dc_res = krealloc(*res, size, GFP_KERNEL);
> +	if (!dc_res)
> +		return -ENOMEM;
> +
> +	if (extent) {
> +		dc_res->extent_list[*n].dpa_start = cpu_to_le64(extent->start);
> +		memset(dc_res->extent_list[*n].reserved, 0, 8);
> +		dc_res->extent_list[*n].length = cpu_to_le64(range_len(extent));
> +		(*n)++;
> +	}
> +
> +	*res = dc_res;
> +	return 0;
> +}
> +
> +static void dc_extent_release(struct kref *kref)
> +{
> +	struct cxl_dc_extent_data *extent = container_of(kref,
> +						struct cxl_dc_extent_data,
> +						region_ref);
> +	struct cxl_memdev_state *mds = extent->mds;
> +	struct cxl_mbox_dc_response *dc_res = NULL;
> +	struct range rel_range = (struct range) {
> +		.start = extent->dpa_start,
> +		.end = extent->dpa_start + extent->length - 1,
> +	};
> +	struct device *dev = mds->cxlds.dev;
> +	int extent_cnt = 0, rc;
> +
> +	rc = cxl_prepare_ext_list(&dc_res, &extent_cnt, &rel_range);
> +	if (rc < 0) {
> +		dev_err(dev, "Failed to create release response %d\n", rc);
> +		goto free_extent;
> +	}
> +	rc = cxl_send_dc_cap_response(mds, dc_res, extent_cnt,
> +				      CXL_MBOX_OP_RELEASE_DC);
> +	kfree(dc_res);
> +
> +free_extent:
> +	kfree(extent);
> +}
> +
> +void cxl_dc_extent_put(struct cxl_dc_extent_data *extent)
> +{
> +	kref_put(&extent->region_ref, dc_extent_release);
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_dc_extent_put, CXL);
> +
> +static int cxl_handle_dcd_release_event(struct cxl_memdev_state *mds,
> +					struct cxl_dc_extent *rel_extent)
> +{
> +	struct device *dev = mds->cxlds.dev;
> +	struct cxl_dc_extent_data *extent;
> +	resource_size_t dpa, size;
> +
> +	dpa = le64_to_cpu(rel_extent->start_dpa);
> +	size = le64_to_cpu(rel_extent->length);
> +	dev_dbg(dev, "Release DC extent DPA:0x%llx LEN:%llx\n",
> +		dpa, size);
> +
> +	extent = xa_erase(&mds->dc_extent_list, dpa);
> +	if (!extent) {
> +		dev_err(dev, "No extent found with DPA:0x%llx\n", dpa);
> +		return -EINVAL;
> +	}
> +	cxl_dc_extent_put(extent);
> +	return 0;
> +}
> +
> +static int cxl_handle_dcd_add_event(struct cxl_memdev_state *mds,
> +				    struct cxl_dc_extent *add_extent)
> +{
> +	struct cxl_mbox_dc_response *dc_res = NULL;
> +	struct range alloc_range, *resp_range;
> +	struct device *dev = mds->cxlds.dev;
> +	int extent_cnt = 0;
> +	int rc;
> +
> +	dev_dbg(dev, "Add DC extent DPA:0x%llx LEN:%llx\n",
> +		le64_to_cpu(add_extent->start_dpa),
> +		le64_to_cpu(add_extent->length));
> +
> +	alloc_range = (struct range){
> +		.start = le64_to_cpu(add_extent->start_dpa),
> +		.end = le64_to_cpu(add_extent->start_dpa) +
> +			le64_to_cpu(add_extent->length) - 1,
> +	};
> +	resp_range = &alloc_range;
> +
> +	rc = cxl_store_dc_extent(mds, add_extent);
> +	if (rc) {
> +		dev_dbg(dev, "unconsumed DC extent DPA:0x%llx LEN:%llx\n",
> +			le64_to_cpu(add_extent->start_dpa),
> +			le64_to_cpu(add_extent->length));
> +		resp_range = NULL;
> +	}
> +
> +	rc = cxl_prepare_ext_list(&dc_res, &extent_cnt, resp_range);
> +	if (rc < 0) {
> +		dev_err(dev, "Couldn't create extent list %d\n", rc);
> +		return rc;
> +	}
> +
> +	rc = cxl_send_dc_cap_response(mds, dc_res, extent_cnt,
> +				      CXL_MBOX_OP_ADD_DC_RESPONSE);
> +	kfree(dc_res);
> +	return rc;
> +}
> +
> +/* Returns 0 if the event was handled successfully. */
Is this comment necessary?

> +static int cxl_handle_dcd_event_records(struct cxl_memdev_state *mds,
> +					struct cxl_event_record_raw *rec)
> +{
> +	struct dcd_event_dyn_cap *record = (struct dcd_event_dyn_cap *)rec;
> +	uuid_t *id = &rec->hdr.id;
> +	int rc;
> +
> +	if (!uuid_equal(id, &dc_event_uuid))
> +		return -EINVAL;
> +
> +	switch (record->data.event_type) {
> +	case DCD_ADD_CAPACITY:
> +		rc = cxl_handle_dcd_add_event(mds, &record->data.extent);

Just return?
> +		break;
> +	case DCD_RELEASE_CAPACITY:
> +        case DCD_FORCED_CAPACITY_RELEASE:

Extra 2 spaces of indentation?

> +		rc = cxl_handle_dcd_release_event(mds, &record->data.extent);

Same here about return.

DJ

> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return rc;
> +}
> +
>   static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>   				    enum cxl_event_log_type type)
>   {
> @@ -1016,6 +1191,13 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>   				le16_to_cpu(payload->records[i].hdr.handle));
>   			cxl_event_trace_record(cxlmd, type,
>   					       &payload->records[i]);
> +			if (type == CXL_EVENT_TYPE_DCD) {
> +				rc = cxl_handle_dcd_event_records(mds,
> +								  &payload->records[i]);
> +				if (rc)
> +					dev_err_ratelimited(dev, "dcd event failed: %d\n",
> +							    rc);
> +			}
>   		}
>   
>   		if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
> @@ -1056,6 +1238,8 @@ void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status)
>   		cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_WARN);
>   	if (status & CXLDEV_EVENT_STATUS_INFO)
>   		cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_INFO);
> +	if (status & CXLDEV_EVENT_STATUS_DCD)
> +		cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_DCD);
>   }
>   EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
>   
> @@ -1712,7 +1896,7 @@ static void cxl_destroy_mds(void *_mds)
>   
>   	xa_for_each(&mds->dc_extent_list, index, extent) {
>   		xa_erase(&mds->dc_extent_list, index);
> -		kfree(extent);
> +		cxl_dc_extent_put(extent);
>   	}
>   	xa_destroy(&mds->dc_extent_list);
>   }
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 0a225b0c20bf..81ca76ae1d02 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -163,6 +163,7 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
>   #define CXLDEV_EVENT_STATUS_WARN		BIT(1)
>   #define CXLDEV_EVENT_STATUS_FAIL		BIT(2)
>   #define CXLDEV_EVENT_STATUS_FATAL		BIT(3)
> +#define CXLDEV_EVENT_STATUS_DCD                 BIT(4)
>   
>   #define CXLDEV_EVENT_STATUS_ALL (CXLDEV_EVENT_STATUS_INFO |	\
>   				 CXLDEV_EVENT_STATUS_WARN |	\
> @@ -601,6 +602,14 @@ struct cxl_pmem_region {
>   	struct cxl_pmem_region_mapping mapping[];
>   };
>   
> +/* See CXL 3.0 8.2.9.2.1.5 */
> +enum dc_event {
> +        DCD_ADD_CAPACITY,
> +        DCD_RELEASE_CAPACITY,
> +        DCD_FORCED_CAPACITY_RELEASE,
> +        DCD_REGION_CONFIGURATION_UPDATED,
> +};
> +
>   struct cxl_dax_region {
>   	struct device dev;
>   	struct cxl_region *cxlr;
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index ad690600c1b9..118392229174 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -582,6 +582,16 @@ enum cxl_opcode {
>   	UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19,     \
>   		  0x40, 0x3d, 0x86)
>   
> +struct cxl_mbox_dc_response {
> +	__le32 extent_list_size;
> +	u8 reserved[4];
> +	struct updated_extent_list {
> +		__le64 dpa_start;
> +		__le64 length;
> +		u8 reserved[8];
> +	} __packed extent_list[];
> +} __packed;
> +
>   struct cxl_mbox_get_supported_logs {
>   	__le16 entries;
>   	u8 rsvd[6];
> @@ -667,6 +677,7 @@ enum cxl_event_log_type {
>   	CXL_EVENT_TYPE_WARN,
>   	CXL_EVENT_TYPE_FAIL,
>   	CXL_EVENT_TYPE_FATAL,
> +	CXL_EVENT_TYPE_DCD,
>   	CXL_EVENT_TYPE_MAX
>   };
>   
> @@ -757,6 +768,8 @@ struct cxl_dc_extent_data {
>   	u64 length;
>   	u8 tag[CXL_DC_EXTENT_TAG_LEN];
>   	u16 shared_extent_seq;
> +	struct cxl_memdev_state *mds;
> +	struct kref region_ref;
>   };
>   
>   /*
> @@ -771,6 +784,21 @@ struct cxl_dc_extent {
>   	u8 reserved[6];
>   } __packed;
>   
> +struct dcd_record_data {
> +	u8 event_type;
> +	u8 reserved;
> +	__le16 host_id;
> +	u8 region_index;
> +	u8 reserved1[3];
> +	struct cxl_dc_extent extent;
> +	u8 reserved2[32];
> +} __packed;
> +
> +struct dcd_event_dyn_cap {
> +	struct cxl_event_record_hdr hdr;
> +	struct dcd_record_data data;
> +} __packed;
> +
>   struct cxl_mbox_get_partition_info {
>   	__le64 active_volatile_cap;
>   	__le64 active_persistent_cap;
> @@ -974,6 +1002,8 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd);
>   int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa);
>   int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa);
>   
> +void cxl_dc_extent_put(struct cxl_dc_extent_data *extent);
> +
>   #ifdef CONFIG_CXL_SUSPEND
>   void cxl_mem_active_inc(void);
>   void cxl_mem_active_dec(void);
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ