netdev - Re: [PATCH v2 12/15] cxl: allow region creation by type2 drivers

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ab8b5f77-7a53-a716-a1e6-bf1f7e06eb50@amd.com>
Date: Mon, 19 Aug 2024 17:11:45 +0100
From: Alejandro Lucero Palau <alucerop@....com>
To: Jonathan Cameron <Jonathan.Cameron@...wei.com>,
 alejandro.lucero-palau@....com
Cc: linux-cxl@...r.kernel.org, netdev@...r.kernel.org,
 dan.j.williams@...el.com, martin.habets@...inx.com, edward.cree@....com,
 davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com,
 edumazet@...gle.com, richard.hughes@....com
Subject: Re: [PATCH v2 12/15] cxl: allow region creation by type2 drivers


On 8/4/24 19:29, Jonathan Cameron wrote:
> On Mon, 15 Jul 2024 18:28:32 +0100
> alejandro.lucero-palau@....com wrote:
>
>> From: Alejandro Lucero <alucerop@....com>
>>
> >> Creating a CXL region requires userspace intervention through the cxl
> >> sysfs files. Type2 support should allow accelerator drivers to create
> >> such a CXL region from kernel code.
>>
> >> Add that functionality and integrate it with the current support for
> >> memory expanders.
>>
>> Based on https://lore.kernel.org/linux-cxl/168592149709.1948938.8663425987110396027.stgit@dwillia2-xfh.jf.intel.com/T/#m84598b534cc5664f5bb31521ba6e41c7bc213758
>> Signed-off-by: Alejandro Lucero <alucerop@....com>
>> Signed-off-by: Dan Williams <dan.j.williams@...el.com>
> Needs a co-developed or similar given Dan didn't email this patch
> (which this sign off list suggests he did).


Yes, I'll fix it.


>
> I'll take another look at the locking, but my main comment is
> that it is really confusing so I have no idea if it's right.
> Consider different ways of breaking up the code you need
> to try and keep the locking obvious.


I have to agree, and this means I need to work on it. I know it works for 
my case, which was my main focus for the RFC, but I have not looked at it 
with the right mindset.

I'll take your comments below as valuable input for the required work.

Thanks!


> Jonathan
>
>> +
>> +static ssize_t interleave_ways_store(struct device *dev,
>> +				     struct device_attribute *attr,
>> +				     const char *buf, size_t len)
>> +{
>> +	struct cxl_region *cxlr = to_cxl_region(dev);
>> +	unsigned int val;
>> +	int rc;
>> +
>> +	rc = kstrtouint(buf, 0, &val);
>> +	if (rc)
>> +		return rc;
>> +
>> +	rc = down_write_killable(&cxl_region_rwsem);
>> +	if (rc)
>> +		return rc;
>> +
>> +	rc = set_interleave_ways(cxlr, val);
>>   	up_write(&cxl_region_rwsem);
>>   	if (rc)
>>   		return rc;
>>   	return len;
>>   }
>> +
> This was probably intentional. Common to group a macro like this
> with the function it is using by not having a blank line.
>>   static DEVICE_ATTR_RW(interleave_ways);
>>   
>>   static ssize_t interleave_granularity_show(struct device *dev,
>> @@ -547,21 +556,14 @@ static ssize_t interleave_granularity_show(struct device *dev,
>>   	return rc;
>>   }
>> +static ssize_t interleave_granularity_store(struct device *dev,
>> +					    struct device_attribute *attr,
>> +					    const char *buf, size_t len)
>> +{
>> +	struct cxl_region *cxlr = to_cxl_region(dev);
>> +	int rc, val;
>> +
>> +	rc = kstrtoint(buf, 0, &val);
>> +	if (rc)
>> +		return rc;
>> +
>>   	rc = down_write_killable(&cxl_region_rwsem);
>>   	if (rc)
>>   		return rc;
>> -	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>> -		rc = -EBUSY;
>> -		goto out;
>> -	}
>>   
>> -	p->interleave_granularity = val;
>> -out:
>> +	rc = set_interleave_granularity(cxlr, val);
>>   	up_write(&cxl_region_rwsem);
>>   	if (rc)
>>   		return rc;
>>   	return len;
>>   }
>> +
> grump.
>
>>   static DEVICE_ATTR_RW(interleave_granularity);
>> +/* Establish an empty region covering the given HPA range */
>> +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>> +					   struct cxl_endpoint_decoder *cxled)
>> +{
>> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>> +	struct range *hpa = &cxled->cxld.hpa_range;
>> +	struct cxl_region_params *p;
>> +	struct cxl_region *cxlr;
>> +	struct resource *res;
>> +	int rc;
>> +
>> +	cxlr = construct_region_begin(cxlrd, cxled);
>> +	if (IS_ERR(cxlr))
>> +		return cxlr;
>>   
>>   	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>>   
>>   	res = kmalloc(sizeof(*res), GFP_KERNEL);
>>   	if (!res) {
>>   		rc = -ENOMEM;
>> -		goto err;
>> +		goto out;
>>   	}
>>   
>>   	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
>>   				    dev_name(&cxlr->dev));
>> +
>>   	rc = insert_resource(cxlrd->res, res);
>>   	if (rc) {
>>   		/*
>> @@ -3412,6 +3462,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>   			 __func__, dev_name(&cxlr->dev));
>>   	}
>>   
>> +	p = &cxlr->params;
>>   	p->res = res;
>>   	p->interleave_ways = cxled->cxld.interleave_ways;
>>   	p->interleave_granularity = cxled->cxld.interleave_granularity;
>> @@ -3419,24 +3470,124 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>   
>>   	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
>>   	if (rc)
>> -		goto err;
>> +		goto out;
>>   
>>   	dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
>> -		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
>> -		dev_name(&cxlr->dev), p->res, p->interleave_ways,
>> -		p->interleave_granularity);
>> +				   dev_name(&cxlmd->dev),
>> +				   dev_name(&cxled->cxld.dev), __func__,
>> +				   dev_name(&cxlr->dev), p->res,
>> +				   p->interleave_ways,
>> +				   p->interleave_granularity);
>>   
>>   	/* ...to match put_device() in cxl_add_to_region() */
>>   	get_device(&cxlr->dev);
>>   	up_write(&cxl_region_rwsem);
>> +out:
>> +	construct_region_end();
> two calls to up_write(&cxl_region_rwsem) next to each other?
>
>> +	if (rc) {
>> +		drop_region(cxlr);
>> +		return ERR_PTR(rc);
>> +	}
>> +	return cxlr;
>> +}
>> +
>> +static struct cxl_region *
>> +__construct_new_region(struct cxl_root_decoder *cxlrd,
>> +		       struct cxl_endpoint_decoder **cxled, int ways)
>> +{
>> +	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
>> +	struct cxl_region_params *p;
>> +	resource_size_t size = 0;
>> +	struct cxl_region *cxlr;
>> +	int rc, i;
>> +
>> +	/* If interleaving is not supported, why does ways need to be at least 1? */
> I think 1 means no interleave. It's simpler to do this than have 0 and 1 both
> mean no interleave because 1 works for programmable decoders.
>
>> +	if (ways < 1)
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	cxlr = construct_region_begin(cxlrd, cxled[0]);
> Rethink how this is broken up.  Taking the cxl_dpa_rwsem
> inside this function is really hard to follow.  Ideally
> manage it with scoped_guard()
>
>
>> +	if (IS_ERR(cxlr))
>> +		return cxlr;
>> +
>> +	rc = set_interleave_ways(cxlr, ways);
>> +	if (rc)
>> +		goto out;
>> +
>> +	rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
>> +	if (rc)
> here I think cxl_dpa_rwsem is held.
>> +		goto out;
>> +
>> +	down_read(&cxl_dpa_rwsem);
>> +	for (i = 0; i < ways; i++) {
>> +		if (!cxled[i]->dpa_res)
>> +			break;
>> +		size += resource_size(cxled[i]->dpa_res);
>> +	}
>> +	up_read(&cxl_dpa_rwsem);
>> +
>> +	if (i < ways)
> but not here and they go to the same place.
>
>> +		goto out;
>> +
>> +	rc = alloc_hpa(cxlr, size);
>> +	if (rc)
>> +		goto out;
>> +
>> +	down_read(&cxl_dpa_rwsem);
>> +	for (i = 0; i < ways; i++) {
>> +		rc = cxl_region_attach(cxlr, cxled[i], i);
>> +		if (rc)
>> +			break;
>> +	}
>> +	up_read(&cxl_dpa_rwsem);
>> +
>> +	if (rc)
>> +		goto out;
>> +
>> +	rc = cxl_region_decode_commit(cxlr);
>> +	if (rc)
>> +		goto out;
>>   
>> +	p = &cxlr->params;
>> +	p->state = CXL_CONFIG_COMMIT;
>> +out:
>> +	construct_region_end();
>> +	if (rc) {
>> +		drop_region(cxlr);
>> +		return ERR_PTR(rc);
>> +	}
>>   	return cxlr;
>> +}
>> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
>> index a0e0795ec064..377bb3cd2d47 100644
>> --- a/drivers/cxl/cxlmem.h
>> +++ b/drivers/cxl/cxlmem.h
>> @@ -881,5 +881,7 @@ struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_port *endpoint,
>>   					       int interleave_ways,
>>   					       unsigned long flags,
>>   					       resource_size_t *max);
>> -
> Avoid whitespace noise.
>
>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>> +				     struct cxl_endpoint_decoder **cxled,
>> +				     int ways);
>>   #endif /* __CXL_MEM_H__ */