lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <5FBEB4F9.3060008@oracle.com>
Date:   Wed, 25 Nov 2020 11:48:09 -0800
From:   si-wei liu <si-wei.liu@...cle.com>
To:     "Michael S. Tsirkin" <mst@...hat.com>
CC:     jasowang@...hat.com, lingshan.zhu@...el.com,
        joao.m.martins@...cle.com, boris.ostrovsky@...cle.com,
        linux-kernel@...r.kernel.org,
        virtualization@...ts.linux-foundation.org
Subject: Re: [PATCH v3] vhost-vdpa: fix page pinning leakage in error path
 (rework)


On 11/25/2020 1:30 AM, Michael S. Tsirkin wrote:
> On Thu, Nov 05, 2020 at 06:26:33PM -0500, Si-Wei Liu wrote:
>> Pinned pages are not properly accounted particularly when
>> mapping error occurs on IOTLB update. Clean up dangling
>> pinned pages for the error path.
>>
>> The memory usage for bookkeeping pinned pages is reverted
>> to what it was before: only one single free page is needed.
>> This helps reduce the host memory demand for VM with a large
>> amount of memory, or in the situation where host is running
>> short of free memory.
>>
>> Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
>> Signed-off-by: Si-Wei Liu <si-wei.liu@...cle.com>
>
> Not sure which tree this is against, I had to apply this with
> minor tweaks. Pls take a look at the vhost tree and
> let me know whether it looks ok to you.
Thanks Michael, the commit ad89653f79f1882d55d9df76c9b2b94f008c4e27 in 
the vhost tree looks good. Sorry, I don't think I ever attempted to 
merge with linux-next when v3 was posted, although I did it for the 
first two submissions. I will pay attention to it next time.

Thanks,
-Siwei

>
>> ---
>> Changes in v3:
>> - Turn explicit last_pfn check to a WARN_ON() (Jason)
>>
>> Changes in v2:
>> - Drop the reversion patch
>> - Fix unhandled page leak towards the end of page_list
>>
>>   drivers/vhost/vdpa.c | 80 ++++++++++++++++++++++++++++++++++++++++------------
>>   1 file changed, 62 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>> index b6d9016..5b13dfd 100644
>> --- a/drivers/vhost/vdpa.c
>> +++ b/drivers/vhost/vdpa.c
>> @@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
>>   
>>   	if (r)
>>   		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
>> +	else
>> +		atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
>>   
>>   	return r;
>>   }
>> @@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>>   	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
>>   	unsigned int gup_flags = FOLL_LONGTERM;
>>   	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
>> -	unsigned long locked, lock_limit, pinned, i;
>> +	unsigned long lock_limit, sz2pin, nchunks, i;
>>   	u64 iova = msg->iova;
>> +	long pinned;
>>   	int ret = 0;
>>   
>>   	if (vhost_iotlb_itree_first(iotlb, msg->iova,
>>   				    msg->iova + msg->size - 1))
>>   		return -EEXIST;
>>   
>> +	/* Limit the use of memory for bookkeeping */
>>   	page_list = (struct page **) __get_free_page(GFP_KERNEL);
>>   	if (!page_list)
>>   		return -ENOMEM;
>> @@ -607,52 +611,75 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>>   		gup_flags |= FOLL_WRITE;
>>   
>>   	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
>> -	if (!npages)
>> -		return -EINVAL;
>> +	if (!npages) {
>> +		ret = -EINVAL;
>> +		goto free;
>> +	}
>>   
>>   	mmap_read_lock(dev->mm);
>>   
>> -	locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
>>   	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>> -
>> -	if (locked > lock_limit) {
>> +	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
>>   		ret = -ENOMEM;
>> -		goto out;
>> +		goto unlock;
>>   	}
>>   
>>   	cur_base = msg->uaddr & PAGE_MASK;
>>   	iova &= PAGE_MASK;
>> +	nchunks = 0;
>>   
>>   	while (npages) {
>> -		pinned = min_t(unsigned long, npages, list_size);
>> -		ret = pin_user_pages(cur_base, pinned,
>> -				     gup_flags, page_list, NULL);
>> -		if (ret != pinned)
>> +		sz2pin = min_t(unsigned long, npages, list_size);
>> +		pinned = pin_user_pages(cur_base, sz2pin,
>> +					gup_flags, page_list, NULL);
>> +		if (sz2pin != pinned) {
>> +			if (pinned < 0) {
>> +				ret = pinned;
>> +			} else {
>> +				unpin_user_pages(page_list, pinned);
>> +				ret = -ENOMEM;
>> +			}
>>   			goto out;
>> +		}
>> +		nchunks++;
>>   
>>   		if (!last_pfn)
>>   			map_pfn = page_to_pfn(page_list[0]);
>>   
>> -		for (i = 0; i < ret; i++) {
>> +		for (i = 0; i < pinned; i++) {
>>   			unsigned long this_pfn = page_to_pfn(page_list[i]);
>>   			u64 csize;
>>   
>>   			if (last_pfn && (this_pfn != last_pfn + 1)) {
>>   				/* Pin a contiguous chunk of memory */
>>   				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
>> -				if (vhost_vdpa_map(v, iova, csize,
>> -						   map_pfn << PAGE_SHIFT,
>> -						   msg->perm))
>> +				ret = vhost_vdpa_map(v, iova, csize,
>> +						     map_pfn << PAGE_SHIFT,
>> +						     msg->perm);
>> +				if (ret) {
>> +					/*
>> +					 * Unpin the pages that are left unmapped
>> +					 * from this point on in the current
>> +					 * page_list. The remaining outstanding
>> +					 * ones which may stride across several
>> +					 * chunks will be covered in the common
>> +					 * error path subsequently.
>> +					 */
>> +					unpin_user_pages(&page_list[i],
>> +							 pinned - i);
>>   					goto out;
>> +				}
>> +
>>   				map_pfn = this_pfn;
>>   				iova += csize;
>> +				nchunks = 0;
>>   			}
>>   
>>   			last_pfn = this_pfn;
>>   		}
>>   
>> -		cur_base += ret << PAGE_SHIFT;
>> -		npages -= ret;
>> +		cur_base += pinned << PAGE_SHIFT;
>> +		npages -= pinned;
>>   	}
>>   
>>   	/* Pin the rest chunk */
>> @@ -660,10 +687,27 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>>   			     map_pfn << PAGE_SHIFT, msg->perm);
>>   out:
>>   	if (ret) {
>> +		if (nchunks) {
>> +			unsigned long pfn;
>> +
>> +			/*
>> +			 * Unpin the outstanding pages which are yet to be
>> +			 * mapped but haven't due to vdpa_map() or
>> +			 * pin_user_pages() failure.
>> +			 *
>> +			 * Mapped pages are accounted in vdpa_map(), hence
>> +			 * the corresponding unpinning will be handled by
>> +			 * vdpa_unmap().
>> +			 */
>> +			WARN_ON(!last_pfn);
>> +			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
>> +				unpin_user_page(pfn_to_page(pfn));
>> +		}
>>   		vhost_vdpa_unmap(v, msg->iova, msg->size);
>> -		atomic64_sub(npages, &dev->mm->pinned_vm);
>>   	}
>> +unlock:
>>   	mmap_read_unlock(dev->mm);
>> +free:
>>   	free_page((unsigned long)page_list);
>>   	return ret;
>>   }
>> -- 
>> 1.8.3.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ