lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 5 Mar 2013 09:18:58 -0500
From:	Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>
To:	Roger Pau Monné <roger.pau@...rix.com>
Cc:	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	"xen-devel@...ts.xen.org" <xen-devel@...ts.xen.org>
Subject: Re: [PATCH RFC 04/12] xen-blkfront: pre-allocate pages for requests

On Tue, Mar 05, 2013 at 12:04:41PM +0100, Roger Pau Monné wrote:
> On 04/03/13 20:39, Konrad Rzeszutek Wilk wrote:
> > On Thu, Feb 28, 2013 at 11:28:47AM +0100, Roger Pau Monne wrote:
> >> This prevents us from having to call alloc_page while we are preparing
> >> the request. Since blkfront was calling alloc_page with a spinlock
> >> held we used GFP_ATOMIC, which can fail if we are requesting a lot of
> >> pages since it is using the emergency memory pools.
> >>
> >> Allocating all the pages at init prevents us from having to call
> >> alloc_page, thus preventing possible failures.
> >>
> >> Signed-off-by: Roger Pau Monné <roger.pau@...rix.com>
> >> Cc: Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>
> >> Cc: xen-devel@...ts.xen.org
> >> ---
> >>  drivers/block/xen-blkfront.c |  120 +++++++++++++++++++++++++++--------------
> >>  1 files changed, 79 insertions(+), 41 deletions(-)
> >>
> >> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> >> index 2e39eaf..5ba6b87 100644
> >> --- a/drivers/block/xen-blkfront.c
> >> +++ b/drivers/block/xen-blkfront.c
> >> @@ -165,6 +165,69 @@ static int add_id_to_freelist(struct blkfront_info *info,
> >>  	return 0;
> >>  }
> >>  
> >> +static int fill_grant_buffer(struct blkfront_info *info, int num)
> >> +{
> >> +	struct page *granted_page;
> >> +	struct grant *gnt_list_entry, *n;
> >> +	int i = 0;
> >> +
> >> +	while(i < num) {
> >> +		gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
> > 
> > GFP_NORMAL ?
> 
> drivers/block/xen-blkfront.c:175: error: ‘GFP_NORMAL’ undeclared (first
> use in this function)
> 
> Did you mean GFP_KERNEL? I think GFP_NOIO is more suitable, it can block
> but no IO will be performed.

<sigh> I meant GFP_KERNEL. Sorry about the incorrect advice. The GFP_KERNEL
is the more general purpose pool - is there a good reason to use _NOIO?
This is after all during initialization when there is no IO using this driver.

> 
> > 
> >> +		if (!gnt_list_entry)
> >> +			goto out_of_memory;
> > 
> > Hmm, I guess another patch could be to convert this to a fail-safe
> > mechanism. Meaning if we fail here, we just cap our maximum amount of
> > grants we have up to 'i'.
> > 
> > 
> >> +
> >> +		granted_page = alloc_page(GFP_NOIO);
> > 
> > GFP_NORMAL

GFP_KERNEL of course.
> > 
> >> +		if (!granted_page) {
> >> +			kfree(gnt_list_entry);
> >> +			goto out_of_memory;
> >> +		}
> >> +
> >> +		gnt_list_entry->pfn = page_to_pfn(granted_page);
> >> +		gnt_list_entry->gref = GRANT_INVALID_REF;
> >> +		list_add(&gnt_list_entry->node, &info->persistent_gnts);
> >> +		i++;
> >> +	}
> >> +
> >> +	return 0;
> >> +
> >> +out_of_memory:
> >> +	list_for_each_entry_safe(gnt_list_entry, n,
> >> +	                         &info->persistent_gnts, node) {
> >> +		list_del(&gnt_list_entry->node);
> >> +		__free_page(pfn_to_page(gnt_list_entry->pfn));
> >> +		kfree(gnt_list_entry);
> >> +		i--;
> >> +	}
> >> +	BUG_ON(i != 0);
> >> +	return -ENOMEM;
> >> +}
> >> +
> >> +static struct grant *get_grant(grant_ref_t *gref_head,
> >> +                               struct blkfront_info *info)
> >> +{
> >> +	struct grant *gnt_list_entry;
> >> +	unsigned long buffer_mfn;
> >> +
> >> +	BUG_ON(list_empty(&info->persistent_gnts));
> >> +	gnt_list_entry = list_first_entry(&info->persistent_gnts, struct grant,
> >> +	                                  node);
> >> +	list_del(&gnt_list_entry->node);
> >> +
> >> +	if (gnt_list_entry->gref != GRANT_INVALID_REF) {
> >> +		info->persistent_gnts_c--;
> >> +		return gnt_list_entry;
> >> +	}
> >> +
> >> +	/* Assign a gref to this page */
> >> +	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
> >> +	BUG_ON(gnt_list_entry->gref == -ENOSPC);
> >> +	buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
> >> +	gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
> >> +	                                info->xbdev->otherend_id,
> >> +	                                buffer_mfn, 0);
> >> +	return gnt_list_entry;
> >> +}
> >> +
> >>  static const char *op_name(int op)
> >>  {
> >>  	static const char *const names[] = {
> >> @@ -306,7 +369,6 @@ static int blkif_queue_request(struct request *req)
> >>  	 */
> >>  	bool new_persistent_gnts;
> >>  	grant_ref_t gref_head;
> >> -	struct page *granted_page;
> >>  	struct grant *gnt_list_entry = NULL;
> >>  	struct scatterlist *sg;
> >>  
> >> @@ -370,42 +432,9 @@ static int blkif_queue_request(struct request *req)
> >>  			fsect = sg->offset >> 9;
> >>  			lsect = fsect + (sg->length >> 9) - 1;
> >>  
> >> -			if (info->persistent_gnts_c) {
> >> -				BUG_ON(list_empty(&info->persistent_gnts));
> >> -				gnt_list_entry = list_first_entry(
> >> -				                      &info->persistent_gnts,
> >> -				                      struct grant, node);
> >> -				list_del(&gnt_list_entry->node);
> >> -
> >> -				ref = gnt_list_entry->gref;
> >> -				buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
> >> -				info->persistent_gnts_c--;
> >> -			} else {
> >> -				ref = gnttab_claim_grant_reference(&gref_head);
> >> -				BUG_ON(ref == -ENOSPC);
> >> -
> >> -				gnt_list_entry =
> >> -					kmalloc(sizeof(struct grant),
> >> -							 GFP_ATOMIC);
> >> -				if (!gnt_list_entry)
> >> -					return -ENOMEM;
> >> -
> >> -				granted_page = alloc_page(GFP_ATOMIC);
> >> -				if (!granted_page) {
> >> -					kfree(gnt_list_entry);
> >> -					return -ENOMEM;
> >> -				}
> >> -
> >> -				gnt_list_entry->pfn =
> >> -					page_to_pfn(granted_page);
> >> -				gnt_list_entry->gref = ref;
> >> -
> >> -				buffer_mfn = pfn_to_mfn(page_to_pfn(
> >> -								granted_page));
> >> -				gnttab_grant_foreign_access_ref(ref,
> >> -					info->xbdev->otherend_id,
> >> -					buffer_mfn, 0);
> >> -			}
> >> +			gnt_list_entry = get_grant(&gref_head, info);
> >> +			ref = gnt_list_entry->gref;
> >> +			buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
> >>  
> >>  			info->shadow[id].grants_used[i] = gnt_list_entry;
> >>  
> >> @@ -803,17 +832,20 @@ static void blkif_free(struct blkfront_info *info, int suspend)
> >>  		blk_stop_queue(info->rq);
> >>  
> >>  	/* Remove all persistent grants */
> >> -	if (info->persistent_gnts_c) {
> >> +	if (!list_empty(&info->persistent_gnts)) {
> >>  		list_for_each_entry_safe(persistent_gnt, n,
> >>  		                         &info->persistent_gnts, node) {
> >>  			list_del(&persistent_gnt->node);
> >> -			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
> >> +			if (persistent_gnt->gref != GRANT_INVALID_REF) {
> >> +				gnttab_end_foreign_access(persistent_gnt->gref,
> >> +				                          0, 0UL);
> >> +				info->persistent_gnts_c--;
> >> +			}
> >>  			__free_page(pfn_to_page(persistent_gnt->pfn));
> >>  			kfree(persistent_gnt);
> >> -			info->persistent_gnts_c--;
> >>  		}
> >> -		BUG_ON(info->persistent_gnts_c != 0);
> >>  	}
> >> +	BUG_ON(info->persistent_gnts_c != 0);
> > 
> > So if the guest _never_ sent any I/Os and just attached/detached the device - won't
> > we fail here?.
> 
> persistent_gnts_c is initialized to 0, so if we don't perform IO it
> should still be 0 at this point. Since we have just cleaned the
> persistent grants lists this should always be 0 at this point.

OK.
> 
> >>  
> >>  	/* No more gnttab callback work. */
> >>  	gnttab_cancel_free_callback(&info->callback);
> >> @@ -1088,6 +1120,12 @@ again:
> >>  		goto destroy_blkring;
> >>  	}
> >>  
> >> +	/* Allocate memory for grants */
> >> +	err = fill_grant_buffer(info, BLK_RING_SIZE *
> >> +	                              BLKIF_MAX_SEGMENTS_PER_REQUEST);
> >> +	if (err)
> >> +		goto out;
> > 
> > That looks to be in the wrong function - talk_to_blkback function is
> > to talk to the blkback. Not do initialization type operations.
> 
> Yes, I know it's not the best place to place it. It's here mainly
> because that's the only function that gets called by both driver
> initialization and resume.
> 
> Last patch moves this to a more sensible place.

Lets make it part of this patch from the start. We still have two
months of time before the next merge window opens - so we have
time to make it nice and clean.

> 
> > 
> > Also I think this means that on resume - we would try to allocate
> > again the grants?
> 
> Yes, grants are cleaned on resume and reallocated.
> 
> >> +
> >>  	xenbus_switch_state(dev, XenbusStateInitialised);
> >>  
> >>  	return 0;
> >> -- 
> >> 1.7.7.5 (Apple Git-26)
> >>
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ