lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <597125fd-4c85-fde0-9d5b-a9cc13a81ccc@oracle.com>
Date:   Thu, 12 May 2022 16:01:09 -0400
From:   Boris Ostrovsky <boris.ostrovsky@...cle.com>
To:     Oleksandr Tyshchenko <olekstysh@...il.com>,
        xen-devel@...ts.xenproject.org, linux-kernel@...r.kernel.org,
        linux-arm-kernel@...ts.infradead.org
Cc:     Juergen Gross <jgross@...e.com>,
        Stefano Stabellini <sstabellini@...nel.org>,
        Julien Grall <julien@....org>,
        Oleksandr Tyshchenko <oleksandr_tyshchenko@...m.com>,
        "Michael S. Tsirkin" <mst@...hat.com>,
        Christoph Hellwig <hch@...radead.org>
Subject: Re: [PATCH V2 2/7] xen/grants: support allocating consecutive grants


On 5/7/22 2:19 PM, Oleksandr Tyshchenko wrote:
> +
> +/*
> + * Handling of free grants:
> + *
> + * Free grants are in a simple list anchored in gnttab_free_head. They are
> + * linked by grant ref, the last element contains GNTTAB_LIST_END. The number
> + * of free entries is stored in gnttab_free_count.
> + * Additionally there is a bitmap of free entries anchored in
> + * gnttab_free_bitmap. This is being used for simplifying allocation of
> + * multiple consecutive grants, which is needed e.g. for support of virtio.
> + * gnttab_last_free is used to add free entries of new frames at the end of
> + * the free list.
> + * gnttab_free_tail_ptr specifies the variable which references the start


If this references the start of the free interval, why is it called gnttab_free_*tail*_ptr?


> + * of consecutive free grants ending with gnttab_last_free. This pointer is
> + * updated in a rather defensive way, in order to avoid performance hits in
> + * hot paths.
> + * All those variables are protected by gnttab_list_lock.
> + */
>   static int gnttab_free_count;
> -static grant_ref_t gnttab_free_head;
> +static unsigned int gnttab_size;
> +static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
> +static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
> +static grant_ref_t *gnttab_free_tail_ptr;
> +static unsigned long *gnttab_free_bitmap;
>   static DEFINE_SPINLOCK(gnttab_list_lock);
> +
>   struct grant_frames xen_auto_xlat_grant_frames;
>   static unsigned int xen_gnttab_version;
>   module_param_named(version, xen_gnttab_version, uint, 0);
> @@ -170,16 +194,111 @@ static int get_free_entries(unsigned count)
>   
>   	ref = head = gnttab_free_head;
>   	gnttab_free_count -= count;
> -	while (count-- > 1)
> -		head = gnttab_entry(head);
> +	while (count--) {
> +		bitmap_clear(gnttab_free_bitmap, head, 1);
> +		if (gnttab_free_tail_ptr == __gnttab_entry(head))
> +			gnttab_free_tail_ptr = &gnttab_free_head;
> +		if (count)
> +			head = gnttab_entry(head);
> +	}
>   	gnttab_free_head = gnttab_entry(head);
>   	gnttab_entry(head) = GNTTAB_LIST_END;
>   
> +	if (!gnttab_free_count) {
> +		gnttab_last_free = GNTTAB_LIST_END;
> +		gnttab_free_tail_ptr = NULL;
> +	}
> +
>   	spin_unlock_irqrestore(&gnttab_list_lock, flags);
>   
>   	return ref;
>   }
>   
> +static int get_seq_entry_count(void)
> +{
> +	if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
> +	    *gnttab_free_tail_ptr == GNTTAB_LIST_END)
> +		return 0;
> +
> +	return gnttab_last_free - *gnttab_free_tail_ptr + 1;
> +}
> +
> +/* Rebuilds the free grant list and tries to find count consecutive entries. */
> +static int get_free_seq(unsigned int count)
> +{
> +	int ret = -ENOSPC;
> +	unsigned int from, to;
> +	grant_ref_t *last;
> +
> +	gnttab_free_tail_ptr = &gnttab_free_head;
> +	last = &gnttab_free_head;
> +
> +	for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
> +	     from < gnttab_size;
> +	     from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
> +		to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
> +					from + 1);
> +		if (ret < 0 && to - from >= count) {
> +			ret = from;
> +			bitmap_clear(gnttab_free_bitmap, ret, count);
> +			from += count;
> +			gnttab_free_count -= count;


IIUIC we can have multiple passes over this, meaning that the gnttab_free_count may be decremented more than once. Is that intentional?


> +			if (from == to)
> +				continue;
> +		}
> +
> +		while (from < to) {
> +			*last = from;
> +			last = __gnttab_entry(from);
> +			gnttab_last_free = from;
> +			from++;
> +		}


I have been looking at this loop and I can't understand what it is doing ;-( Can you enlighten me?



-boris



> +		if (to < gnttab_size)
> +			gnttab_free_tail_ptr = __gnttab_entry(to - 1);
> +	}
> +
> +	*last = GNTTAB_LIST_END;
> +	if (gnttab_last_free != gnttab_size - 1)
> +		gnttab_free_tail_ptr = NULL;
> +
> +	return ret;
> +}
> +
> +static int get_free_entries_seq(unsigned int count)
> +{
> +	unsigned long flags;
> +	int ret = 0;
> +
> +	spin_lock_irqsave(&gnttab_list_lock, flags);
> +
> +	if (gnttab_free_count < count) {
> +		ret = gnttab_expand(count - gnttab_free_count);
> +		if (ret < 0)
> +			goto out;
> +	}
> +
> +	if (get_seq_entry_count() < count) {
> +		ret = get_free_seq(count);
> +		if (ret >= 0)
> +			goto out;
> +		ret = gnttab_expand(count - get_seq_entry_count());
> +		if (ret < 0)
> +			goto out;
> +	}
> +
> +	ret = *gnttab_free_tail_ptr;
> +	*gnttab_free_tail_ptr = gnttab_entry(ret + count - 1);
> +	gnttab_free_count -= count;
> +	if (!gnttab_free_count)
> +		gnttab_free_tail_ptr = NULL;
> +	bitmap_clear(gnttab_free_bitmap, ret, count);
> +
> + out:
> +	spin_unlock_irqrestore(&gnttab_list_lock, flags);
> +
> +	return ret;
> +}
> +
>   static void do_free_callbacks(void)
>   {
>   	struct gnttab_free_callback *callback, *next;
> @@ -206,17 +325,48 @@ static inline void check_free_callbacks(void)
>   		do_free_callbacks();
>   }
>   
> -static void put_free_entry(grant_ref_t ref)
> +static void put_free_entry_locked(grant_ref_t ref)
>   {
> -	unsigned long flags;
> -	spin_lock_irqsave(&gnttab_list_lock, flags);
>   	gnttab_entry(ref) = gnttab_free_head;
>   	gnttab_free_head = ref;
> +	if (!gnttab_free_count)
> +		gnttab_last_free = ref;
> +	if (gnttab_free_tail_ptr == &gnttab_free_head)
> +		gnttab_free_tail_ptr = __gnttab_entry(ref);
>   	gnttab_free_count++;
> +	bitmap_set(gnttab_free_bitmap, ref, 1);
> +}
> +
> +static void put_free_entry(grant_ref_t ref)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&gnttab_list_lock, flags);
> +	put_free_entry_locked(ref);
>   	check_free_callbacks();
>   	spin_unlock_irqrestore(&gnttab_list_lock, flags);
>   }
>   
> +static void gnttab_set_free(unsigned int start, unsigned int n)
> +{
> +	unsigned int i;
> +
> +	for (i = start; i < start + n - 1; i++)
> +		gnttab_entry(i) = i + 1;
> +
> +	gnttab_entry(i) = GNTTAB_LIST_END;
> +	if (!gnttab_free_count) {
> +		gnttab_free_head = start;
> +		gnttab_free_tail_ptr = &gnttab_free_head;
> +	} else {
> +		gnttab_entry(gnttab_last_free) = start;
> +	}
> +	gnttab_free_count += n;
> +	gnttab_last_free = i;
> +
> +	bitmap_set(gnttab_free_bitmap, start, n);
> +}
> +
>   /*
>    * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
>    * Introducing a valid entry into the grant table:
> @@ -448,23 +598,31 @@ void gnttab_free_grant_references(grant_ref_t head)
>   {
>   	grant_ref_t ref;
>   	unsigned long flags;
> -	int count = 1;
> -	if (head == GNTTAB_LIST_END)
> -		return;
> +
>   	spin_lock_irqsave(&gnttab_list_lock, flags);
> -	ref = head;
> -	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
> -		ref = gnttab_entry(ref);
> -		count++;
> +	while (head != GNTTAB_LIST_END) {
> +		ref = gnttab_entry(head);
> +		put_free_entry_locked(head);
> +		head = ref;
>   	}
> -	gnttab_entry(ref) = gnttab_free_head;
> -	gnttab_free_head = head;
> -	gnttab_free_count += count;
>   	check_free_callbacks();
>   	spin_unlock_irqrestore(&gnttab_list_lock, flags);
>   }
>   EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
>   
> +void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count)
> +{
> +	unsigned long flags;
> +	unsigned int i;
> +
> +	spin_lock_irqsave(&gnttab_list_lock, flags);
> +	for (i = count; i > 0; i--)
> +		put_free_entry_locked(head + i - 1);
> +	check_free_callbacks();
> +	spin_unlock_irqrestore(&gnttab_list_lock, flags);
> +}
> +EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq);
> +
>   int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
>   {
>   	int h = get_free_entries(count);
> @@ -478,6 +636,24 @@ int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
>   }
>   EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
>   
> +int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first)
> +{
> +	int h;
> +
> +	if (count == 1)
> +		h = get_free_entries(1);
> +	else
> +		h = get_free_entries_seq(count);
> +
> +	if (h < 0)
> +		return -ENOSPC;
> +
> +	*first = h;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq);
> +
>   int gnttab_empty_grant_references(const grant_ref_t *private_head)
>   {
>   	return (*private_head == GNTTAB_LIST_END);
> @@ -570,16 +746,13 @@ static int grow_gnttab_list(unsigned int more_frames)
>   			goto grow_nomem;
>   	}
>   
> +	gnttab_set_free(gnttab_size, extra_entries);
>   
> -	for (i = grefs_per_frame * nr_grant_frames;
> -	     i < grefs_per_frame * new_nr_grant_frames - 1; i++)
> -		gnttab_entry(i) = i + 1;
> -
> -	gnttab_entry(i) = gnttab_free_head;
> -	gnttab_free_head = grefs_per_frame * nr_grant_frames;
> -	gnttab_free_count += extra_entries;
> +	if (!gnttab_free_tail_ptr)
> +		gnttab_free_tail_ptr = __gnttab_entry(gnttab_size);
>   
>   	nr_grant_frames = new_nr_grant_frames;
> +	gnttab_size += extra_entries;
>   
>   	check_free_callbacks();
>   
> @@ -1424,7 +1597,6 @@ int gnttab_init(void)
>   	int i;
>   	unsigned long max_nr_grant_frames;
>   	unsigned int max_nr_glist_frames, nr_glist_frames;
> -	unsigned int nr_init_grefs;
>   	int ret;
>   
>   	gnttab_request_version();
> @@ -1452,6 +1624,13 @@ int gnttab_init(void)
>   		}
>   	}
>   
> +	i = gnttab_interface->grefs_per_grant_frame * max_nr_grant_frames;
> +	gnttab_free_bitmap = bitmap_zalloc(i, GFP_KERNEL);
> +	if (!gnttab_free_bitmap) {
> +		ret = -ENOMEM;
> +		goto ini_nomem;
> +	}
> +
>   	ret = arch_gnttab_init(max_nr_grant_frames,
>   			       nr_status_frames(max_nr_grant_frames));
>   	if (ret < 0)
> @@ -1462,15 +1641,9 @@ int gnttab_init(void)
>   		goto ini_nomem;
>   	}
>   
> -	nr_init_grefs = nr_grant_frames *
> -			gnttab_interface->grefs_per_grant_frame;
> -
> -	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
> -		gnttab_entry(i) = i + 1;
> +	gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame;
>   
> -	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
> -	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
> -	gnttab_free_head  = NR_RESERVED_ENTRIES;
> +	gnttab_set_free(NR_RESERVED_ENTRIES, gnttab_size - NR_RESERVED_ENTRIES);
>   
>   	printk("Grant table initialized\n");
>   	return 0;
> @@ -1479,6 +1652,7 @@ int gnttab_init(void)
>   	for (i--; i >= 0; i--)
>   		free_page((unsigned long)gnttab_list[i]);
>   	kfree(gnttab_list);
> +	bitmap_free(gnttab_free_bitmap);
>   	return ret;
>   }
>   EXPORT_SYMBOL_GPL(gnttab_init);
> diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
> index dfd5bf3..d815e1d 100644
> --- a/include/xen/grant_table.h
> +++ b/include/xen/grant_table.h
> @@ -129,10 +129,14 @@ int gnttab_try_end_foreign_access(grant_ref_t ref);
>    */
>   int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);
>   
> +int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first);
> +
>   void gnttab_free_grant_reference(grant_ref_t ref);
>   
>   void gnttab_free_grant_references(grant_ref_t head);
>   
> +void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count);
> +
>   int gnttab_empty_grant_references(const grant_ref_t *pprivate_head);
>   
>   int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ