[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <597125fd-4c85-fde0-9d5b-a9cc13a81ccc@oracle.com>
Date: Thu, 12 May 2022 16:01:09 -0400
From: Boris Ostrovsky <boris.ostrovsky@...cle.com>
To: Oleksandr Tyshchenko <olekstysh@...il.com>,
xen-devel@...ts.xenproject.org, linux-kernel@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org
Cc: Juergen Gross <jgross@...e.com>,
Stefano Stabellini <sstabellini@...nel.org>,
Julien Grall <julien@....org>,
Oleksandr Tyshchenko <oleksandr_tyshchenko@...m.com>,
"Michael S. Tsirkin" <mst@...hat.com>,
Christoph Hellwig <hch@...radead.org>
Subject: Re: [PATCH V2 2/7] xen/grants: support allocating consecutive grants
On 5/7/22 2:19 PM, Oleksandr Tyshchenko wrote:
> +
> +/*
> + * Handling of free grants:
> + *
> + * Free grants are in a simple list anchored in gnttab_free_head. They are
> + * linked by grant ref, the last element contains GNTTAB_LIST_END. The number
> + * of free entries is stored in gnttab_free_count.
> + * Additionally there is a bitmap of free entries anchored in
> + * gnttab_free_bitmap. This is being used for simplifying allocation of
> + * multiple consecutive grants, which is needed e.g. for support of virtio.
> + * gnttab_last_free is used to add free entries of new frames at the end of
> + * the free list.
> + * gnttab_free_tail_ptr specifies the variable which references the start
If this references the start of the free interval, why is it called gnttab_free_*tail*_ptr?
> + * of consecutive free grants ending with gnttab_last_free. This pointer is
> + * updated in a rather defensive way, in order to avoid performance hits in
> + * hot paths.
> + * All those variables are protected by gnttab_list_lock.
> + */
> static int gnttab_free_count;
> -static grant_ref_t gnttab_free_head;
> +static unsigned int gnttab_size;
> +static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
> +static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
> +static grant_ref_t *gnttab_free_tail_ptr;
> +static unsigned long *gnttab_free_bitmap;
> static DEFINE_SPINLOCK(gnttab_list_lock);
> +
> struct grant_frames xen_auto_xlat_grant_frames;
> static unsigned int xen_gnttab_version;
> module_param_named(version, xen_gnttab_version, uint, 0);
> @@ -170,16 +194,111 @@ static int get_free_entries(unsigned count)
>
> ref = head = gnttab_free_head;
> gnttab_free_count -= count;
> - while (count-- > 1)
> - head = gnttab_entry(head);
> + while (count--) {
> + bitmap_clear(gnttab_free_bitmap, head, 1);
> + if (gnttab_free_tail_ptr == __gnttab_entry(head))
> + gnttab_free_tail_ptr = &gnttab_free_head;
> + if (count)
> + head = gnttab_entry(head);
> + }
> gnttab_free_head = gnttab_entry(head);
> gnttab_entry(head) = GNTTAB_LIST_END;
>
> + if (!gnttab_free_count) {
> + gnttab_last_free = GNTTAB_LIST_END;
> + gnttab_free_tail_ptr = NULL;
> + }
> +
> spin_unlock_irqrestore(&gnttab_list_lock, flags);
>
> return ref;
> }
>
> +static int get_seq_entry_count(void)
> +{
> + if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
> + *gnttab_free_tail_ptr == GNTTAB_LIST_END)
> + return 0;
> +
> + return gnttab_last_free - *gnttab_free_tail_ptr + 1;
> +}
> +
> +/* Rebuilds the free grant list and tries to find count consecutive entries. */
> +static int get_free_seq(unsigned int count)
> +{
> + int ret = -ENOSPC;
> + unsigned int from, to;
> + grant_ref_t *last;
> +
> + gnttab_free_tail_ptr = &gnttab_free_head;
> + last = &gnttab_free_head;
> +
> + for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
> + from < gnttab_size;
> + from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
> + to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
> + from + 1);
> + if (ret < 0 && to - from >= count) {
> + ret = from;
> + bitmap_clear(gnttab_free_bitmap, ret, count);
> + from += count;
> + gnttab_free_count -= count;
IIUIC we can have multiple passes over this, meaning that the gnttab_free_count may be decremented more than once. Is that intentional?
> + if (from == to)
> + continue;
> + }
> +
> + while (from < to) {
> + *last = from;
> + last = __gnttab_entry(from);
> + gnttab_last_free = from;
> + from++;
> + }
I have been looking at this loop and I can't understand what it is doing ;-( Can you enlighten me?
-boris
> + if (to < gnttab_size)
> + gnttab_free_tail_ptr = __gnttab_entry(to - 1);
> + }
> +
> + *last = GNTTAB_LIST_END;
> + if (gnttab_last_free != gnttab_size - 1)
> + gnttab_free_tail_ptr = NULL;
> +
> + return ret;
> +}
> +
> +static int get_free_entries_seq(unsigned int count)
> +{
> + unsigned long flags;
> + int ret = 0;
> +
> + spin_lock_irqsave(&gnttab_list_lock, flags);
> +
> + if (gnttab_free_count < count) {
> + ret = gnttab_expand(count - gnttab_free_count);
> + if (ret < 0)
> + goto out;
> + }
> +
> + if (get_seq_entry_count() < count) {
> + ret = get_free_seq(count);
> + if (ret >= 0)
> + goto out;
> + ret = gnttab_expand(count - get_seq_entry_count());
> + if (ret < 0)
> + goto out;
> + }
> +
> + ret = *gnttab_free_tail_ptr;
> + *gnttab_free_tail_ptr = gnttab_entry(ret + count - 1);
> + gnttab_free_count -= count;
> + if (!gnttab_free_count)
> + gnttab_free_tail_ptr = NULL;
> + bitmap_clear(gnttab_free_bitmap, ret, count);
> +
> + out:
> + spin_unlock_irqrestore(&gnttab_list_lock, flags);
> +
> + return ret;
> +}
> +
> static void do_free_callbacks(void)
> {
> struct gnttab_free_callback *callback, *next;
> @@ -206,17 +325,48 @@ static inline void check_free_callbacks(void)
> do_free_callbacks();
> }
>
> -static void put_free_entry(grant_ref_t ref)
> +static void put_free_entry_locked(grant_ref_t ref)
> {
> - unsigned long flags;
> - spin_lock_irqsave(&gnttab_list_lock, flags);
> gnttab_entry(ref) = gnttab_free_head;
> gnttab_free_head = ref;
> + if (!gnttab_free_count)
> + gnttab_last_free = ref;
> + if (gnttab_free_tail_ptr == &gnttab_free_head)
> + gnttab_free_tail_ptr = __gnttab_entry(ref);
> gnttab_free_count++;
> + bitmap_set(gnttab_free_bitmap, ref, 1);
> +}
> +
> +static void put_free_entry(grant_ref_t ref)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&gnttab_list_lock, flags);
> + put_free_entry_locked(ref);
> check_free_callbacks();
> spin_unlock_irqrestore(&gnttab_list_lock, flags);
> }
>
> +static void gnttab_set_free(unsigned int start, unsigned int n)
> +{
> + unsigned int i;
> +
> + for (i = start; i < start + n - 1; i++)
> + gnttab_entry(i) = i + 1;
> +
> + gnttab_entry(i) = GNTTAB_LIST_END;
> + if (!gnttab_free_count) {
> + gnttab_free_head = start;
> + gnttab_free_tail_ptr = &gnttab_free_head;
> + } else {
> + gnttab_entry(gnttab_last_free) = start;
> + }
> + gnttab_free_count += n;
> + gnttab_last_free = i;
> +
> + bitmap_set(gnttab_free_bitmap, start, n);
> +}
> +
> /*
> * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
> * Introducing a valid entry into the grant table:
> @@ -448,23 +598,31 @@ void gnttab_free_grant_references(grant_ref_t head)
> {
> grant_ref_t ref;
> unsigned long flags;
> - int count = 1;
> - if (head == GNTTAB_LIST_END)
> - return;
> +
> spin_lock_irqsave(&gnttab_list_lock, flags);
> - ref = head;
> - while (gnttab_entry(ref) != GNTTAB_LIST_END) {
> - ref = gnttab_entry(ref);
> - count++;
> + while (head != GNTTAB_LIST_END) {
> + ref = gnttab_entry(head);
> + put_free_entry_locked(head);
> + head = ref;
> }
> - gnttab_entry(ref) = gnttab_free_head;
> - gnttab_free_head = head;
> - gnttab_free_count += count;
> check_free_callbacks();
> spin_unlock_irqrestore(&gnttab_list_lock, flags);
> }
> EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
>
> +void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count)
> +{
> + unsigned long flags;
> + unsigned int i;
> +
> + spin_lock_irqsave(&gnttab_list_lock, flags);
> + for (i = count; i > 0; i--)
> + put_free_entry_locked(head + i - 1);
> + check_free_callbacks();
> + spin_unlock_irqrestore(&gnttab_list_lock, flags);
> +}
> +EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq);
> +
> int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
> {
> int h = get_free_entries(count);
> @@ -478,6 +636,24 @@ int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
> }
> EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
>
> +int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first)
> +{
> + int h;
> +
> + if (count == 1)
> + h = get_free_entries(1);
> + else
> + h = get_free_entries_seq(count);
> +
> + if (h < 0)
> + return -ENOSPC;
> +
> + *first = h;
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq);
> +
> int gnttab_empty_grant_references(const grant_ref_t *private_head)
> {
> return (*private_head == GNTTAB_LIST_END);
> @@ -570,16 +746,13 @@ static int grow_gnttab_list(unsigned int more_frames)
> goto grow_nomem;
> }
>
> + gnttab_set_free(gnttab_size, extra_entries);
>
> - for (i = grefs_per_frame * nr_grant_frames;
> - i < grefs_per_frame * new_nr_grant_frames - 1; i++)
> - gnttab_entry(i) = i + 1;
> -
> - gnttab_entry(i) = gnttab_free_head;
> - gnttab_free_head = grefs_per_frame * nr_grant_frames;
> - gnttab_free_count += extra_entries;
> + if (!gnttab_free_tail_ptr)
> + gnttab_free_tail_ptr = __gnttab_entry(gnttab_size);
>
> nr_grant_frames = new_nr_grant_frames;
> + gnttab_size += extra_entries;
>
> check_free_callbacks();
>
> @@ -1424,7 +1597,6 @@ int gnttab_init(void)
> int i;
> unsigned long max_nr_grant_frames;
> unsigned int max_nr_glist_frames, nr_glist_frames;
> - unsigned int nr_init_grefs;
> int ret;
>
> gnttab_request_version();
> @@ -1452,6 +1624,13 @@ int gnttab_init(void)
> }
> }
>
> + i = gnttab_interface->grefs_per_grant_frame * max_nr_grant_frames;
> + gnttab_free_bitmap = bitmap_zalloc(i, GFP_KERNEL);
> + if (!gnttab_free_bitmap) {
> + ret = -ENOMEM;
> + goto ini_nomem;
> + }
> +
> ret = arch_gnttab_init(max_nr_grant_frames,
> nr_status_frames(max_nr_grant_frames));
> if (ret < 0)
> @@ -1462,15 +1641,9 @@ int gnttab_init(void)
> goto ini_nomem;
> }
>
> - nr_init_grefs = nr_grant_frames *
> - gnttab_interface->grefs_per_grant_frame;
> -
> - for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
> - gnttab_entry(i) = i + 1;
> + gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame;
>
> - gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
> - gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
> - gnttab_free_head = NR_RESERVED_ENTRIES;
> + gnttab_set_free(NR_RESERVED_ENTRIES, gnttab_size - NR_RESERVED_ENTRIES);
>
> printk("Grant table initialized\n");
> return 0;
> @@ -1479,6 +1652,7 @@ int gnttab_init(void)
> for (i--; i >= 0; i--)
> free_page((unsigned long)gnttab_list[i]);
> kfree(gnttab_list);
> + bitmap_free(gnttab_free_bitmap);
> return ret;
> }
> EXPORT_SYMBOL_GPL(gnttab_init);
> diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
> index dfd5bf3..d815e1d 100644
> --- a/include/xen/grant_table.h
> +++ b/include/xen/grant_table.h
> @@ -129,10 +129,14 @@ int gnttab_try_end_foreign_access(grant_ref_t ref);
> */
> int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);
>
> +int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first);
> +
> void gnttab_free_grant_reference(grant_ref_t ref);
>
> void gnttab_free_grant_references(grant_ref_t head);
>
> +void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count);
> +
> int gnttab_empty_grant_references(const grant_ref_t *pprivate_head);
>
> int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);
Powered by blists - more mailing lists