Message-ID: <70fa216c-c6cc-73b1-cc43-24c3740a2984@arm.com>
Date:   Mon, 21 Aug 2023 13:28:30 +0100
From:   Robin Murphy <robin.murphy@....com>
To:     John Garry <john.g.garry@...cle.com>, joro@...tes.org
Cc:     will@...nel.org, iommu@...ts.linux.dev,
        linux-kernel@...r.kernel.org, zhangzekun11@...wei.com
Subject: Re: [PATCH 1/2] iommu/iova: Make the rcache depot scale better

On 2023-08-21 13:02, John Garry wrote:
> On 14/08/2023 18:53, Robin Murphy wrote:
>> The algorithm in the original paper specifies the storage of full
>> magazines in the depot as an unbounded list rather than a fixed-size
>> array. It turns out to be pretty straightforward to do this in our
>> implementation with no significant loss of efficiency. This allows
>> the depot to scale up to the working set sizes of larger systems,
>> while also potentially saving some memory on smaller ones too.
>>
>> Signed-off-by: Robin Murphy <robin.murphy@....com>
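
For anyone skimming the thread, the shape of the change is roughly the
following (a condensed model rather than the verbatim kernel code; the
names mirror the patch, and the lock and per-CPU fields are omitted):

/*
 * The depot becomes an intrusive singly-linked stack of full
 * magazines, chained through the word that holds 'size' whenever a
 * magazine is outside the depot, so no separate list head is needed.
 */
struct iova_magazine {
	union {
		unsigned long size;		/* valid outside the depot */
		struct iova_magazine *next;	/* valid while in the depot */
	};
	unsigned long pfns[127];		/* IOVA_MAG_SIZE */
};

struct iova_rcache {
	/* before: struct iova_magazine *depot[MAX_GLOBAL_MAGS]; */
	struct iova_magazine *depot;		/* after: unbounded list */
};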
> 
> This looks ok (ignoring the crash reported), so feel free to add:
> 
> Reviewed-by: John Garry <john.g.garry@...cle.com>

Thanks!

> A small comment and question below.
> 
>> ---
>>   drivers/iommu/iova.c | 65 ++++++++++++++++++++++++--------------------
>>   1 file changed, 36 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
>> index 10b964600948..d2de6fb0e9f4 100644
>> --- a/drivers/iommu/iova.c
>> +++ b/drivers/iommu/iova.c
>> @@ -625,10 +625,16 @@ EXPORT_SYMBOL_GPL(reserve_iova);
>>    * will be wasted.
>>    */
>>   #define IOVA_MAG_SIZE 127
>> -#define MAX_GLOBAL_MAGS 32    /* magazines per bin */
>>   struct iova_magazine {
>> -    unsigned long size;
>> +    /*
>> +     * Only full magazines are inserted into the depot, so we can avoid
>> +     * a separate list head and preserve maximum space-efficiency.
> 
> It might be worth explicitly mentioning that we try to keep the total 
> mag size a power of 2.

Sure, I can tie it in with the existing comment above, which might 
actually end up more readable anyway.
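
For reference, the arithmetic behind that: the union contributes one
word, so the whole magazine is 128 words. A minimal userspace sketch
of the check (assuming sizeof(void *) == sizeof(unsigned long), as on
Linux ILP32 and LP64 targets):

#include <assert.h>

#define IOVA_MAG_SIZE 127

struct iova_magazine {
	union {
		unsigned long size;
		struct iova_magazine *next;	/* shares one word with size */
	};
	unsigned long pfns[IOVA_MAG_SIZE];
};

/* 1 header word + 127 pfns = 128 words: 1024 bytes on LP64, 512 on ILP32 */
static_assert(sizeof(struct iova_magazine) == 128 * sizeof(unsigned long),
	      "total mag size stays a power of 2");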

>> +     */
>> +    union {
>> +        unsigned long size;
>> +        struct iova_magazine *next;
>> +    };
>>       unsigned long pfns[IOVA_MAG_SIZE];
>>   };
>> @@ -640,8 +646,7 @@ struct iova_cpu_rcache {
>>   struct iova_rcache {
>>       spinlock_t lock;
>> -    unsigned long depot_size;
>> -    struct iova_magazine *depot[MAX_GLOBAL_MAGS];
>> +    struct iova_magazine *depot;
>>       struct iova_cpu_rcache __percpu *cpu_rcaches;
>>   };
>> @@ -717,6 +722,21 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
>>       mag->pfns[mag->size++] = pfn;
>>   }
>> +static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache)
>> +{
>> +    struct iova_magazine *mag = rcache->depot;
>> +
>> +    rcache->depot = mag->next;
>> +    mag->size = IOVA_MAG_SIZE;
>> +    return mag;
>> +}
>> +
>> +static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag)
>> +{
>> +    mag->next = rcache->depot;
>> +    rcache->depot = mag;
>> +}
>> +
>>   int iova_domain_init_rcaches(struct iova_domain *iovad)
>>   {
>>       unsigned int cpu;
>> @@ -734,7 +754,6 @@ int iova_domain_init_rcaches(struct iova_domain *iovad)
>>           rcache = &iovad->rcaches[i];
>>           spin_lock_init(&rcache->lock);
>> -        rcache->depot_size = 0;
>>           rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
>>                                cache_line_size());
>>           if (!rcache->cpu_rcaches) {
>> @@ -776,7 +795,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
>>                    struct iova_rcache *rcache,
>>                    unsigned long iova_pfn)
>>   {
>> -    struct iova_magazine *mag_to_free = NULL;
>>       struct iova_cpu_rcache *cpu_rcache;
>>       bool can_insert = false;
>>       unsigned long flags;
>> @@ -794,12 +812,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
>>           if (new_mag) {
>>               spin_lock(&rcache->lock);
>> -            if (rcache->depot_size < MAX_GLOBAL_MAGS) {
>> -                rcache->depot[rcache->depot_size++] =
>> -                        cpu_rcache->loaded;
>> -            } else {
>> -                mag_to_free = cpu_rcache->loaded;
>> -            }
>> +            iova_depot_push(rcache, cpu_rcache->loaded);
>>               spin_unlock(&rcache->lock);
> 
> Out of curiosity, do you know why we took the approach (prior to this 
> change) of freeing the loaded mag and allocating a new empty one? Why 
> not instead just say that we can't insert and bail out?

I have a feeling it must have been mentioned at some point, since my 
memory says there was a deliberate intent to keep the flow through the 
critical section simple and consistent, and to minimise time spent 
holding the rcache lock; I'm 99% sure that isn't my own inferred 
reasoning...
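
To make that concrete, the insert slow path with this patch applied
looks roughly like this (condensed from __iova_rcache_insert(), not a
verbatim quote):

	/*
	 * Both per-CPU magazines are full, so the loaded one is retired
	 * to the depot and replaced. The GFP_ATOMIC allocation happens
	 * before the depot lock is taken, so the critical section is a
	 * single O(1) pointer push with no bound check or free path.
	 */
	struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

	if (new_mag) {
		spin_lock(&rcache->lock);
		iova_depot_push(rcache, cpu_rcache->loaded);
		spin_unlock(&rcache->lock);

		cpu_rcache->loaded = new_mag;	/* empty, ready for this pfn */
		can_insert = true;
	}

Pre-patch, the same path had the extra wrinkle that a full depot array
diverted the loaded magazine to mag_to_free, with the actual freeing
deferred until after the per-CPU lock was dropped.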

Cheers,
Robin.

>>               cpu_rcache->loaded = new_mag;
>> @@ -812,11 +825,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
>>       spin_unlock_irqrestore(&cpu_rcache->lock, flags);
>> -    if (mag_to_free) {
>> -        iova_magazine_free_pfns(mag_to_free, iovad);
>> -        iova_magazine_free(mag_to_free);
>> -    }
>> -
>>       return can_insert;
>>   }
>> @@ -854,9 +862,9 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
>>           has_pfn = true;
>>       } else {
>>           spin_lock(&rcache->lock);
>> -        if (rcache->depot_size > 0) {
>> +        if (rcache->depot) {
>>               iova_magazine_free(cpu_rcache->loaded);
>> -            cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
>> +            cpu_rcache->loaded = iova_depot_pop(rcache);
>>               has_pfn = true;
>>           }
>>           spin_unlock(&rcache->lock);
>> @@ -894,10 +902,10 @@ static void free_iova_rcaches(struct iova_domain *iovad)
>>   {
>>       struct iova_rcache *rcache;
>>       struct iova_cpu_rcache *cpu_rcache;
>> +    struct iova_magazine *mag;
>>       unsigned int cpu;
>> -    int i, j;
>> -    for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>> +    for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>>           rcache = &iovad->rcaches[i];
>>           if (!rcache->cpu_rcaches)
>>               break;
>> @@ -907,8 +915,8 @@ static void free_iova_rcaches(struct iova_domain *iovad)
>>               iova_magazine_free(cpu_rcache->prev);
>>           }
>>           free_percpu(rcache->cpu_rcaches);
>> -        for (j = 0; j < rcache->depot_size; ++j)
>> -            iova_magazine_free(rcache->depot[j]);
>> +        while ((mag = iova_depot_pop(rcache)))
>> +            iova_magazine_free(mag);
>>       }
>>       kfree(iovad->rcaches);
>> @@ -941,17 +949,16 @@ static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
>>   static void free_global_cached_iovas(struct iova_domain *iovad)
>>   {
>>       struct iova_rcache *rcache;
>> +    struct iova_magazine *mag;
>>       unsigned long flags;
>> -    int i, j;
>> -    for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>> +    for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
>>           rcache = &iovad->rcaches[i];
>>           spin_lock_irqsave(&rcache->lock, flags);
>> -        for (j = 0; j < rcache->depot_size; ++j) {
>> -            iova_magazine_free_pfns(rcache->depot[j], iovad);
>> -            iova_magazine_free(rcache->depot[j]);
>> +        while ((mag = iova_depot_pop(rcache))) {
>> +            iova_magazine_free_pfns(mag, iovad);
>> +            iova_magazine_free(mag);
>>           }
>> -        rcache->depot_size = 0;
>>           spin_unlock_irqrestore(&rcache->lock, flags);
>>       }
>>   }
> 
