lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <619cbc61-d40f-a19f-179d-1ae35a1a17d4@redhat.com>
Date:   Mon, 7 Aug 2023 20:54:25 +0200
From:   Danilo Krummrich <dakr@...hat.com>
To:     Christian König <christian.koenig@....com>,
        airlied@...il.com, daniel@...ll.ch, tzimmermann@...e.de,
        mripard@...nel.org, corbet@....net, bskeggs@...hat.com,
        Liam.Howlett@...cle.com, matthew.brost@...el.com,
        boris.brezillon@...labora.com, alexdeucher@...il.com,
        ogabbay@...nel.org, bagasdotme@...il.com, willy@...radead.org,
        jason@...kstrand.net, donald.robson@...tec.com
Cc:     dri-devel@...ts.freedesktop.org, nouveau@...ts.freedesktop.org,
        linux-doc@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH drm-misc-next v9 06/11] drm/nouveau: fence: separate fence
 alloc and emit

Hi Christian,

On 8/7/23 20:07, Christian König wrote:
> Am 03.08.23 um 18:52 schrieb Danilo Krummrich:
>> The new (VM_BIND) UAPI exports DMA fences through DRM syncobjs. Hence,
>> in order to emit fences within DMA fence signalling critical sections
>> (e.g. as typically done in the DRM GPU schedulers run_job() callback) we
>> need to separate fence allocation and fence emitting.
> 
> At least from the description that sounds like it might be illegal. 
> Daniel can you take a look as well.
> 
> What exactly are you doing here?

I'm basically doing exactly the same as amdgpu_fence_emit() does in 
amdgpu_ib_schedule() called by amdgpu_job_run().

The difference - and this is what this patch is for - is that I separate 
the fence allocation from emitting the fence, such that the fence 
structure is allocated before the job is submitted to the GPU scheduler. 
amdgpu solves this with GFP_ATOMIC within amdgpu_fence_emit() to 
allocate the fence structure in this case.

- Danilo

> 
> Regards,
> Christian.
> 
>>
>> Signed-off-by: Danilo Krummrich <dakr@...hat.com>
>> ---
>>   drivers/gpu/drm/nouveau/dispnv04/crtc.c |  9 ++++-
>>   drivers/gpu/drm/nouveau/nouveau_bo.c    | 52 +++++++++++++++----------
>>   drivers/gpu/drm/nouveau/nouveau_chan.c  |  6 ++-
>>   drivers/gpu/drm/nouveau/nouveau_dmem.c  |  9 +++--
>>   drivers/gpu/drm/nouveau/nouveau_fence.c | 16 +++-----
>>   drivers/gpu/drm/nouveau/nouveau_fence.h |  3 +-
>>   drivers/gpu/drm/nouveau/nouveau_gem.c   |  5 ++-
>>   7 files changed, 59 insertions(+), 41 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c 
>> b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
>> index a6f2e681bde9..a34924523133 100644
>> --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
>> +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
>> @@ -1122,11 +1122,18 @@ nv04_page_flip_emit(struct nouveau_channel *chan,
>>       PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000);
>>       PUSH_KICK(push);
>> -    ret = nouveau_fence_new(chan, false, pfence);
>> +    ret = nouveau_fence_new(pfence);
>>       if (ret)
>>           goto fail;
>> +    ret = nouveau_fence_emit(*pfence, chan);
>> +    if (ret)
>> +        goto fail_fence_unref;
>> +
>>       return 0;
>> +
>> +fail_fence_unref:
>> +    nouveau_fence_unref(pfence);
>>   fail:
>>       spin_lock_irqsave(&dev->event_lock, flags);
>>       list_del(&s->head);
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
>> b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> index 057bc995f19b..e9cbbf594e6f 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> @@ -820,29 +820,39 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object 
>> *bo, int evict,
>>           mutex_lock(&cli->mutex);
>>       else
>>           mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
>> +
>>       ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, 
>> ctx->interruptible);
>> -    if (ret == 0) {
>> -        ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
>> -        if (ret == 0) {
>> -            ret = nouveau_fence_new(chan, false, &fence);
>> -            if (ret == 0) {
>> -                /* TODO: figure out a better solution here
>> -                 *
>> -                 * wait on the fence here explicitly as going through
>> -                 * ttm_bo_move_accel_cleanup somehow doesn't seem to 
>> do it.
>> -                 *
>> -                 * Without this the operation can timeout and we'll 
>> fallback to a
>> -                 * software copy, which might take several minutes to 
>> finish.
>> -                 */
>> -                nouveau_fence_wait(fence, false, false);
>> -                ret = ttm_bo_move_accel_cleanup(bo,
>> -                                &fence->base,
>> -                                evict, false,
>> -                                new_reg);
>> -                nouveau_fence_unref(&fence);
>> -            }
>> -        }
>> +    if (ret)
>> +        goto out_unlock;
>> +
>> +    ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
>> +    if (ret)
>> +        goto out_unlock;
>> +
>> +    ret = nouveau_fence_new(&fence);
>> +    if (ret)
>> +        goto out_unlock;
>> +
>> +    ret = nouveau_fence_emit(fence, chan);
>> +    if (ret) {
>> +        nouveau_fence_unref(&fence);
>> +        goto out_unlock;
>>       }
>> +
>> +    /* TODO: figure out a better solution here
>> +     *
>> +     * wait on the fence here explicitly as going through
>> +     * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
>> +     *
>> +     * Without this the operation can timeout and we'll fallback to a
>> +     * software copy, which might take several minutes to finish.
>> +     */
>> +    nouveau_fence_wait(fence, false, false);
>> +    ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false,
>> +                    new_reg);
>> +    nouveau_fence_unref(&fence);
>> +
>> +out_unlock:
>>       mutex_unlock(&cli->mutex);
>>       return ret;
>>   }
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c 
>> b/drivers/gpu/drm/nouveau/nouveau_chan.c
>> index 6d639314250a..f69be4c8f9f2 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_chan.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
>> @@ -62,9 +62,11 @@ nouveau_channel_idle(struct nouveau_channel *chan)
>>           struct nouveau_fence *fence = NULL;
>>           int ret;
>> -        ret = nouveau_fence_new(chan, false, &fence);
>> +        ret = nouveau_fence_new(&fence);
>>           if (!ret) {
>> -            ret = nouveau_fence_wait(fence, false, false);
>> +            ret = nouveau_fence_emit(fence, chan);
>> +            if (!ret)
>> +                ret = nouveau_fence_wait(fence, false, false);
>>               nouveau_fence_unref(&fence);
>>           }
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c 
>> b/drivers/gpu/drm/nouveau/nouveau_dmem.c
>> index 789857faa048..4ad40e42cae1 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
>> @@ -209,7 +209,8 @@ static vm_fault_t 
>> nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
>>           goto done;
>>       }
>> -    nouveau_fence_new(dmem->migrate.chan, false, &fence);
>> +    if (!nouveau_fence_new(&fence))
>> +        nouveau_fence_emit(fence, dmem->migrate.chan);
>>       migrate_vma_pages(&args);
>>       nouveau_dmem_fence_done(&fence);
>>       dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, 
>> DMA_BIDIRECTIONAL);
>> @@ -402,7 +403,8 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk 
>> *chunk)
>>           }
>>       }
>> -    nouveau_fence_new(chunk->drm->dmem->migrate.chan, false, &fence);
>> +    if (!nouveau_fence_new(&fence))
>> +        nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
>>       migrate_device_pages(src_pfns, dst_pfns, npages);
>>       nouveau_dmem_fence_done(&fence);
>>       migrate_device_finalize(src_pfns, dst_pfns, npages);
>> @@ -675,7 +677,8 @@ static void nouveau_dmem_migrate_chunk(struct 
>> nouveau_drm *drm,
>>           addr += PAGE_SIZE;
>>       }
>> -    nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
>> +    if (!nouveau_fence_new(&fence))
>> +        nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
>>       migrate_vma_pages(args);
>>       nouveau_dmem_fence_done(&fence);
>>       nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c 
>> b/drivers/gpu/drm/nouveau/nouveau_fence.c
>> index ee5e9d40c166..e946408f945b 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
>> @@ -210,6 +210,9 @@ nouveau_fence_emit(struct nouveau_fence *fence, 
>> struct nouveau_channel *chan)
>>       struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
>>       int ret;
>> +    if (unlikely(!chan->fence))
>> +        return -ENODEV;
>> +
>>       fence->channel  = chan;
>>       fence->timeout  = jiffies + (15 * HZ);
>> @@ -396,25 +399,16 @@ nouveau_fence_unref(struct nouveau_fence **pfence)
>>   }
>>   int
>> -nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
>> -          struct nouveau_fence **pfence)
>> +nouveau_fence_new(struct nouveau_fence **pfence)
>>   {
>>       struct nouveau_fence *fence;
>> -    int ret = 0;
>> -
>> -    if (unlikely(!chan->fence))
>> -        return -ENODEV;
>>       fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>>       if (!fence)
>>           return -ENOMEM;
>> -    ret = nouveau_fence_emit(fence, chan);
>> -    if (ret)
>> -        nouveau_fence_unref(&fence);
>> -
>>       *pfence = fence;
>> -    return ret;
>> +    return 0;
>>   }
>>   static const char *nouveau_fence_get_get_driver_name(struct 
>> dma_fence *fence)
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h 
>> b/drivers/gpu/drm/nouveau/nouveau_fence.h
>> index 0ca2bc85adf6..7c73c7c9834a 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_fence.h
>> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
>> @@ -17,8 +17,7 @@ struct nouveau_fence {
>>       unsigned long timeout;
>>   };
>> -int  nouveau_fence_new(struct nouveau_channel *, bool sysmem,
>> -               struct nouveau_fence **);
>> +int  nouveau_fence_new(struct nouveau_fence **);
>>   void nouveau_fence_unref(struct nouveau_fence **);
>>   int  nouveau_fence_emit(struct nouveau_fence *, struct 
>> nouveau_channel *);
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c 
>> b/drivers/gpu/drm/nouveau/nouveau_gem.c
>> index a48f42aaeab9..9c8d1b911a01 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
>> @@ -873,8 +873,11 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, 
>> void *data,
>>           }
>>       }
>> -    ret = nouveau_fence_new(chan, false, &fence);
>> +    ret = nouveau_fence_new(&fence);
>> +    if (!ret)
>> +        ret = nouveau_fence_emit(fence, chan);
>>       if (ret) {
>> +        nouveau_fence_unref(&fence);
>>           NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret);
>>           WIND_RING(chan);
>>           goto out;
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ